Diffstat (limited to 'src/recompiler')
-rw-r--r--  src/recompiler/.scm-settings | 34
-rw-r--r--  src/recompiler/COPYING.LIB | 504
-rw-r--r--  src/recompiler/Makefile.kmk | 340
-rw-r--r--  src/recompiler/README.vbox | 6
-rw-r--r--  src/recompiler/Sun/Makefile.kup | 0
-rw-r--r--  src/recompiler/Sun/config-host.h | 46
-rw-r--r--  src/recompiler/Sun/config.h | 44
-rw-r--r--  src/recompiler/Sun/crt/stdio.h | 73
-rw-r--r--  src/recompiler/Sun/deftoimp.sed | 37
-rw-r--r--  src/recompiler/Sun/e_powl-amd64.S | 371
-rw-r--r--  src/recompiler/Sun/e_powl-x86.S | 413
-rw-r--r--  src/recompiler/Sun/kvm.h | 28
-rw-r--r--  src/recompiler/Sun/testmath.c | 828
-rw-r--r--  src/recompiler/VBoxREM.def | 50
-rw-r--r--  src/recompiler/VBoxREM.rc | 51
-rw-r--r--  src/recompiler/VBoxREMWrapper.cpp | 2477
-rw-r--r--  src/recompiler/VBoxREMWrapperA.asm | 912
-rw-r--r--  src/recompiler/VBoxRecompiler.c | 5481
-rw-r--r--  src/recompiler/bswap.h | 225
-rw-r--r--  src/recompiler/cache-utils.h | 41
-rw-r--r--  src/recompiler/cpu-all.h | 1184
-rw-r--r--  src/recompiler/cpu-common.h | 135
-rw-r--r--  src/recompiler/cpu-defs.h | 235
-rw-r--r--  src/recompiler/cpu-exec.c | 1455
-rw-r--r--  src/recompiler/cutils.c | 752
-rw-r--r--  src/recompiler/def-helper.h | 258
-rw-r--r--  src/recompiler/disas.h | 47
-rw-r--r--  src/recompiler/dyngen-exec.h | 131
-rw-r--r--  src/recompiler/elf.h | 1196
-rw-r--r--  src/recompiler/exec-all.h | 400
-rw-r--r--  src/recompiler/exec.c | 4586
-rw-r--r--  src/recompiler/fpu/Makefile.kup | 0
-rw-r--r--  src/recompiler/fpu/softfloat-macros.h | 719
-rw-r--r--  src/recompiler/fpu/softfloat-native.c | 521
-rw-r--r--  src/recompiler/fpu/softfloat-native.h | 493
-rw-r--r--  src/recompiler/fpu/softfloat-specialize.h | 581
-rw-r--r--  src/recompiler/fpu/softfloat.c | 5883
-rw-r--r--  src/recompiler/fpu/softfloat.h | 537
-rw-r--r--  src/recompiler/gen-icount.h | 48
-rw-r--r--  src/recompiler/host-utils.c | 109
-rw-r--r--  src/recompiler/host-utils.h | 236
-rw-r--r--  src/recompiler/hostregs_helper.h | 70
-rw-r--r--  src/recompiler/ioport.h | 71
-rw-r--r--  src/recompiler/osdep.h | 166
-rw-r--r--  src/recompiler/qemu-barrier.h | 10
-rw-r--r--  src/recompiler/qemu-common.h | 348
-rw-r--r--  src/recompiler/qemu-lock.h | 261
-rw-r--r--  src/recompiler/qemu-log.h | 135
-rw-r--r--  src/recompiler/qemu-queue.h | 449
-rw-r--r--  src/recompiler/qemu-timer.h | 272
-rw-r--r--  src/recompiler/softmmu_defs.h | 57
-rw-r--r--  src/recompiler/softmmu_exec.h | 153
-rw-r--r--  src/recompiler/softmmu_header.h | 208
-rw-r--r--  src/recompiler/softmmu_template.h | 366
-rw-r--r--  src/recompiler/target-i386/Makefile.kup | 0
-rw-r--r--  src/recompiler/target-i386/TODO | 32
-rw-r--r--  src/recompiler/target-i386/cpu.h | 1215
-rw-r--r--  src/recompiler/target-i386/exec.h | 370
-rw-r--r--  src/recompiler/target-i386/helper.c | 1227
-rw-r--r--  src/recompiler/target-i386/helper.h | 253
-rw-r--r--  src/recompiler/target-i386/helper_template.h | 344
-rw-r--r--  src/recompiler/target-i386/op_helper.c | 7164
-rw-r--r--  src/recompiler/target-i386/ops_sse.h | 2111
-rw-r--r--  src/recompiler/target-i386/ops_sse_header.h | 359
-rw-r--r--  src/recompiler/target-i386/svm.h | 222
-rw-r--r--  src/recompiler/target-i386/translate.c | 8385
-rw-r--r--  src/recompiler/targphys.h | 21
-rw-r--r--  src/recompiler/tcg-runtime.c | 89
-rw-r--r--  src/recompiler/tcg/LICENSE | 3
-rw-r--r--  src/recompiler/tcg/Makefile.kup | 0
-rw-r--r--  src/recompiler/tcg/README | 497
-rw-r--r--  src/recompiler/tcg/TODO | 14
-rw-r--r--  src/recompiler/tcg/i386/Makefile.kup | 0
-rw-r--r--  src/recompiler/tcg/i386/tcg-target.c | 2231
-rw-r--r--  src/recompiler/tcg/i386/tcg-target.h | 134
-rw-r--r--  src/recompiler/tcg/tcg-dyngen.c | 437
-rw-r--r--  src/recompiler/tcg/tcg-op.h | 2469
-rw-r--r--  src/recompiler/tcg/tcg-opc.h | 304
-rw-r--r--  src/recompiler/tcg/tcg-runtime.h | 18
-rw-r--r--  src/recompiler/tcg/tcg.c | 2212
-rw-r--r--  src/recompiler/tcg/tcg.h | 512
-rw-r--r--  src/recompiler/tests/Makefile | 109
-rw-r--r--  src/recompiler/tests/hello-arm.c | 113
-rw-r--r--  src/recompiler/tests/hello-i386.c | 26
-rw-r--r--  src/recompiler/tests/linux-test.c | 545
-rw-r--r--  src/recompiler/tests/pi_10.com | bin 0 -> 54 bytes
-rw-r--r--  src/recompiler/tests/qruncom.c | 284
-rw-r--r--  src/recompiler/tests/runcom.c | 195
-rw-r--r--  src/recompiler/tests/sha1.c | 240
-rw-r--r--  src/recompiler/tests/test-i386-code16.S | 79
-rw-r--r--  src/recompiler/tests/test-i386-muldiv.h | 76
-rw-r--r--  src/recompiler/tests/test-i386-shift.h | 185
-rw-r--r--  src/recompiler/tests/test-i386-ssse3.c | 57
-rw-r--r--  src/recompiler/tests/test-i386-vm86.S | 103
-rw-r--r--  src/recompiler/tests/test-i386.c | 2769
-rw-r--r--  src/recompiler/tests/test-i386.h | 152
-rw-r--r--  src/recompiler/tests/test-mmap.c | 485
-rw-r--r--  src/recompiler/tests/test_path.c | 151
-rw-r--r--  src/recompiler/tests/testthread.c | 51
-rw-r--r--  src/recompiler/translate-all.c | 186
100 files changed, 70462 insertions, 0 deletions
diff --git a/src/recompiler/.scm-settings b/src/recompiler/.scm-settings
new file mode 100644
index 00000000..946bad9f
--- /dev/null
+++ b/src/recompiler/.scm-settings
@@ -0,0 +1,34 @@
+# $Id: .scm-settings $
+## @file
+# Source code massager settings for the recompiler.
+#
+
+#
+# Copyright (C) 2017-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+
+# This is external stuff.
+--external-copyright --no-convert-tabs
+/.scm-settings: --no-external-copyright --convert-tabs
+/Makefile.kmk: --no-external-copyright --convert-tabs
+/VBox*: --no-external-copyright --convert-tabs
+/Sun/*: --no-external-copyright --convert-tabs
+/Sun/e_*.S: --external-copyright --no-convert-tabs
+
+*.com: --treat-as binary
+
+*.h: --no-fix-header-guards
+
+/tests/linux-test.c: --lgpl-disclaimer
+/tests/test-i386.c: --lgpl-disclaimer
+/tests/test-mmap.c: --lgpl-disclaimer
+
diff --git a/src/recompiler/COPYING.LIB b/src/recompiler/COPYING.LIB
new file mode 100644
index 00000000..bfd28e23
--- /dev/null
+++ b/src/recompiler/COPYING.LIB
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/src/recompiler/Makefile.kmk b/src/recompiler/Makefile.kmk
new file mode 100644
index 00000000..28c8ea95
--- /dev/null
+++ b/src/recompiler/Makefile.kmk
@@ -0,0 +1,340 @@
+# $Id: Makefile.kmk $
+## @file
+# The Recompiler Sub-Makefile.
+#
+
+#
+# Copyright (C) 2006-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+
+SUB_DEPTH = ../..
+include $(KBUILD_PATH)/subheader.kmk
+
+#
+# Globals
+#
+VBOX_PATH_RECOMPILER_SRC := $(PATH_SUB_CURRENT)
+# Workaround for darwin hell.
+ifeq ($(KBUILD_TARGET),darwin)
+ VBOX_WITHOUT_REM_LDR_CYCLE := 1
+endif
+ifeq ($(KBUILD_TARGET).$(KBUILD_TARGET_ARCH),win.amd64)
+ VBOX_USE_MINGWW64 = 1
+endif
+
+
+#
+# The primary REM module definition.
+#
+# This is extended by one of the VBoxREM* modules below. Currently, this
+# isn't done by inheritance because of some obscure bug wrt inheriting from
+# unused targets that I'm not going to fix now.
+#
+ifneq ($(KBUILD_TARGET),win)
+ VBoxRemPrimary_TEMPLATE = VBOXR3NP
+ # workaround the regparm bug in gcc <= 3.3
+ VBoxRemPrimary_DEFS = $(if $(VBOX_GCC_BUGGY_REGPARM),GCC_WITH_BUGGY_REGPARM,)
+else
+ VBoxRemPrimary_TEMPLATE = DUMMY
+ VBoxRemPrimary_TOOL.win.x86 = MINGW32
+ ifdef VBOX_USE_MINGWW64
+ VBoxRemPrimary_TOOL.win.amd64 = MINGWW64
+ else
+ VBoxRemPrimary_TOOL.win.amd64 = XGCCAMD64LINUX
+ endif
+ VBoxRemPrimary_SDKS.win.x86 = W32API
+ VBoxRemPrimary_ASFLAGS = -x assembler-with-cpp
+ VBoxRemPrimary_CFLAGS = -Wall -g -fno-omit-frame-pointer -fno-strict-aliasing -Wno-shadow
+ VBoxRemPrimary_CFLAGS.debug = -O0
+ VBoxRemPrimary_CFLAGS.release += -fno-gcse -O2
+ VBoxRemPrimary_CFLAGS.profile = $(VBoxRemPrimary_CFLAGS.release)
+ VBoxRemPrimary_DEFS += IN_RING3 $(ARCH_BITS_DEFS)
+ # Workaround the regparm bug in gcc <= 3.3.
+ VBoxRemPrimary_DEFS.win.x86 += GCC_WITH_BUGGY_REGPARM
+ # Missing fpclassify. Is there a better define or flag for this?
+ VBoxRemPrimary_DEFS.solaris += __C99FEATURES__
+endif # win
+VBoxRemPrimary_DEFS += IN_REM_R3 REM_INCLUDE_CPU_H NEED_CPU_H
+#VBoxRemPrimary_DEFS += REM_PHYS_ADDR_IN_TLB
+#VBoxRemPrimary_DEFS += DEBUG_ALL_LOGGING DEBUG_DISAS DEBUG_PCALL CONFIG_DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB # Enables huge amounts of debug logging.
+#VBoxRemPrimary_DEFS += DEBUG_DISAS DEBUG_PCALL CONFIG_DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB # Enables huge amounts of debug logging.
+ifdef VBOX_WITH_RAW_MODE
+ VBoxRemPrimary_DEFS += VBOX_WITH_RAW_MODE
+endif
+ifdef VBOX_WITH_RAW_RING1
+ VBoxRemPrimary_DEFS += VBOX_WITH_RAW_RING1
+endif
+VBoxRemPrimary_DEFS.linux = _GNU_SOURCE
+ifdef VBOX_SOLARIS_10
+ VBoxRemPrimary_DEFS.solaris = CONFIG_SOLARIS_VERSION=10
+else
+ VBoxRemPrimary_DEFS.solaris = CONFIG_SOLARIS_VERSION=11
+endif
+VBoxRemPrimary_DEFS.freebsd += _BSD
+VBoxRemPrimary_DEFS.amd64 += __x86_64__
+VBoxRemPrimary_DEFS.x86 += __i386__
+
+VBoxRemPrimary_INCS = \
+ Sun \
+ target-i386 \
+ tcg \
+ fpu \
+ $(VBoxRemPrimary_0_OUTDIR) \
+ $(PATH_ROOT)/src/VBox/VMM/include \
+ tcg/i386 \
+ .
+ifn1of ($(VBoxRemPrimary_DEFS),DEBUG_TMP_LOGGING)
+ VBoxRemPrimary_DEFS += LOG_USE_C99
+ VBoxRemPrimary_INCS <= \
+ Sun/crt
+endif
+
+VBoxRemPrimary_SOURCES = \
+ VBoxRecompiler.c \
+ cpu-exec.c \
+ exec.c \
+ translate-all.c \
+ host-utils.c \
+ cutils.c \
+ tcg-runtime.c \
+ tcg/tcg.c \
+ tcg/tcg-dyngen.c \
+ fpu/softfloat-native.c \
+ target-i386/op_helper.c \
+ target-i386/helper.c \
+ target-i386/translate.c
+VBoxRemPrimary_SOURCES.debug += \
+ Sun/testmath.c
+VBoxRemPrimary_SOURCES.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMRes.o
+VBoxRemPrimary_SOURCES.win.x86 = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def
+ifdef VBOX_USE_MINGWW64
+ if 0 # exporting all helps when windbg pops up on crashes
+ VBoxRemPrimary_SOURCES.win.amd64 = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def
+ else
+ VBoxRemPrimary_LDFLAGS.win.amd64 = --export-all
+ endif
+endif
+
+ifndef VBOX_USE_MINGWW64
+VBoxRemPrimary_LIBS = \
+ $(LIB_VMM) \
+ $(LIB_RUNTIME)
+else
+VBoxRemPrimary_LIBS = \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a
+VBoxRemPrimary_CLEAN = \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.def \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a \
+ $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.def
+endif
+
+VBoxRemPrimary_LDFLAGS.solaris = -mimpure-text
+if defined(VBOX_WITH_HARDENING) && "$(KBUILD_TARGET)" == "win"
+ VBoxRemPrimary_POST_CMDS = \
+ $(VBOX_VCC_EDITBIN) /LargeAddressAware /DynamicBase /NxCompat /Release /IntegrityCheck \
+ /Version:$(VBOX_VERSION_MAJOR)0$(VBOX_VERSION_MINOR).$(VBOX_VERSION_BUILD) "$(out)" \
+ $$(NLTAB)$(VBOX_SIGN_IMAGE_CMDS)
+else
+ VBoxRemPrimary_POST_CMDS = $(VBOX_SIGN_IMAGE_CMDS)
+endif
+
+
+if "$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)" == "win.amd64" && !defined(VBOX_USE_MINGWW64)
+ #
+ # VBoxREM2/VBoxRemPrimary - Currently only used by 64-bit Windows.
+ # (e_powl-xxx.S doesn't fit in IPRT because it requires GAS and is LGPL.)
+ #
+ SYSMODS += VBoxRemPrimary
+ VBoxRemPrimary_TEMPLATE = VBOXNOCRTGAS
+ VBoxRemPrimary_NAME = VBoxREM2
+ VBoxRemPrimary_DEFS += LOG_USE_C99 $(ARCH_BITS_DEFS)
+ VBoxRemPrimary_SOURCES += \
+ Sun/e_powl-$(KBUILD_TARGET_ARCH).S
+ VBoxRemPrimary_INCS += \
+ Sun/crt
+ VBoxRemPrimary_SYSSUFF = .rel
+ VBoxRemPrimary_LIBS = \
+ $(PATH_STAGE_LIB)/RuntimeR3NoCRTGCC$(VBOX_SUFF_LIB)
+ VBoxRemPrimary_POST_CMDS = $(NO_SUCH_VARIABLE)
+ VBOX_REM_WRAPPER = 2
+
+else if "$(KBUILD_TARGET_ARCH)" == "x86" && defined(VBOX_WITH_64_BITS_GUESTS)
+ #
+ # For 32-bit targets with 64-bit guests enabled, we build two REM DLLs:
+ #    VBOXREM64 - with 64-bit guest support (slow and buggy at the moment)
+ #    VBOXREM32 - 32-bit guest support only (faster and stable, but not suitable for 64-bit guests)
+ # At runtime, VBOXREM loads the appropriate library depending on the guest settings.
+ # 64-bit targets have 64-bit capable REM enabled by default, so they are not part of this mess.
+ #
+
+ #
+ # VBoxREM32/VBoxRemPrimary
+ #
+ DLLS += VBoxRemPrimary
+ VBoxRemPrimary_NAME = VBoxREM32
+ VBoxRemPrimary_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM32.dylib
+ VBOX_REM_WRAPPER = 32
+
+ #
+ # VBoxREM64
+ #
+ DLLS += VBoxREM64
+ VBoxREM64_EXTENDS = VBoxRemPrimary
+ VBoxREM64_EXTENDS_BY = appending
+ VBoxREM64_NAME = VBoxREM64
+ VBoxREM64_DEFS = VBOX_ENABLE_VBOXREM64
+ VBoxREM64_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM64.dylib
+
+else
+ #
+ # VBoxREM/VBoxRemPrimary - Normal.
+ #
+ DLLS += VBoxRemPrimary
+ VBoxRemPrimary_NAME = VBoxREM
+ VBoxRemPrimary_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM3.dylib
+
+ ifdef VBOX_USE_MINGWW64
+ # GNU ld (rubenvb-4.5.4) 2.22.52.20120716 doesn't fix up rip relative
+ # addressing in the import libraries generated by microsoft link.exe. So, we
+ # have to regenerate these.
+ # Note! The chdir to the output directory is because dlltool writes temporary files to the current directory.
+ $$(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \
+ $$(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a : $$(VBoxRemPrimary_0_OUTDIR)/$$(notdir $$(basename $$@)).def
+ $(REDIRECT) -C "$(dir $@)" -- $(TOOL_MINGWW64_DLLTOOL) \
+ --output-lib "$@" \
+ --input-def "$<" \
+ --dllname "$(patsubst %Imp.a,%.dll,$(notdir $@))"
+
+ $$(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.def \
+ $$(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.def : \
+ $(PATH_STAGE_BIN)/$$(patsubst %Imp.def,%.dll,$$(notdir $$@)) \
+ | $$(dir $$@)
+ $(APPEND) -nt $@ "LIBRARY $(notdir $<)" "EXPORTS"
+ $(TOOL_$(VBOX_VCC_TOOL)_DUMPBIN) /EXPORTS "$<" \
+ | $(SED) -e '/ = /!d' \
+ -e 's/^.* \([^ ][^ ]*\) = .*$(DOLLAR)/ \"\1\"/' \
+ --append $@
+ endif # VBOX_USE_MINGWW64
+
+endif
+
+
+ifdef VBOX_REM_WRAPPER
+ #
+ # VBoxREM - Wrapper for loading VBoxREM2, VBoxREM32 or VBoxREM64.
+ #
+ DLLS += VBoxREMWrapper
+ VBoxREMWrapper_TEMPLATE = VBoxR3DllWarnNoPic
+ VBoxREMWrapper_NAME = VBoxREM
+ VBoxREMWrapper_DEFS = IN_REM_R3
+ if "$(KBUILD_TARGET_ARCH)" == "x86" && defined(VBOX_WITH_64_BITS_GUESTS)
+ VBoxREMWrapper_DEFS += VBOX_USE_BITNESS_SELECTOR
+ endif
+ ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+ VBoxREMWrapper_DEFS += VBOX_WITHOUT_REM_LDR_CYCLE
+ endif
+ VBoxREMWrapper_SOURCES = \
+ VBoxREMWrapper.cpp
+ VBoxREMWrapper_SOURCES.win = VBoxREM.rc
+ if "$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)" == "win.amd64" && !defined(VBOX_USE_MINGWW64)
+ VBoxREMWrapper_SOURCES += \
+ VBoxREMWrapperA.asm
+ endif
+ VBoxREMWrapper_LIBS = \
+ $(LIB_RUNTIME)
+ ifndef VBOX_WITHOUT_REM_LDR_CYCLE
+ VBoxREMWrapper_LIBS += \
+ $(LIB_VMM)
+ VBoxREMWrapper_LIBS.darwin += \
+ $(TARGET_VBoxREMImp)
+ endif
+ VBoxREMWrapper_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib
+endif
+
+
+#
+# The VBoxREM import library.
+#
+# This is a HACK to get around (a) the cyclic dependency between VBoxVMM and
+# VBoxREM during linking and (b) the recursive build ordering which means VBoxREM
+# won't be built until after all the other DLLs.
+#
+IMPORT_LIBS += VBoxREMImp
+VBoxREMImp_TEMPLATE = VBoxR3Dll
+ ifn1of ($(KBUILD_TARGET), os2 win)
+VBoxREMImp_NAME = VBoxREM
+ endif
+VBoxREMImp_INST = $(INST_LIB)
+VBoxREMImp_SOURCES.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def
+VBoxREMImp_CLEAN.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def
+VBoxREMImp_SOURCES.os2 = $(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def
+VBoxREMImp_CLEAN.os2 = $(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def
+ ifn1of ($(KBUILD_TARGET), os2 win)
+VBoxREMImp_SOURCES = $(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c
+VBoxREMImp_CLEAN = $(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c
+ endif
+ ifn1of ($(KBUILD_TARGET), darwin os2 win)
+VBoxREMImp_SONAME = VBoxREM$(SUFF_DLL)
+ endif
+ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+ VBoxREMImp_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib
+else
+ VBoxREMImp_LDFLAGS.darwin = -install_name $(subst @rpath,@executable_path,$(VBOX_DYLD_EXECUTABLE_PATH))/VBoxREM.dylib
+endif
+VBoxREMImp_LDFLAGS.l4 = -T$(L4_LIBDIR)/../main_rel.ld -nostdlib
+
+$$(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed $(MAKEFILE_CURRENT) | $$(dir $$@)
+ $(call MSG_GENERATE,,$@)
+ $(QUIET)$(APPEND) -t $@ '#ifdef VBOX_HAVE_VISIBILITY_HIDDEN'
+ $(QUIET)$(APPEND) $@ '# define EXPORT __attribute__((visibility("default")))'
+ $(QUIET)$(APPEND) $@ '#else'
+ $(QUIET)$(APPEND) $@ '# define EXPORT'
+ $(QUIET)$(APPEND) $@ '#endif'
+ $(QUIET)$(APPEND) $@ ''
+ $(QUIET)$(SED) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed --append $@ $<
+
+$$(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(MAKEFILE_CURRENT) | $$(dir $$@)
+ $(SED) \
+ -e 's/^[ \t][ \t]*REMR3/ _REMR3/' \
+ -e 's/\.[Dd][Ll][Ll]//' \
+ -e 's/^LIBRARY .*/LIBRARY VBoxREM INITINSTANCE TERMINSTANCE\nDATA MULTIPLE\n/' \
+ --output $@ \
+ $<
+
+$$(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(MAKEFILE_CURRENT) | $$(dir $$@)
+ $(CP) -f $< $@
+
+$$(VBoxREMImp_0_OUTDIR)/VBoxREMRes.o: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.rc $(MAKEFILE_CURRENT) $(VBOX_VERSION_MK) | $$(dir $$@)
+ $(call MSG_GENERATE,,$@)
+ $(QUIET)$(REDIRECT) -E 'COMSPEC=$(VBOX_GOOD_COMSPEC_BS)' \
+ -- $(TOOL_$(VBoxRemPrimary_TOOL.win.$(KBUILD_TARGET_ARCH))_PREFIX)windres \
+ $(addprefix -I,$(INCS) $(PATH_SDK_$(VBOX_WINPSDK)_INC) $(PATH_TOOL_$(VBOX_VCC_TOOL)_INC)) \
+ -DVBOX_SVN_REV=$(VBOX_SVN_REV) \
+ -DVBOX_SVN_REV_MOD_5K=$(expr $(VBOX_SVN_REV) % 50000) \
+ $< $@
+
+#
+# The math testcase as a standalone program for testing and debugging purposes.
+#
+## @todo This is a bit messy because of MINGW32.
+testmath_ASFLAGS.amd64 = -m amd64
+testmath_CFLAGS = -Wall -g
+testmath_CFLAGS.release = -O3
+testmath_LDFLAGS = -g
+testmath_DEFS = MATHTEST_STANDALONE
+testmath_SOURCES = Sun/testmath.c
+
+
+include $(FILE_KBUILD_SUB_FOOTER)
+
diff --git a/src/recompiler/README.vbox b/src/recompiler/README.vbox
new file mode 100644
index 00000000..d478b71e
--- /dev/null
+++ b/src/recompiler/README.vbox
@@ -0,0 +1,6 @@
+The recompiler is based on QEMU v0.13.0 (6ed912999d6ef636a5be5ccb266d7d3c0f0310b4)
+from git://git.savannah.nongnu.org/qemu.git.
+
+Parts related to calls of external functions have been modified to allow loading
+the recompiler library as a regular shared object in high memory on 64-bit systems.
+
diff --git a/src/recompiler/Sun/Makefile.kup b/src/recompiler/Sun/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/Sun/Makefile.kup
diff --git a/src/recompiler/Sun/config-host.h b/src/recompiler/Sun/config-host.h
new file mode 100644
index 00000000..a18a147e
--- /dev/null
+++ b/src/recompiler/Sun/config-host.h
@@ -0,0 +1,46 @@
+/* $Id: config-host.h $ */
+/** @file
+ * Sun host config - maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+#if defined(RT_ARCH_AMD64)
+# define HOST_X86_64 1
+# define HOST_LONG_BITS 64
+#else
+# define HOST_I386 1
+# define HOST_LONG_BITS 32
+#endif
+
+#ifndef IPRT_NO_CRT
+# ifdef RT_OS_WINDOWS
+# define CONFIG_WIN32 1
+# elif defined(RT_OS_OS2)
+# define CONFIG_OS2
+# elif defined(RT_OS_DARWIN)
+# define CONFIG_DARWIN
+# elif defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD)
+# define HAVE_MACHINE_BSWAP_H
+/*# define CONFIG_BSD*/
+# elif defined(RT_OS_SOLARIS)
+# define CONFIG_SOLARIS
+# else
+# define HAVE_BYTESWAP_H 1
+# endif
+#endif
+#define QEMU_VERSION "0.13.0"
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QEMU_SHAREDIR "."
+
diff --git a/src/recompiler/Sun/config.h b/src/recompiler/Sun/config.h
new file mode 100644
index 00000000..173b660d
--- /dev/null
+++ b/src/recompiler/Sun/config.h
@@ -0,0 +1,44 @@
+/* $Id: config.h $ */
+/** @file
+ * Sun config - Maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "config-host.h"
+#define CONFIG_QEMU_PREFIX "/usr/gnemul/qemu-i386"
+#define TARGET_ARCH "i386"
+#define TARGET_I386 1
+#define CONFIG_SOFTMMU 1
+#define TARGET_PHYS_ADDR_BITS 64
+
+#ifdef VBOX_WITH_64_BITS_GUESTS
+# if defined(__x86_64__) || defined (VBOX_ENABLE_VBOXREM64)
+# define TARGET_X86_64
+# endif
+#endif
+
+/* Uncomment to see all phys memory accesses */
+/* #define VBOX_DEBUG_PHYS */
+/* Uncomment to see emulated CPU state changes */
+/* #define VBOX_DUMP_STATE */
+/* Uncomment to see QEMU logging, goes to /tmp/vbox-qemu.log */
+/* #define DEBUG_ALL_LOGGING */
+/* Uncomment to see generated code */
+/* #define DEBUG_DISAS */
+
+#if 0 /*defined(RT_ARCH_AMD64) && defined(VBOX_STRICT)*/
+# define VBOX_CHECK_ADDR(ptr) do { if ((uintptr_t)(ptr) >= _4G) __asm__("int3"); } while (0)
+#else
+# define VBOX_CHECK_ADDR(ptr) do { } while (0)
+#endif
diff --git a/src/recompiler/Sun/crt/stdio.h b/src/recompiler/Sun/crt/stdio.h
new file mode 100644
index 00000000..5c73fb1a
--- /dev/null
+++ b/src/recompiler/Sun/crt/stdio.h
@@ -0,0 +1,73 @@
+/* $Id: stdio.h $ */
+/** @file
+ * Our minimal stdio
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef ___Sun_stdio_h
+#define ___Sun_stdio_h
+
+#ifndef LOG_GROUP
+# define UNDO_LOG_GROUP
+#endif
+
+#include <VBox/log.h>
+
+#ifdef UNDO_LOG_GROUP
+# undef UNDO_LOG_GROUP
+# undef LOG_GROUP
+#endif
+
+#ifndef LOG_USE_C99
+# error "LOG_USE_C99 isn't defined."
+#endif
+
+RT_C_DECLS_BEGIN
+
+typedef struct FILE FILE;
+
+#if defined(RT_OS_SOLARIS)
+/** @todo Check solaris' floatingpoint.h as to why we do this */
+# define _FILEDEFED
+#endif
+
+DECLINLINE(int) fprintf(FILE *ignored, const char *pszFormat, ...)
+{
+/** @todo We don't support wrapping calls taking a va_list yet. It's not worth it yet,
+ * since there are only a couple of cases where this fprintf implementation is used.
+ * (The macro below will deal with the majority of the fprintf calls.) */
+#if 0 /*def LOG_ENABLED*/
+ if (LogIsItEnabled(NULL, 0, LOG_GROUP_REM_PRINTF))
+ {
+ va_list va;
+ va_start(va, pszFormat);
+ RTLogLoggerExV(NULL, 0, LOG_GROUP_REM_PRINTF, pszFormat, va);
+ va_end(va);
+ }
+#endif
+ return 0;
+}
+
+#define fflush(file) RTLogFlush(NULL)
+#define printf(...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+#define fprintf(logfile, ...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+
+#ifdef DEBUG_TMP_LOGGING
+# error "DEBUG_TMP_LOGGING doesn't work with the Sun/crt/stdio.h wrapper."
+#endif
+
+RT_C_DECLS_END
+
+#endif
+
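(For illustration only, not part of the diff: with the wrapper above, a hypothetical
QEMU-style call site such as

    fprintf(logfile, "triple fault at %08x\n", eip);

is rewritten by the preprocessor into

    LogIt(0, LOG_GROUP_REM_PRINTF, ("triple fault at %08x\n", eip));

so the recompiler's diagnostics go to the VBox logger instead of a real FILE stream,
and fflush() likewise maps to RTLogFlush(NULL). The format string and variable names
are made up for the example.)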
diff --git a/src/recompiler/Sun/deftoimp.sed b/src/recompiler/Sun/deftoimp.sed
new file mode 100644
index 00000000..3e35efa1
--- /dev/null
+++ b/src/recompiler/Sun/deftoimp.sed
@@ -0,0 +1,37 @@
+# $Id: deftoimp.sed $
+## @file
+# SED script for generating a dummy .so from a windows .def file.
+#
+
+#
+# Copyright (C) 2006-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org. This file is free software;
+# you can redistribute it and/or modify it under the terms of the GNU
+# General Public License (GPL) as published by the Free Software
+# Foundation, in version 2 as it comes in the "COPYING" file of the
+# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+#
+
+s/;.*$//g
+s/^[[:space:]][[:space:]]*//g
+s/[[:space:]][[:space:]]*$//g
+/^$/d
+
+# Handle text after EXPORTS
+/EXPORTS/,//{
+s/^EXPORTS$//
+/^$/b end
+
+s/^\(.*\)$/EXPORT\nvoid \1(void);\nvoid \1(void){}/
+b end
+}
+d
+b end
+
+
+# next expression
+:end
+
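(For illustration only, not part of the diff: given a hypothetical VBoxREM.def fragment
such as

    LIBRARY VBoxREM
    EXPORTS
        REMR3Init

the VBoxREMImp.c rule in Makefile.kmk above first appends the EXPORT macro prologue and
then runs this script over the .def file, producing roughly

    #ifdef VBOX_HAVE_VISIBILITY_HIDDEN
    # define EXPORT __attribute__((visibility("default")))
    #else
    # define EXPORT
    #endif

    EXPORT
    void REMR3Init(void);
    void REMR3Init(void){}

i.e. a dummy library exporting the same symbols, which is enough to break the
VBoxVMM/VBoxREM link-order cycle described in Makefile.kmk. The symbol name is made up
for the example.)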
diff --git a/src/recompiler/Sun/e_powl-amd64.S b/src/recompiler/Sun/e_powl-amd64.S
new file mode 100644
index 00000000..5e0353e7
--- /dev/null
+++ b/src/recompiler/Sun/e_powl-amd64.S
@@ -0,0 +1,371 @@
+/* ix87 specific implementation of pow function.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/*#include <machine/asm.h>*/
+#include <iprt/cdefs.h>
+
+#define ALIGNARG(log2) 1<<log2
+#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+#define ASM_GLOBAL_DIRECTIVE .global
+
+#define C_LABEL(name) name:
+#define C_SYMBOL_NAME(name) name
+
+#define ENTRY(name) \
+ ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \
+ ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \
+ .align ALIGNARG(4); \
+ C_LABEL(name)
+
+#undef END
+#define END(name) \
+ ASM_SIZE_DIRECTIVE(name)
+
+
+#ifdef __ELF__
+ .section .rodata
+#else
+ .text
+#endif
+
+ .align ALIGNARG(4)
+ ASM_TYPE_DIRECTIVE(infinity,@object)
+inf_zero:
+infinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+ ASM_SIZE_DIRECTIVE(infinity)
+ ASM_TYPE_DIRECTIVE(zero,@object)
+zero: .double 0.0
+ ASM_SIZE_DIRECTIVE(zero)
+ ASM_TYPE_DIRECTIVE(minf_mzero,@object)
+minf_mzero:
+minfinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ ASM_SIZE_DIRECTIVE(minf_mzero)
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+ .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
+
+//#ifdef PIC
+//#define MO(op) op##(%rip)
+//#else
+#define MO(op) op
+//#endif
+
+ .text
+/*ENTRY(__ieee754_powl)*/
+ENTRY(RT_NOCRT(powl))
+
+ fldt 24(%rsp) // y
+ fxam
+
+
+ fnstsw
+ movb %ah, %dl
+ andb $0x45, %ah
+ cmpb $0x40, %ah // is y == 0 ?
+ je 11f
+
+ cmpb $0x05, %ah // is y == ±inf ?
+ je 12f
+
+ cmpb $0x01, %ah // is y == NaN ?
+ je 30f
+
+ fldt 8(%rsp) // x : y
+
+ fxam
+ fnstsw
+ movb %ah, %dh
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ je 20f // x is ±0
+
+ cmpb $0x05, %ah
+ je 15f // x is ±inf
+
+ fxch // y : x
+
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
+ /* First see whether `y' is a natural number. In this case we
+ can use a more precise algorithm. */
+ fld %st // y : y : x
+ fistpll -8(%rsp) // y : x
+ fildll -8(%rsp) // int(y) : y : x
+ fucomip %st(1),%st // y : x
+ jne 2f
+
+ /* OK, we have an integer value for y. */
+ mov -8(%rsp),%eax
+ mov -4(%rsp),%edx
+ orl $0, %edx
+ fstp %st(0) // x
+ jns 4f // y >= 0, jump
+ fdivrl MO(one) // 1/x (now referred to as x)
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+4: fldl MO(one) // 1 : x
+ fxch
+
+6: shrdl $1, %edx, %eax
+ jnc 5f
+ fxch
+ fmul %st(1) // x : ST*x
+ fxch
+5: fmul %st(0), %st // x*x : ST*x
+ shrl $1, %edx
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jnz 6b
+ fstp %st(0) // ST*x
+ ret
+
+ /* y is ±NAN */
+30: fldt 8(%rsp) // x : y
+ fldl MO(one) // 1.0 : x : y
+ fucomip %st(1),%st // x : y
+ je 31f
+ fxch // y : x
+31: fstp %st(1)
+ ret
+
+ .align ALIGNARG(4)
+2: /* y is a real number. */
+ fxch // x : y
+ fldl MO(one) // 1.0 : x : y
+ fld %st(1) // x : 1.0 : x : y
+ fsub %st(1) // x-1 : 1.0 : x : y
+ fabs // |x-1| : 1.0 : x : y
+ fcompl MO(limit) // 1.0 : x : y
+ fnstsw
+ fxch // x : 1.0 : y
+ test $4500,%eax
+ jz 7f
+ fsub %st(1) // x-1 : 1.0 : y
+ fyl2xp1 // log2(x) : y
+ jmp 8f
+
+7: fyl2x // log2(x) : y
+8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
+ fst %st(1) // y*log2(x) : y*log2(x)
+ frndint // int(y*log2(x)) : y*log2(x)
+ fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
+ fxch // fract(y*log2(x)) : int(y*log2(x))
+ f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
+ faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
+ fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+ fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
+ ret
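+
+ /* Annotation (not part of the original source): the sequence above computes
+    pow(x,y) as 2^(y*log2(x)), splitting y*log2(x) into integer and fractional
+    parts because f2xm1 only handles small arguments.  A rough C sketch of the
+    same decomposition (helper names are illustrative only):
+
+        long double t = y * log2l(x);
+        long double i = rintl(t);          // int(y*log2(x))   -> frndint
+        long double f = t - i;             // fract(y*log2(x))
+        return exp2l(f) * exp2l(i);        // f2xm1+1, then fscale by the int part
+ */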
+
+28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
+
+ // pow(x,±0) = 1
+ .align ALIGNARG(4)
+11: fstp %st(0) // pop y
+ fldl MO(one)
+ ret
+
+ // y == ±inf
+ .align ALIGNARG(4)
+12: fstp %st(0) // pop y
+ fldt 8(%rsp) // x
+ fabs
+ fcompl MO(one) // < 1, == 1, or > 1
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x45, %ah
+ je 13f // jump if x is NaN
+
+ cmpb $0x40, %ah
+ je 14f // jump if |x| == 1
+
+ shlb $1, %ah
+ xorb %ah, %dl
+ andl $2, %edx
+#ifdef PIC
+ lea inf_zero(%rip),%rcx
+ fldl (%rcx, %rdx, 4)
+#else
+ fldl inf_zero(,%rdx, 4)
+#endif
+ ret
+
+ .align ALIGNARG(4)
+14: fldl MO(one)
+ ret
+
+ .align ALIGNARG(4)
+13: fldt 8(%rsp) // load x == NaN
+ ret
+
+ .align ALIGNARG(4)
+ // x is ±inf
+15: fstp %st(0) // y
+ testb $2, %dh
+ jz 16f // jump if x == +inf
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y
+ fistpll -8(%rsp) // y
+ fildll -8(%rsp) // int(y) : y
+ fucomip %st(1),%st
+ ffreep %st // <empty>
+ jne 17f
+
+ // OK, the value is an integer, but is it odd?
+ mov -8(%rsp), %eax
+ mov -4(%rsp), %edx
+ andb $1, %al
+ jz 18f // jump if not odd
+ // It's an odd integer.
+ shrl $31, %edx
+#ifdef PIC
+ lea minf_mzero(%rip),%rcx
+ fldl (%rcx, %rdx, 8)
+#else
+ fldl minf_mzero(,%rdx, 8)
+#endif
+ ret
+
+ .align ALIGNARG(4)
+16: fcompl MO(zero)
+ fnstsw
+ shrl $5, %eax
+ andl $8, %eax
+#ifdef PIC
+ lea inf_zero(%rip),%rcx
+ fldl (%rcx, %rax, 1)
+#else
+ fldl inf_zero(,%rax, 1)
+#endif
+ ret
+
+ .align ALIGNARG(4)
+17: shll $30, %edx // sign bit for y in right position
+18: shrl $31, %edx
+#ifdef PIC
+ lea inf_zero(%rip),%rcx
+ fldl (%rcx, %rdx, 8)
+#else
+ fldl inf_zero(,%rdx, 8)
+#endif
+ ret
+
+ .align ALIGNARG(4)
+ // x is ±0
+20: fstp %st(0) // y
+ testb $2, %dl
+ jz 21f // y > 0
+
+ // x is ±0 and y is < 0. We must find out whether y is an odd integer.
+ testb $2, %dh
+ jz 25f
+
+ fld %st // y : y
+ fistpll -8(%rsp) // y
+ fildll -8(%rsp) // int(y) : y
+ fucomip %st(1),%st
+ ffreep %st // <empty>
+ jne 26f
+
+ // OK, the value is an integer, but is it odd?
+ mov -8(%rsp),%eax
+ mov -4(%rsp),%edx
+ andb $1, %al
+ jz 27f // jump if not odd
+ // It's an odd integer.
+ // Raise divide-by-zero exception and get minus infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ fchs
+ ret
+
+25: fstp %st(0)
+26:
+27: // Raise divide-by-zero exception and get infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ ret
+
+ .align ALIGNARG(4)
+ // x is ±0 and y is > 0. We must find out whether y is an odd integer.
+21: testb $2, %dh
+ jz 22f
+
+ fld %st // y : y
+ fistpll -8(%rsp) // y
+ fildll -8(%rsp) // int(y) : y
+ fucomip %st(1),%st
+ ffreep %st // <empty>
+ jne 23f
+
+ // OK, the value is an integer, but is it odd?
+ mov -8(%rsp),%eax
+ mov -4(%rsp),%edx
+ andb $1, %al
+ jz 24f // jump if not odd
+ // It's an odd integer.
+ fldl MO(mzero)
+ ret
+
+22: fstp %st(0)
+23:
+24: fldl MO(zero)
+ ret
+
+/*END(__ieee754_powl)*/
+END(RT_NOCRT(powl))
+
diff --git a/src/recompiler/Sun/e_powl-x86.S b/src/recompiler/Sun/e_powl-x86.S
new file mode 100644
index 00000000..cbc99256
--- /dev/null
+++ b/src/recompiler/Sun/e_powl-x86.S
@@ -0,0 +1,413 @@
+/* ix87 specific implementation of pow function.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/*#include <machine/asm.h>*/
+#include <iprt/cdefs.h>
+
+#ifdef __MINGW32__
+# define ASM_TYPE_DIRECTIVE(name,typearg)
+# define ASM_SIZE_DIRECTIVE(name)
+# define cfi_adjust_cfa_offset(a)
+# define C_LABEL(name) _ ## name:
+# define C_SYMBOL_NAME(name) _ ## name
+# define ASM_GLOBAL_DIRECTIVE .global
+# define ALIGNARG(log2) 1<<log2
+#elif __APPLE__
+# define ASM_TYPE_DIRECTIVE(name,typearg)
+# define ASM_SIZE_DIRECTIVE(name)
+# define cfi_adjust_cfa_offset(a)
+# define C_LABEL(name) _ ## name:
+# define C_SYMBOL_NAME(name) _ ## name
+# define ASM_GLOBAL_DIRECTIVE .globl
+# define ALIGNARG(log2) log2
+#else
+# define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
+# define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
+# define C_LABEL(name) name:
+# define C_SYMBOL_NAME(name) name
+# /* figure this one out. */
+# define cfi_adjust_cfa_offset(a)
+# define ASM_GLOBAL_DIRECTIVE .global
+# define ALIGNARG(log2) 1<<log2
+#endif
+
+#define ENTRY(name) \
+ ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \
+ ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \
+ .align ALIGNARG(4); \
+ C_LABEL(name)
+
+#undef END
+#define END(name) \
+ ASM_SIZE_DIRECTIVE(name)
+
+#ifdef __ELF__
+ .section .rodata
+#else
+ .text
+#endif
+
+ .align ALIGNARG(4)
+ ASM_TYPE_DIRECTIVE(infinity,@object)
+inf_zero:
+infinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
+ ASM_SIZE_DIRECTIVE(infinity)
+ ASM_TYPE_DIRECTIVE(zero,@object)
+zero: .double 0.0
+ ASM_SIZE_DIRECTIVE(zero)
+ ASM_TYPE_DIRECTIVE(minf_mzero,@object)
+minf_mzero:
+minfinity:
+ .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
+mzero:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0x80
+ ASM_SIZE_DIRECTIVE(minf_mzero)
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
+
+#ifdef PIC
+#define MO(op) op##@GOTOFF(%ecx)
+#define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
+#else
+#define MO(op) op
+#define MOX(op,x,f) op(,x,f)
+#endif
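+
+/* Annotation (not part of the original source): under PIC the constants above are
+   addressed relative to the GOT through %ecx (loaded by LOAD_PIC_REG below), so e.g.
+   MO(one) expands to one@GOTOFF(%ecx) and MOX(inf_zero, %edx, 4) expands to
+   inf_zero@GOTOFF(%ecx,%edx,4); without PIC they are plain absolute references. */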
+
+ .text
+//ENTRY(__ieee754_powl)
+ENTRY(RT_NOCRT(powl))
+#ifdef RT_OS_DARWIN /* 16-byte long double with 8 byte alignment requirements */
+ fldt 20(%esp) // y
+#else
+ fldt 16(%esp) // y
+#endif
+ fxam
+
+#ifdef PIC
+ LOAD_PIC_REG (cx)
+#endif
+
+ fnstsw
+ movb %ah, %dl
+ andb $0x45, %ah
+ cmpb $0x40, %ah // is y == 0 ?
+ je .L11
+
+ cmpb $0x05, %ah // is y == ±inf ?
+ je .L12
+
+ cmpb $0x01, %ah // is y == NaN ?
+ je .L30
+
+ fldt 4(%esp) // x : y
+
+ subl $8,%esp
+ cfi_adjust_cfa_offset (8)
+
+ fxam
+ fnstsw
+ movb %ah, %dh
+ andb $0x45, %ah
+ cmpb $0x40, %ah
+ je .L20 // x is ±0
+
+ cmpb $0x05, %ah
+ je .L15 // x is ±inf
+
+ fxch // y : x
+
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc .L2
+
+ /* First see whether `y' is a natural number. In this case we
+ can use a more precise algorithm. */
+ fld %st // y : y : x
+ fistpll (%esp) // y : x
+ fildll (%esp) // int(y) : y : x
+ fucomp %st(1) // y : x
+ fnstsw
+ sahf
+ jne .L2
+
+ /* OK, we have an integer value for y. */
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ orl $0, %edx
+ fstp %st(0) // x
+ jns .L4 // y >= 0, jump
+ fdivrl MO(one) // 1/x (now referred to as x)
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+.L4: fldl MO(one) // 1 : x
+ fxch
+
+.L6: shrdl $1, %edx, %eax
+ jnc .L5
+ fxch
+ fmul %st(1) // x : ST*x
+ fxch
+.L5: fmul %st(0), %st // x*x : ST*x
+ shrl $1, %edx
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jnz .L6
+ fstp %st(0) // ST*x
+ ret
+
+ /* y is ±NAN */
+.L30: fldt 4(%esp) // x : y
+ fldl MO(one) // 1.0 : x : y
+ fucomp %st(1) // x : y
+ fnstsw
+ sahf
+ je .L31
+ fxch // y : x
+.L31: fstp %st(1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+.L2: /* y is a real number. */
+ fxch // x : y
+ fldl MO(one) // 1.0 : x : y
+ fld %st(1) // x : 1.0 : x : y
+ fsub %st(1) // x-1 : 1.0 : x : y
+ fabs // |x-1| : 1.0 : x : y
+ fcompl MO(limit) // 1.0 : x : y
+ fnstsw
+ fxch // x : 1.0 : y
+ sahf
+ ja .L7
+ fsub %st(1) // x-1 : 1.0 : y
+ fyl2xp1 // log2(x) : y
+ jmp .L8
+
+.L7: fyl2x // log2(x) : y
+.L8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je .L28
+ fst %st(1) // y*log2(x) : y*log2(x)
+ frndint // int(y*log2(x)) : y*log2(x)
+ fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
+ fxch // fract(y*log2(x)) : int(y*log2(x))
+ f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
+ faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
+ fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
+ ret
+
+ cfi_adjust_cfa_offset (8)
+.L28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
+
+ // pow(x,±0) = 1
+ .align ALIGNARG(4)
+.L11: fstp %st(0) // pop y
+ fldl MO(one)
+ ret
+
+ // y == ±inf
+ .align ALIGNARG(4)
+.L12: fstp %st(0) // pop y
+ fldt 4(%esp) // x
+ fabs
+ fcompl MO(one) // < 1, == 1, or > 1
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x45, %ah
+ je .L13 // jump if x is NaN
+
+ cmpb $0x40, %ah
+ je .L14 // jump if |x| == 1
+
+ shlb $1, %ah
+ xorb %ah, %dl
+ andl $2, %edx
+ fldl MOX(inf_zero, %edx, 4)
+ ret
+
+ .align ALIGNARG(4)
+.L14: fldl MO(one)
+ ret
+
+ .align ALIGNARG(4)
+.L13: fldt 4(%esp) // load x == NaN
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±inf
+.L15: fstp %st(0) // y
+ testb $2, %dh
+ jz .L16 // jump if x == +inf
+
+ // We must find out whether y is an odd integer.
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne .L17
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz .L18 // jump if not odd
+ // It's an odd integer.
+ shrl $31, %edx
+ fldl MOX(minf_mzero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+.L16: fcompl MO(zero)
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+ fnstsw
+ shrl $5, %eax
+ andl $8, %eax
+ fldl MOX(inf_zero, %eax, 1)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+.L17: shll $30, %edx // sign bit for y in right position
+ addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+.L18: shrl $31, %edx
+ fldl MOX(inf_zero, %edx, 8)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0
+.L20: fstp %st(0) // y
+ testb $2, %dl
+ jz .L21 // y > 0
+
+ // x is ±0 and y is < 0. We must find out whether y is an odd integer.
+ testb $2, %dh
+ jz .L25
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne .L26
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz .L27 // jump if not odd
+ // It's an odd integer.
+ // Raise divide-by-zero exception and get minus infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ fchs
+ ret
+
+ cfi_adjust_cfa_offset (8)
+.L25: fstp %st(0)
+.L26: addl $8, %esp
+ cfi_adjust_cfa_offset (-8)
+.L27: // Raise divide-by-zero exception and get infinity value.
+ fldl MO(one)
+ fdivl MO(zero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+ .align ALIGNARG(4)
+ // x is ±0 and y is > 0. We must find out whether y is an odd integer.
+.L21: testb $2, %dh
+ jz .L22
+
+ fld %st // y : y
+ fistpll (%esp) // y
+ fildll (%esp) // int(y) : y
+ fucompp // <empty>
+ fnstsw
+ sahf
+ jne .L23
+
+ // OK, the value is an integer, but is it odd?
+ popl %eax
+ cfi_adjust_cfa_offset (-4)
+ popl %edx
+ cfi_adjust_cfa_offset (-4)
+ andb $1, %al
+ jz .L24 // jump if not odd
+ // It's an odd integer.
+ fldl MO(mzero)
+ ret
+
+ cfi_adjust_cfa_offset (8)
+.L22: fstp %st(0)
+.L23: addl $8, %esp // Don't use 2 x pop
+ cfi_adjust_cfa_offset (-8)
+.L24: fldl MO(zero)
+ ret
+
+END(RT_NOCRT(powl))
+//END(__ieee754_powl)
diff --git a/src/recompiler/Sun/kvm.h b/src/recompiler/Sun/kvm.h
new file mode 100644
index 00000000..0403e3e4
--- /dev/null
+++ b/src/recompiler/Sun/kvm.h
@@ -0,0 +1,28 @@
+/* $Id: kvm.h $ */
+/** @file
+ * VBox Recompiler - kvm stub header.
+ */
+
+/*
+ * Copyright (C) 2011-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef ___kvm_stub_h___
+#define ___kvm_stub_h___
+
+#define kvm_enabled() false
+#define kvm_update_guest_debug(a, b) AssertFailed()
+#define kvm_set_phys_mem(a, b, c) AssertFailed()
+#define kvm_arch_get_registers(a) AssertFailed()
+#define cpu_synchronize_state(a) do { } while (0)
+
+#endif
+
diff --git a/src/recompiler/Sun/testmath.c b/src/recompiler/Sun/testmath.c
new file mode 100644
index 00000000..b1650753
--- /dev/null
+++ b/src/recompiler/Sun/testmath.c
@@ -0,0 +1,828 @@
+/* $Id: testmath.c $ */
+/** @file
+ * Testcase for the no-crt math stuff.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#ifndef MATHTEST_STANDALONE
+# include <iprt/assert.h>
+# include <math.h>
+# undef printf
+# define printf RTAssertMsg2Weak
+#else
+# include <stdio.h>
+# include <math.h>
+#endif
+
+/* gcc starting with version 4.3 uses the MPFR library, which produces more accurate results. gcc-4.3.1 seems to emit the less accurate result, so just allow both results. */
+#define SIN180a -0.8011526357338304777463731115L
+#define SIN180b -0.801152635733830477871L
+
+static void bitch(const char *pszWhat, const long double *plrdResult, const long double *plrdExpected)
+{
+ const unsigned char *pach1 = (const unsigned char *)plrdResult;
+ const unsigned char *pach2 = (const unsigned char *)plrdExpected;
+#ifndef MATHTEST_STANDALONE
+ printf("error: %s - %d instead of %d\n", pszWhat, (int)(*plrdResult * 100000), (int)(*plrdExpected * 100000));
+#else
+ printf("error: %s - %.25f instead of %.25f\n", pszWhat, (double)*plrdResult, (double)*plrdExpected);
+#endif
+ printf(" %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach1[0], pach1[1], pach1[2], pach1[3], pach1[4], pach1[5], pach1[6], pach1[7], pach1[8], pach1[9]);
+ printf(" %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach2[0], pach2[1], pach2[2], pach2[3], pach2[4], pach2[5], pach2[6], pach2[7], pach2[8], pach2[9]);
+}
+
+static void bitchll(const char *pszWhat, long long llResult, long long llExpected)
+{
+#if defined(__MINGW32__) && !defined(Assert)
+ printf("error: %s - %I64d instead of %I64d\n", pszWhat, llResult, llExpected);
+#else
+ printf("error: %s - %lld instead of %lld\n", pszWhat, llResult, llExpected);
+#endif
+}
+
+static void bitchl(const char *pszWhat, long lResult, long lExpected)
+{
+ printf("error: %s - %ld instead of %ld\n", pszWhat, lResult, lExpected);
+}
+
+extern int testsin(void)
+{
+ return sinl(180.0L) == SIN180a || sinl(180.0L) == SIN180b;
+}
+
+extern int testremainder(void)
+{
+ static double s_rd1 = 2.5;
+ static double s_rd2 = 2.0;
+ static double s_rd3 = 0.5;
+ return remainder(s_rd1, s_rd2) == s_rd3;
+}
+
+static __inline__ void set_cw(unsigned cw)
+{
+ __asm __volatile("fldcw %0" : : "m" (cw));
+}
+
+static __inline__ unsigned get_cw(void)
+{
+ unsigned cw;
+ __asm __volatile("fstcw %0" : "=m" (cw));
+ return cw & 0xffff;
+}
+
+static long double check_lrd(const long double lrd, const unsigned long long ull, const unsigned short us)
+{
+ static volatile long double lrd2;
+ lrd2 = lrd;
+ if ( *(unsigned long long *)&lrd2 != ull
+ || ((unsigned short *)&lrd2)[4] != us)
+ {
+#if defined(__MINGW32__) && !defined(Assert)
+ printf("%I64x:%04x instead of %I64x:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us);
+#else
+ printf("%llx:%04x instead of %llx:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us);
+#endif
+ __asm__("int $3\n");
+ }
+ return lrd;
+}
+
+
+static long double make_lrd(const unsigned long long ull, const unsigned short us)
+{
+ union
+ {
+ long double lrd;
+ struct
+ {
+ unsigned long long ull;
+ unsigned short us;
+ } i;
+ } u;
+
+ u.i.ull = ull;
+ u.i.us = us;
+ return u.lrd;
+}
+
+static long double check_lrd_cw(const long double lrd, const unsigned long long ull, const unsigned short us, const unsigned cw)
+{
+ set_cw(cw);
+ if (cw != get_cw())
+ {
+ printf("get_cw() -> %#x expected %#x\n", get_cw(), cw);
+ __asm__("int $3\n");
+ }
+ return check_lrd(lrd, ull, us);
+}
+
+static long double make_lrd_cw(unsigned long long ull, unsigned short us, unsigned cw)
+{
+ set_cw(cw);
+ return check_lrd_cw(make_lrd(ull, us), ull, us, cw);
+}
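+
+/* Annotation (not part of the original test): the control words passed to make_lrd_cw()
+ * below differ only in the x87 rounding-control bits (bits 10-11) and precision-control
+ * bits (bits 8-9):
+ *   0x027f - round to nearest, 53-bit precision
+ *   0x067f - round down (toward -infinity)
+ *   0x0a7f - round up (toward +infinity)
+ *   0x0e7f - round toward zero
+ *   0x037f - round to nearest, 64-bit (extended) precision
+ * which is why the same bit pattern is expected to yield different lrintl() results. */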
+
+extern int testmath(void)
+{
+ unsigned cErrors = 0;
+ long double lrdResult;
+ long double lrdExpect;
+ long double lrd;
+#define CHECK(operation, expect) \
+ do { \
+ lrdExpect = expect; \
+ lrdResult = operation; \
+ if (lrdResult != lrdExpect) \
+ { \
+ bitch(#operation, &lrdResult, &lrdExpect); \
+ cErrors++; \
+ } \
+ } while (0)
+
+ long long llResult;
+ long long llExpect;
+#define CHECKLL(operation, expect) \
+ do { \
+ llExpect = expect; \
+ llResult = operation; \
+ if (llResult != llExpect) \
+ { \
+ bitchll(#operation, llResult, llExpect); \
+ cErrors++; \
+ } \
+ } while (0)
+
+ long lResult;
+ long lExpect;
+#define CHECKL(operation, expect) \
+ do { \
+ lExpect = expect; \
+ lResult = operation; \
+ if (lResult != lExpect) \
+ { \
+ bitchl(#operation, lResult, lExpect); \
+ cErrors++; \
+ } \
+ } while (0)
+
+ CHECK(atan2l(1.0L, 1.0L), 0.785398163397448309603L);
+ CHECK(atan2l(2.3L, 3.3L), 0.608689307327411694890L);
+
+ CHECK(ceill(1.9L), 2.0L);
+ CHECK(ceill(4.5L), 5.0L);
+ CHECK(ceill(3.3L), 4.0L);
+ CHECK(ceill(6.1L), 7.0L);
+
+ CHECK(floorl(1.9L), 1.0L);
+ CHECK(floorl(4.5L), 4.0L);
+ CHECK(floorl(7.3L), 7.0L);
+ CHECK(floorl(1234.1L), 1234.0L);
+ CHECK(floor(1233.1), 1233.0);
+ CHECK(floor(1239.98989898), 1239.0);
+ CHECK(floorf(9999.999), 9999.0);
+
+ CHECK(ldexpl(1.0L, 1), 2.0L);
+ CHECK(ldexpl(1.0L, 10), 1024.0L);
+ CHECK(ldexpl(2.25L, 10), 2304.0L);
+
+ CHECKLL(llrintl(1.0L), 1);
+ CHECKLL(llrintl(1.3L), 1);
+ CHECKLL(llrintl(1.5L), 2);
+ CHECKLL(llrintl(1.9L), 2);
+ CHECKLL(llrintf(123.34), 123);
+ CHECKLL(llrintf(-123.50), -124);
+ CHECKLL(llrint(42.42), 42);
+ CHECKLL(llrint(-2147483648.12343), -2147483648LL);
+#if !defined(RT_ARCH_AMD64)
+ CHECKLL(lrint(-21474836499.12343), -2147483648LL);
+ CHECKLL(lrint(-2147483649932412.12343), -2147483648LL);
+#else
+ CHECKLL(lrint(-21474836499.12343), -21474836499L);
+ CHECKLL(lrint(-2147483649932412.12343), -2147483649932412L);
+#endif
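+ /* Annotation (not in the original test): on 32-bit hosts 'long' is only 32 bits and
+    the x87 fistpl instruction stores the "integer indefinite" value 0x80000000
+    (-2147483648) for out-of-range inputs, which is presumably what the 32-bit
+    expectations above rely on; on AMD64 'long' is 64 bits, so the exact values fit. */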
+
+// __asm__("int $3");
+ CHECKL(lrintl(make_lrd_cw(000000000000000000ULL,000000,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L);
+ CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L);
+#if !defined(RT_ARCH_AMD64)
+ /* c90 says that the constant is 2147483648 (which is not representable as a signed 32-bit
+ * value). To that constant you've then applied the negation operation. c90 doesn't have
+ * negative constants, only positive ones that have been negated. */
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1));
+ CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1));
+#endif
+ set_cw(0x27f);
+
+ CHECK(logl(2.7182818284590452353602874713526625L), 1.0);
+
+ CHECK(remainderl(1.0L, 1.0L), 0.0);
+ CHECK(remainderl(1.0L, 1.5L), -0.5);
+ CHECK(remainderl(42.0L, 34.25L), 7.75);
+ CHECK(remainderf(43.0, 34.25), 8.75);
+ CHECK(remainder(44.25, 34.25), 10.00);
+ double rd1 = 44.25;
+ double rd2 = 34.25;
+ CHECK(remainder(rd1, rd2), 10.00);
+ CHECK(remainder(2.5, 2.0), 0.5);
+ CHECK(remainder(2.5, 2.0), 0.5);
+ CHECK(remainder(2.5, 2.0), 0.5);
+ CHECKLL(testremainder(), 1);
+
+
+ /* Only works in extended precision, while double precision is default on BSD (including Darwin) */
+ set_cw(0x37f);
+ CHECK(rintl(1.0L), 1.0);
+ CHECK(rintl(1.4L), 1.0);
+ CHECK(rintl(1.3L), 1.0);
+ CHECK(rintl(0.9L), 1.0);
+ CHECK(rintl(3123.1232L), 3123.0);
+ CHECK(rint(3985.13454), 3985.0);
+ CHECK(rintf(9999.999), 10000.0);
+ set_cw(0x27f);
+
+ CHECK(sinl(1.0L), 0.84147098480789650664L);
+#if 0
+ lrd = 180.0L;
+ CHECK(sinl(lrd), -0.801152635733830477871L);
+#else
+ lrd = 180.0L;
+ lrdExpect = SIN180a;
+ lrdResult = sinl(lrd);
+ if (lrdResult != lrdExpect)
+ {
+ lrdExpect = SIN180b;
+ if (lrdResult != lrdExpect)
+ {
+ bitch("sinl(lrd)", &lrdResult, &lrdExpect);
+ cErrors++;
+ }
+ }
+#endif
+#if 0
+ CHECK(sinl(180.0L), SIN180);
+#else
+ lrdExpect = SIN180a;
+ lrdResult = sinl(180.0L);
+ if (lrdResult != lrdExpect)
+ {
+ lrdExpect = SIN180b;
+ if (lrdResult != lrdExpect)
+ {
+ bitch("sinl(180.0L)", &lrdResult, &lrdExpect);
+ cErrors++;
+ }
+ }
+#endif
+ CHECKLL(testsin(), 1);
+
+ CHECK(sqrtl(1.0L), 1.0);
+ CHECK(sqrtl(4.0L), 2.0);
+ CHECK(sqrtl(1525225.0L), 1235.0);
+
+ CHECK(tanl(0.0L), 0.0);
+ CHECK(tanl(0.7853981633974483096156608458198757L), 1.0);
+
+ CHECK(powl(0.0, 0.0), 1.0);
+ CHECK(powl(2.0, 2.0), 4.0);
+ CHECK(powl(3.0, 3.0), 27.0);
+
+ return cErrors;
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+#if 0
+
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int(foo, bar) floatx80_round_to_int(foo, NULL)
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#undef sin
+#undef cos
+#undef sqrt
+#undef pow
+#undef log
+#undef tan
+#undef atan2
+#undef floor
+#undef ceil
+#undef ldexp
+#define sin sinl
+#define cos cosl
+#define sqrt sqrtl
+#define pow powl
+#define log logl
+#define tan tanl
+#define atan2 atan2l
+#define floor floorl
+#define ceil ceill
+#define ldexp ldexpl
+
+
+typedef long double CPU86_LDouble;
+
+typedef union {
+ long double d;
+ struct {
+ unsigned long long lower;
+ unsigned short upper;
+ } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp) (fp.l.upper & 0x7fff)
+#define SIGND(fp) ((fp.l.upper) & 0x8000)
+#define MANTD(fp) (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+typedef long double floatx80;
+#define STATUS_PARAM , void *pv
+
+static floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM)
+{
+ return rintl(a);
+}
+
+
+
+struct myenv
+{
+ unsigned int fpstt; /* top of stack index */
+ unsigned int fpus;
+ unsigned int fpuc;
+ unsigned char fptags[8]; /* 0 = valid, 1 = empty */
+ union {
+#ifdef USE_X86LDOUBLE
+ CPU86_LDouble d __attribute__((aligned(16)));
+#else
+ CPU86_LDouble d;
+#endif
+ } fpregs[8];
+
+} my_env, env_org, env_res, *env = &my_env;
+
+
+#define ST0 (env->fpregs[env->fpstt].d)
+#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1 ST(1)
+#define MAXTAN 9223372036854775808.0
+
+
+static inline void fpush(void)
+{
+ env->fpstt = (env->fpstt - 1) & 7;
+ env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+ env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+ env->fpstt = (env->fpstt + 1) & 7;
+}
+
+static void helper_f2xm1(void)
+{
+ ST0 = pow(2.0,ST0) - 1.0;
+}
+
+static void helper_fyl2x(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if (fptemp>0.0){
+ fptemp = log(fptemp)/log(2.0); /* log2(ST) */
+ ST1 *= fptemp;
+ fpop();
+ } else {
+ env->fpus &= (~0x4700);
+ env->fpus |= 0x400;
+ }
+}
+
+static void helper_fptan(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = tan(fptemp);
+ fpush();
+ ST0 = 1.0;
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**52 only */
+ }
+}
+
+static void helper_fpatan(void)
+{
+ CPU86_LDouble fptemp, fpsrcop;
+
+ fpsrcop = ST1;
+ fptemp = ST0;
+ ST1 = atan2(fpsrcop,fptemp);
+ fpop();
+}
+
+static void helper_fxtract(void)
+{
+ CPU86_LDoubleU temp;
+ unsigned int expdif;
+
+ temp.d = ST0;
+ expdif = EXPD(temp) - EXPBIAS;
+ /*DP exponent bias*/
+ ST0 = expdif;
+ fpush();
+ BIASEXPONENT(temp);
+ ST0 = temp.d;
+}
+
+static void helper_fprem1(void)
+{
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ int q;
+
+ fpsrcop = ST0;
+ fptemp = ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+ if (expdif < 53) {
+ dblq = fpsrcop / fptemp;
+ dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+ ST0 = fpsrcop - fptemp*dblq;
+ q = (int)dblq; /* cutting off top bits is assumed here */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C1,C3) <-- (q2,q1,q0) */
+ env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+ env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+ env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+ } else {
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, expdif-50);
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by rounding to the nearest */
+ fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
+ floor(fpsrcop): ceil(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+static void helper_fprem(void)
+{
+#if 0
+LogFlow(("helper_fprem: ST0=%.*Rhxs ST1=%.*Rhxs fpus=%#x\n", sizeof(ST0), &ST0, sizeof(ST1), &ST1, env->fpus));
+
+ __asm__ __volatile__("fldt (%2)\n"
+ "fldt (%1)\n"
+ "fprem \n"
+ "fnstsw (%0)\n"
+ "fstpt (%1)\n"
+ "fstpt (%2)\n"
+ : : "r" (&env->fpus), "r" (&ST0), "r" (&ST1) : "memory");
+
+LogFlow(("helper_fprem: -> ST0=%.*Rhxs fpus=%#x c\n", sizeof(ST0), &ST0, env->fpus));
+#else
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ int q;
+
+ fpsrcop = ST0;
+ fptemp = ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+ if ( expdif < 53 ) {
+ dblq = fpsrcop / fptemp;
+ dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
+ ST0 = fpsrcop - fptemp*dblq;
+ q = (int)dblq; /* cutting off top bits is assumed here */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C1,C3) <-- (q2,q1,q0) */
+ env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
+ env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
+ env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
+ } else {
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, expdif-50);
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0)?
+ -(floor(fabs(fpsrcop))): floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+#endif
+}
+
+static void helper_fyl2xp1(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp+1.0)>0.0) {
+ fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+ ST1 *= fptemp;
+ fpop();
+ } else {
+ env->fpus &= (~0x4700);
+ env->fpus |= 0x400;
+ }
+}
+
+static void helper_fsqrt(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if (fptemp<0.0) {
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ env->fpus |= 0x400;
+ }
+ ST0 = sqrt(fptemp);
+}
+
+static void helper_fsincos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ fpush();
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+static void helper_frndint(void)
+{
+ ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+static void helper_fscale(void)
+{
+ ST0 = ldexp (ST0, (int)(ST1));
+}
+
+static void helper_fsin(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**53 only */
+ }
+}
+
+static void helper_fcos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+static void helper_fxam_ST0(void)
+{
+ CPU86_LDoubleU temp;
+ int expdif;
+
+ temp.d = ST0;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ if (SIGND(temp))
+ env->fpus |= 0x200; /* C1 <-- 1 */
+
+ /* XXX: test fptags too */
+ expdif = EXPD(temp);
+ if (expdif == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+ if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+ if (MANTD(temp) == 0)
+#endif
+ env->fpus |= 0x500 /*Infinity*/;
+ else
+ env->fpus |= 0x100 /*NaN*/;
+ } else if (expdif == 0) {
+ if (MANTD(temp) == 0)
+ env->fpus |= 0x4000 /*Zero*/;
+ else
+ env->fpus |= 0x4400 /*Denormal*/;
+ } else {
+ env->fpus |= 0x400;
+ }
+}
+
+
+void check_env(void)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ {
+ CPU86_LDoubleU my, res;
+ my.d = env->fpregs[i].d;
+ res.d = env_res.fpregs[i].d;
+
+ if ( my.l.lower != res.l.lower
+ || my.l.upper != res.l.upper)
+ printf("register %i: %#018llx:%#06x\n"
+ " expected %#018llx:%#06x\n",
+ i,
+ my.l.lower, my.l.upper,
+ res.l.lower, res.l.upper);
+ }
+ for (i = 0; i < 8; i++)
+ if (env->fptags[i] != env_res.fptags[i])
+ printf("tag %i: %d != %d\n", i, env->fptags[i], env_res.fptags[i]);
+ if (env->fpstt != env_res.fpstt)
+ printf("fpstt: %#06x != %#06x\n", env->fpstt, env_res.fpstt);
+ if (env->fpuc != env_res.fpuc)
+ printf("fpuc: %#06x != %#06x\n", env->fpuc, env_res.fpuc);
+ if (env->fpus != env_res.fpus)
+ printf("fpus: %#06x != %#06x\n", env->fpus, env_res.fpus);
+}
+#endif /* not used. */
+
+#if 0 /* insert this into helper.c */
+/* FPU helpers */
+CPU86_LDoubleU my_st[8];
+unsigned int my_fpstt;
+unsigned int my_fpus;
+unsigned int my_fpuc;
+unsigned char my_fptags[8];
+
+void hlp_fpu_enter(void)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ my_st[i].d = env->fpregs[i].d;
+ my_fpstt = env->fpstt;
+ my_fpus = env->fpus;
+ my_fpuc = env->fpuc;
+ memcpy(&my_fptags, &env->fptags, sizeof(my_fptags));
+}
+
+void hlp_fpu_leave(const char *psz)
+{
+ int i;
+ Log(("/*code*/ \n"));
+ for (i = 0; i < 8; i++)
+ Log(("/*code*/ *(unsigned long long *)&env_org.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_org.fpregs[%d])[4] = %#06x; env_org.fptags[%d]=%d;\n",
+ i, my_st[i].l.lower, i, my_st[i].l.upper, i, my_fptags[i]));
+ Log(("/*code*/ env_org.fpstt=%#x;\n", my_fpstt));
+ Log(("/*code*/ env_org.fpus=%#x;\n", my_fpus));
+ Log(("/*code*/ env_org.fpuc=%#x;\n", my_fpuc));
+ for (i = 0; i < 8; i++)
+ {
+ CPU86_LDoubleU u;
+ u.d = env->fpregs[i].d;
+ Log(("/*code*/ *(unsigned long long *)&env_res.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_res.fpregs[%d])[4] = %#06x; env_res.fptags[%d]=%d;\n",
+ i, u.l.lower, i, u.l.upper, i, env->fptags[i]));
+ }
+ Log(("/*code*/ env_res.fpstt=%#x;\n", env->fpstt));
+ Log(("/*code*/ env_res.fpus=%#x;\n", env->fpus));
+ Log(("/*code*/ env_res.fpuc=%#x;\n", env->fpuc));
+
+ Log(("/*code*/ my_env = env_org;\n"));
+ Log(("/*code*/ %s();\n", psz));
+ Log(("/*code*/ check_env();\n"));
+}
+#endif /* helper.c */
+
+extern void testmath2(void )
+{
+#if 0
+#include "/tmp/code.h"
+#endif
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef MATHTEST_STANDALONE
+
+void test_fops(double a, double b)
+{
+ printf("a=%f b=%f a+b=%f\n", a, b, a + b);
+ printf("a=%f b=%f a-b=%f\n", a, b, a - b);
+ printf("a=%f b=%f a*b=%f\n", a, b, a * b);
+ printf("a=%f b=%f a/b=%f\n", a, b, a / b);
+ printf("a=%f b=%f fmod(a, b)=%f\n", a, b, (double)fmod(a, b));
+ printf("a=%f sqrt(a)=%f\n", a, (double)sqrtl(a));
+ printf("a=%f sin(a)=%f\n", a, (double)sinl(a));
+ printf("a=%f cos(a)=%f\n", a, (double)cos(a));
+ printf("a=%f tan(a)=%f\n", a, (double)tanl(a));
+ printf("a=%f log(a)=%f\n", a, (double)log(a));
+ printf("a=%f exp(a)=%f\n", a, (double)exp(a));
+ printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
+ /* just to test some op combining */
+ printf("a=%f asin(sinl(a))=%f\n", a, (double)asin(sinl(a)));
+ printf("a=%f acos(cos(a))=%f\n", a, (double)acos(cos(a)));
+ printf("a=%f atan(tanl(a))=%f\n", a, (double)atan(tanl(a)));
+}
+
+int main()
+{
+ unsigned cErrors = testmath();
+
+ testmath2();
+ test_fops(2, 3);
+ test_fops(1.4, -5);
+
+ printf("cErrors=%u\n", cErrors);
+ return cErrors;
+}
+#endif
+
diff --git a/src/recompiler/VBoxREM.def b/src/recompiler/VBoxREM.def
new file mode 100644
index 00000000..96e28350
--- /dev/null
+++ b/src/recompiler/VBoxREM.def
@@ -0,0 +1,50 @@
+; $Id: VBoxREM.def $
+;; @file
+; VBoxREM Definition File.
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+LIBRARY VBoxREM.dll
+
+EXPORTS
+ REMR3Init
+ REMR3InitFinalize
+ REMR3Term
+ REMR3Reset
+ REMR3Step
+ REMR3BreakpointSet
+ REMR3BreakpointClear
+ REMR3Run
+ REMR3EmulateInstruction
+ REMR3State
+ REMR3StateBack
+ REMR3StateUpdate
+ REMR3A20Set
+ REMR3DisasEnableStepping
+ REMR3ReplayHandlerNotifications
+ REMR3NotifyPhysRamRegister
+ REMR3NotifyPhysRamDeregister
+ REMR3NotifyPhysRomRegister
+ REMR3NotifyHandlerPhysicalModify
+ REMR3NotifyHandlerPhysicalRegister
+ REMR3NotifyHandlerPhysicalDeregister
+ REMR3NotifyInterruptSet
+ REMR3NotifyInterruptClear
+ REMR3NotifyTimerPending
+ REMR3NotifyDmaPending
+ REMR3NotifyQueuePending
+ REMR3NotifyFF
+ REMR3NotifyCodePageChanged
+ REMR3IsPageAccessHandled
+
diff --git a/src/recompiler/VBoxREM.rc b/src/recompiler/VBoxREM.rc
new file mode 100644
index 00000000..ccf23339
--- /dev/null
+++ b/src/recompiler/VBoxREM.rc
@@ -0,0 +1,51 @@
+/* $Id: VBoxREM.rc $ */
+/** @file
+ * VBoxREM - Resource file containing version info.
+ */
+
+/*
+ * Copyright (C) 2015-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include <windows.h>
+#include <VBox/version.h>
+
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION VBOX_RC_FILE_VERSION
+ PRODUCTVERSION VBOX_RC_PRODUCT_VERSION
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS VBOX_RC_FILE_FLAGS
+ FILEOS VBOX_RC_FILE_OS
+ FILETYPE VBOX_RC_TYPE_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "FileDescription", "VirtualBox Recompiler\0"
+ VALUE "InternalName", "VBoxREM\0"
+ VALUE "OriginalFilename", "VBoxREM.dll\0"
+ VALUE "CompanyName", VBOX_RC_COMPANY_NAME
+ VALUE "FileVersion", VBOX_RC_FILE_VERSION_STR
+ VALUE "LegalCopyright", VBOX_RC_LEGAL_COPYRIGHT
+ VALUE "ProductName", VBOX_RC_PRODUCT_NAME_STR
+ VALUE "ProductVersion", VBOX_RC_PRODUCT_VERSION_STR
+ VBOX_RC_MORE_STRINGS
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
diff --git a/src/recompiler/VBoxREMWrapper.cpp b/src/recompiler/VBoxREMWrapper.cpp
new file mode 100644
index 00000000..4349bd9a
--- /dev/null
+++ b/src/recompiler/VBoxREMWrapper.cpp
@@ -0,0 +1,2477 @@
+/* $Id: VBoxREMWrapper.cpp $ */
+/** @file
+ *
+ * VBoxREM Win64 DLL Wrapper.
+ */
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_vboxrem_amd64 VBoxREM Hacks on AMD64
+ *
+ * There are problems with building VBoxREM on both WIN64 and 64-bit linux.
+ *
+ * On linux binutils refuses to link shared objects without -fPIC compiled code
+ * (bitches about some fixup types). But when trying to build with -fPIC dyngen
+ * doesn't like the code anymore. Sweet. The current solution is to build the
+ * VBoxREM code as a relocatable module and use our ELF loader to load it.
+ *
+ * On WIN64 we're not aware of any GCC port which can emit code using the MSC
+ * calling convention. So, we're in for some real fun here. The choice is between
+ * porting GCC to AMD64 WIN64 and coming up with some kind of wrapper around
+ * either the win32 build or the 64-bit linux build.
+ *
+ * -# Porting GCC will be a lot of work. For one thing the calling convention differs
+ * and messing with such stuff can easily create ugly bugs. We would also have to
+ * do some binutils changes, but I think those are rather small compared to GCC.
+ * (That said, the MSC calling convention is far simpler than the linux one; it
+ * reminds me of _Optlink, which we have working already.)
+ * -# Wrapping win32 code will work, but addresses outside the first 4GB are
+ * inaccessible and we will have to create 32-64 thunks for all imported functions.
+ * (To switch between 32-bit and 64-bit one loads the right CS using far jmps (32->64)
+ * or far returns (both).)
+ * -# Wrapping 64-bit linux code might be the easier solution. The requirements here
+ * are:
+ * - Remove all CRT references we possibly can, either by using intrinsics or using
+ * IPRT. Part of IPRT will be linked into VBoxREM2.rel; this will be yet another
+ * IPRT mode, which I've dubbed 'no-crt'. The no-crt mode provides basic
+ * non-system-dependent stuff.
+ * - Compile and link it into a relocatable object (include the gcc intrinsics
+ * in libgcc). Call this VBoxREM2.rel.
+ * - Write a wrapper dll, VBoxREM.dll, which during REMR3Init() will load
+ * VBoxREM2.rel (using IPRT) and generate calling convention wrappers
+ * for all IPRT functions and VBoxVMM functions that it uses. All exports
+ * will be wrapped vice versa.
+ * - For building on windows hosts, we will use a mingw32 hosted cross compiler,
+ * and add a 'no-crt' mode to IPRT where it provides the necessary CRT headers
+ * and function implementations.
+ *
+ * The 3rd solution will be tried out first since it requires the least effort and
+ * will let us make use of the full 64-bit register set.
+ *
+ *
+ *
+ * @section sec_vboxrem_amd64_compare Comparing the GCC and MSC calling conventions
+ *
+ * GCC expects the following (cut & paste from page 20 in the ABI draft 0.96):
+ *
+ * @verbatim
+ %rax temporary register; with variable arguments passes information about the
+ number of SSE registers used; 1st return register.
+ [Not preserved]
+ %rbx callee-saved register; optionally used as base pointer.
+ [Preserved]
+ %rcx used to pass 4th integer argument to functions.
+ [Not preserved]
+ %rdx used to pass 3rd argument to functions; 2nd return register
+ [Not preserved]
+ %rsp stack pointer
+ [Preserved]
+ %rbp callee-saved register; optionally used as frame pointer
+ [Preserved]
+ %rsi used to pass 2nd argument to functions
+ [Not preserved]
+ %rdi used to pass 1st argument to functions
+ [Not preserved]
+ %r8 used to pass 5th argument to functions
+ [Not preserved]
+ %r9 used to pass 6th argument to functions
+ [Not preserved]
+ %r10 temporary register, used for passing a function's static chain
+ pointer [Not preserved]
+ %r11 temporary register
+ [Not preserved]
+ %r12-r15 callee-saved registers
+ [Preserved]
+ %xmm0-%xmm1 used to pass and return floating point arguments
+ [Not preserved]
+ %xmm2-%xmm7 used to pass floating point arguments
+ [Not preserved]
+ %xmm8-%xmm15 temporary registers
+ [Not preserved]
+ %mmx0-%mmx7 temporary registers
+ [Not preserved]
+ %st0 temporary register; used to return long double arguments
+ [Not preserved]
+ %st1 temporary registers; used to return long double arguments
+ [Not preserved]
+ %st2-%st7 temporary registers
+ [Not preserved]
+ %fs Reserved for system use (as thread specific data register)
+ [Not preserved]
+ @endverbatim
+ *
+ * Direction flag is preserved as cleared.
+ * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction.
+ *
+ *
+ *
+ * MSC expects the following:
+ * @verbatim
+ rax return value, not preserved.
+ rbx preserved.
+ rcx 1st argument, integer, not preserved.
+ rdx 2nd argument, integer, not preserved.
+ rbp preserved.
+ rsp preserved.
+ rsi preserved.
+ rdi preserved.
+ r8 3rd argument, integer, not preserved.
+ r9 4th argument, integer, not preserved.
+ r10 scratch register, not preserved.
+ r11 scratch register, not preserved.
+ r12-r15 preserved.
+ xmm0 1st argument, fp, return value, not preserved.
+ xmm1 2nd argument, fp, not preserved.
+ xmm2 3rd argument, fp, not preserved.
+ xmm3 4th argument, fp, not preserved.
+ xmm4-xmm5 scratch, not preserved.
+ xmm6-xmm15 preserved.
+ @endverbatim
+ *
+ * Dunno what the direction flag is...
+ * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction.
+ *
+ *
+ * Thus, when GCC code is calling MSC code we don't really have to preserve
+ * anything. But when MSC code is calling GCC code, we'll have to save rsi and rdi.
+ *
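+ * Annotation (not part of the original comment): concretely, glue going from the MSC to
+ * the GCC convention remaps the first four integer arguments rcx->rdi, rdx->rsi, r8->rdx
+ * and r9->rcx (and the reverse in the other direction), and wrappers around calls from
+ * MSC code into GCC code save and restore rsi/rdi.
+ *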
+ */
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def USE_REM_STUBS
+ * Define USE_REM_STUBS to stub the entire REM stuff. This is useful during
+ * early porting (before we start running stuff).
+ */
+#if defined(DOXYGEN_RUNNING)
+# define USE_REM_STUBS
+#endif
+
+/** @def USE_REM_CALLING_CONVENTION_GLUE
+ * Define USE_REM_CALLING_CONVENTION_GLUE for platforms where it's necessary to
+ * use calling convention wrappers.
+ */
+#if (defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)) || defined(DOXYGEN_RUNNING)
+# define USE_REM_CALLING_CONVENTION_GLUE
+#endif
+
+/** @def USE_REM_IMPORT_JUMP_GLUE
+ * Define USE_REM_IMPORT_JUMP_GLUE for platforms where we need to
+ * emit some jump glue to deal with big addresses.
+ */
+#if (defined(RT_ARCH_AMD64) && !defined(USE_REM_CALLING_CONVENTION_GLUE) && !defined(RT_OS_DARWIN)) || defined(DOXYGEN_RUNNING)
+# define USE_REM_IMPORT_JUMP_GLUE
+#endif
+
+/** @def VBOX_USE_BITNESS_SELECTOR
+ * Define VBOX_USE_BITNESS_SELECTOR to build this module as a bitness selector
+ * between VBoxREM32 and VBoxREM64.
+ */
+#if defined(DOXYGEN_RUNNING)
+# define VBOX_USE_BITNESS_SELECTOR
+#endif
+
+/** @def VBOX_WITHOUT_REM_LDR_CYCLE
+ * Define VBOX_WITHOUT_REM_LDR_CYCLE to dynamically resolve any dependencies on
+ * VBoxVMM and thus avoid the cyclic dependency between VBoxREM and VBoxVMM.
+ */
+#if defined(DOXYGEN_RUNNING)
+# define VBOX_WITHOUT_REM_LDR_CYCLE
+#endif
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_REM
+#include <VBox/vmm/rem.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/dbg.h>
+#include <VBox/vmm/csam.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/ssm.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/patm.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/pdmcritsect.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <VBox/dis.h>
+
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/ldr.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/stream.h>
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Parameter descriptor.
+ */
+typedef struct REMPARMDESC
+{
+ /** Parameter flags (REMPARMDESC_FLAGS_*). */
+ uint8_t fFlags;
+ /** The parameter size if REMPARMDESC_FLAGS_SIZE is set. */
+ uint8_t cb;
+ /** Pointer to additional data.
+ * For REMPARMDESC_FLAGS_PFN this is a PREMFNDESC. */
+ void *pvExtra;
+
+} REMPARMDESC, *PREMPARMDESC;
+/** Pointer to a constant parameter descriptor. */
+typedef const REMPARMDESC *PCREMPARMDESC;
+
+/** @name Parameter descriptor flags.
+ * @{ */
+/** The parameter type is a kind of integer which could fit in a register. This includes pointers. */
+#define REMPARMDESC_FLAGS_INT 0
+/** The parameter is a GC pointer. */
+#define REMPARMDESC_FLAGS_GCPTR 1
+/** The parameter is a GC physical address. */
+#define REMPARMDESC_FLAGS_GCPHYS 2
+/** The parameter is a HC physical address. */
+#define REMPARMDESC_FLAGS_HCPHYS 3
+/** The parameter type is a kind of floating point. */
+#define REMPARMDESC_FLAGS_FLOAT 4
+/** The parameter value is a struct. This type takes a size. */
+#define REMPARMDESC_FLAGS_STRUCT 5
+/** The parameter is an ellipsis. */
+#define REMPARMDESC_FLAGS_ELLIPSIS 6
+/** The parameter is a va_list. */
+#define REMPARMDESC_FLAGS_VALIST 7
+/** The parameter is a function pointer. pvExtra is a PREMFNDESC. */
+#define REMPARMDESC_FLAGS_PFN 8
+/** The parameter type mask. */
+#define REMPARMDESC_FLAGS_TYPE_MASK 15
+/** The parameter size field is valid. */
+#define REMPARMDESC_FLAGS_SIZE RT_BIT(7)
+/** @} */
+
+/**
+ * Function descriptor.
+ */
+typedef struct REMFNDESC
+{
+ /** The function name. */
+ const char *pszName;
+ /** Exports: Pointer to the function pointer.
+ * Imports: Pointer to the function. */
+ void *pv;
+ /** Array of parameter descriptors. */
+ PCREMPARMDESC paParams;
+ /** The number of parameter descriptors pointed to by paParams. */
+ uint8_t cParams;
+ /** Function flags (REMFNDESC_FLAGS_*). */
+ uint8_t fFlags;
+ /** The size of the return value. */
+ uint8_t cbReturn;
+ /** Pointer to the wrapper code for imports. */
+ void *pvWrapper;
+} REMFNDESC, *PREMFNDESC;
+/** Pointer to a constant function descriptor. */
+typedef const REMFNDESC *PCREMFNDESC;
+
+/** @name Function descriptor flags.
+ * @{ */
+/** The return type is void. */
+#define REMFNDESC_FLAGS_RET_VOID 0
+/** The return type is a kind of integer passed in rax/eax. This includes pointers. */
+#define REMFNDESC_FLAGS_RET_INT 1
+/** The return type is a kind of floating point. */
+#define REMFNDESC_FLAGS_RET_FLOAT 2
+/** The return value is a struct. This type takes a size. */
+#define REMFNDESC_FLAGS_RET_STRUCT 3
+/** The return type mask. */
+#define REMFNDESC_FLAGS_RET_TYPE_MASK 7
+/** The argument list contains one or more va_list arguments (i.e. problems). */
+#define REMFNDESC_FLAGS_VALIST RT_BIT(6)
+/** The function has an ellipsis (i.e. a problem). */
+#define REMFNDESC_FLAGS_ELLIPSIS RT_BIT(7)
+/** @} */
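+
+/* Illustrative sketch only (not part of the original wrapper; the helper name is made up):
+ * how code could walk a parameter descriptor array as defined above, e.g. to count the
+ * floating-point parameters that the calling-convention glue must treat differently from
+ * integer ones. */
+#if 0 /* illustration */
+static unsigned remCountFloatParams(PCREMPARMDESC paParams, unsigned cParams)
+{
+    unsigned cFloat = 0;
+    for (unsigned i = 0; i < cParams; i++)
+        if ((paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_FLOAT)
+            cFloat++;
+    return cFloat;
+}
+#endif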
+
+/**
+ * Chunk of read-write-executable memory.
+ */
+typedef struct REMEXECMEM
+{
+ /** Pointer to the next chunk. */
+ struct REMEXECMEM *pNext;
+ /** The size of this chunk. */
+ uint32_t cb;
+ /** The offset of the next code block. */
+ uint32_t off;
+#if ARCH_BITS == 32
+ uint32_t padding;
+#endif
+} REMEXECMEM, *PREMEXECMEM;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifndef USE_REM_STUBS
+/** Loader handle of the REM object/DLL. */
+static RTLDRMOD g_ModREM2 = NIL_RTLDRMOD;
+# ifndef VBOX_USE_BITNESS_SELECTOR
+/** Pointer to the memory containing the loaded REM2 object/DLL. */
+static void *g_pvREM2 = NULL;
+/** The size of the memory g_pvREM2 is pointing to. */
+static size_t g_cbREM2 = 0;
+# endif
+# ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+/** Loader handle of the VBoxVMM DLL. */
+static RTLDRMOD g_ModVMM = NIL_RTLDRMOD;
+# endif
+
+/** Linux object export addresses.
+ * These are references from the assembly wrapper code.
+ * @{ */
+static DECLCALLBACKPTR(int, pfnREMR3Init)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3InitFinalize)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3Term)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3Reset)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3Step)(PVM, PVMCPU);
+static DECLCALLBACKPTR(int, pfnREMR3BreakpointSet)(PVM, RTGCUINTPTR);
+static DECLCALLBACKPTR(int, pfnREMR3BreakpointClear)(PVM, RTGCUINTPTR);
+static DECLCALLBACKPTR(int, pfnREMR3EmulateInstruction)(PVM, PVMCPU);
+static DECLCALLBACKPTR(int, pfnREMR3Run)(PVM, PVMCPU);
+static DECLCALLBACKPTR(int, pfnREMR3State)(PVM, PVMCPU);
+static DECLCALLBACKPTR(int, pfnREMR3StateBack)(PVM, PVMCPU);
+static DECLCALLBACKPTR(void, pfnREMR3StateUpdate)(PVM, PVMCPU);
+static DECLCALLBACKPTR(void, pfnREMR3A20Set)(PVM, PVMCPU, bool);
+static DECLCALLBACKPTR(void, pfnREMR3ReplayHandlerNotifications)(PVM pVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamRegister)(PVM, RTGCPHYS, RTGCPHYS, unsigned);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamDeregister)(PVM, RTGCPHYS, RTUINT);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRomRegister)(PVM, RTGCPHYS, RTUINT, void *, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalModify)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, RTGCPHYS, bool, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalRegister)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalDeregister)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, bool, bool);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptSet)(PVM, PVMCPU);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptClear)(PVM, PVMCPU);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyTimerPending)(PVM, PVMCPU);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyDmaPending)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyQueuePending)(PVM);
+static DECLCALLBACKPTR(void, pfnREMR3NotifyFF)(PVM);
+static DECLCALLBACKPTR(int, pfnREMR3NotifyCodePageChanged)(PVM, PVMCPU, RTGCPTR);
+static DECLCALLBACKPTR(int, pfnREMR3DisasEnableStepping)(PVM, bool);
+static DECLCALLBACKPTR(bool, pfnREMR3IsPageAccessHandled)(PVM, RTGCPHYS);
+/** @} */
+
+/** Export and import parameter descriptors.
+ * @{
+ */
+/* Common args. */
+static const REMPARMDESC g_aArgsSIZE_T[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsPTR[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }
+};
+static const REMPARMDESC g_aArgsSIZE_TTag[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsPTRTag[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsPTR_SIZE_T[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsSIZE_TTagLoc[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsPTRLoc[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsVM[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }
+};
+static const REMPARMDESC g_aArgsVMCPU[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }
+};
+
+static const REMPARMDESC g_aArgsVMandVMCPU[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }
+};
+
+/* REM args */
+static const REMPARMDESC g_aArgsBreakpoint[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsA20Set[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRamRegister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRamChunkRegister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTHCUINTPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRamDeregister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPhysRomRegister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalModify[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalRegister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyHandlerPhysicalDeregister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyCodePageChanged[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsNotifyPendingInterrupt[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsDisasEnableStepping[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsIsPageAccessHandled[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }
+};
+
+# ifndef VBOX_USE_BITNESS_SELECTOR
+
+/* VMM args */
+static const REMPARMDESC g_aArgsAPICUpdatePendingInterrupts[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }
+};
+static const REMPARMDESC g_aArgsAPICGetTpr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsAPICSetTpr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }
+};
+static const REMPARMDESC g_aArgsCPUMGetGuestCpl[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+};
+
+/* CPUMQueryGuestMsr args */
+static const REMPARMDESC g_aArgsCPUMQueryGuestMsr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t *), NULL },
+};
+
+/* CPUMSetGuestMsr args */
+static const REMPARMDESC g_aArgsCPUMSetGuestMsr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL },
+};
+
+static const REMPARMDESC g_aArgsCPUMGetGuestCpuId[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMR3RemEnter[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMR3RemLeave[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMSetChangedFlags[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+
+static const REMPARMDESC g_aArgsCPUMQueryGuestCtxPtr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }
+};
+static const REMPARMDESC g_aArgsCSAMR3MonitorPage[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(CSAMTAG), NULL }
+};
+static const REMPARMDESC g_aArgsCSAMR3UnmonitorPage[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(CSAMTAG), NULL }
+};
+
+static const REMPARMDESC g_aArgsCSAMR3RecordCallAddress[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL }
+};
+
+# if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */
+static const REMPARMDESC g_aArgsDBGCRegisterCommands[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PCDBGCCMD), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }
+};
+# endif
+static const REMPARMDESC g_aArgsDBGFR3DisasInstrEx[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(VMCPUID), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTSEL), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTGCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3DisasInstrCurrentLogInternal[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3Info[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PCDBGFINFOHLP), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3AsSymbolByAddr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTDBGAS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PCDBGFADDRESS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(PRTGCINTPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTDBGSYMBOL), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTDBGMOD), NULL }
+};
+static const REMPARMDESC g_aArgsDBGFR3AddrFromFlat[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PDBGFADDRESS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTGCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsDISInstrToStr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t const *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(DISCPUMODE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PDISCPUSTATE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsEMR3FatalError[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(int), NULL }
+};
+static const REMPARMDESC g_aArgsEMSetInhibitInterruptsPC[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsHMCanExecuteGuest[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PCPUMCTX), NULL },
+};
+static const REMPARMDESC g_aArgsIOMIOPortRead[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTIOPORT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMIOPortWrite[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTIOPORT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMMMIORead[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsIOMMMIOWrite[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsMMR3HeapAlloc[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(MMTAG), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsMMR3HeapAllocZ[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(MMTAG), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsPATMIsPatchGCAddr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTRCUINTPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPATMR3QueryOpcode[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMGetInterrupt[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL }
+};
+static const REMPARMDESC g_aArgsPDMIsaSetIrq[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsPDMR3CritSectInit[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL },
+ /* RT_SRC_POS_DECL */
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsPDMCritSectEnter[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(int), NULL }
+};
+static const REMPARMDESC g_aArgsPDMCritSectEnterDebug[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTHCUINTPTR), NULL },
+ /* RT_SRC_POS_DECL */
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsPGMGetGuestMode[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+};
+static const REMPARMDESC g_aArgsPGMGstGetPage[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMInvalidatePage[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysTlbGCPhys2Ptr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }
+};
+static const REMPARMDESC g_aArgsPGM3PhysGrowRange[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PCRTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysIsGCPhysValid[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysRead[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysSimpleReadGCPtr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMPhysWrite[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMChangeMode[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }
+};
+static const REMPARMDESC g_aArgsPGMFlushTLB[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysReadUxx[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU8[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU16[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint16_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU32[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsPGMR3PhysWriteU64[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL }
+};
+static const REMPARMDESC g_aArgsRTMemReallocTag[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void*), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsRTMemEfRealloc[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void*), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetGCPtr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetMem[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetU32[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3GetUInt[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTUINT), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutGCPtr[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutMem[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsSSMR3PutU32[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+};
+static const REMPARMDESC g_aArgsSSMR3PutUInt[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL },
+};
+
+static const REMPARMDESC g_aArgsSSMIntLiveExecCallback[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+};
+static REMFNDESC g_SSMIntLiveExecCallback =
+{
+ "SSMIntLiveExecCallback", NULL, &g_aArgsSSMIntLiveExecCallback[0], RT_ELEMENTS(g_aArgsSSMIntLiveExecCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL
+};
+
+static const REMPARMDESC g_aArgsSSMIntLiveVoteCallback[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+};
+static REMFNDESC g_SSMIntLiveVoteCallback =
+{
+ "SSMIntLiveVoteCallback", NULL, &g_aArgsSSMIntLiveVoteCallback[0], RT_ELEMENTS(g_aArgsSSMIntLiveVoteCallback), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL
+};
+
+static const REMPARMDESC g_aArgsSSMIntCallback[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+};
+static REMFNDESC g_SSMIntCallback =
+{
+ "SSMIntCallback", NULL, &g_aArgsSSMIntCallback[0], RT_ELEMENTS(g_aArgsSSMIntCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL
+};
+
+static const REMPARMDESC g_aArgsSSMIntLoadExecCallback[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+};
+static REMFNDESC g_SSMIntLoadExecCallback =
+{
+ "SSMIntLoadExecCallback", NULL, &g_aArgsSSMIntLoadExecCallback[0], RT_ELEMENTS(g_aArgsSSMIntLoadExecCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL
+};
+/* Note: don't forget about the handwritten assembly wrapper when changing this! */
+static const REMPARMDESC g_aArgsSSMR3RegisterInternal[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEPREP), &g_SSMIntCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEEXEC), &g_SSMIntLiveExecCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEVOTE), &g_SSMIntLiveVoteCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEPREP), &g_SSMIntCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEEXEC), &g_SSMIntCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEDONE), &g_SSMIntCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADPREP), &g_SSMIntCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADEXEC), &g_SSMIntLoadExecCallback },
+ { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADDONE), &g_SSMIntCallback },
+};
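+/* The REMPARMDESC_FLAGS_PFN entries point at the REMFNDESC structures above
+   (e.g. g_SSMIntLoadExecCallback) describing the callback signatures, which
+   presumably lets the wrapper generate reverse calling-convention glue for
+   callbacks handed across the GCC/MSC boundary. */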
+
+static const REMPARMDESC g_aArgsSTAMR3Register[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(STAMTYPE), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(STAMVISIBILITY), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(STAMUNIT), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsSTAMR3Deregister[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+};
+static const REMPARMDESC g_aArgsTRPMAssertTrap[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(TRPMEVENT), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMQueryTrap[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(TRPMEVENT *), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMSetErrorCode[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINT), NULL }
+};
+static const REMPARMDESC g_aArgsTRPMSetFaultAddress[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL },
+ { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINT), NULL }
+};
+static const REMPARMDESC g_aArgsVMR3ReqCallWait[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(VMCPUID), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsVMR3ReqFree[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PVMREQ), NULL }
+};
+
+/* IPRT args */
+static const REMPARMDESC g_aArgsRTAssertMsg1[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsRTAssertMsg2[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTAssertMsg2V[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_VALIST, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTLogGetDefaultInstanceEx[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }
+};
+static const REMPARMDESC g_aArgsRTLogFlags[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }
+};
+static const REMPARMDESC g_aArgsRTLogFlush[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL }
+};
+static const REMPARMDESC g_aArgsRTLogLoggerEx[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTLogLoggerExV[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_VALIST, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTLogPrintf[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTMemProtect[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }
+};
+static const REMPARMDESC g_aArgsRTStrPrintf[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL }
+};
+static const REMPARMDESC g_aArgsRTStrPrintfV[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL },
+ { REMPARMDESC_FLAGS_VALIST, 0, NULL }
+};
+static const REMPARMDESC g_aArgsThread[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(RTTHREAD), NULL }
+};
+
+
+/* CRT args */
+static const REMPARMDESC g_aArgsmemcpy[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+static const REMPARMDESC g_aArgsmemset[] =
+{
+ { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(int), NULL },
+ { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }
+};
+
+# endif /* !VBOX_USE_BITNESS_SELECTOR */
+
+/** @} */
+
+/**
+ * Descriptors for the exported functions.
+ */
+static const REMFNDESC g_aExports[] =
+{ /* pszName, (void *)pv, pParams, cParams, fFlags, cb, pvWrapper. */
+ { "REMR3Init", (void *)&pfnREMR3Init, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3InitFinalize", (void *)&pfnREMR3InitFinalize, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3Term", (void *)&pfnREMR3Term, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3Reset", (void *)&pfnREMR3Reset, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3Step", (void *)&pfnREMR3Step, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3BreakpointSet", (void *)&pfnREMR3BreakpointSet, &g_aArgsBreakpoint[0], RT_ELEMENTS(g_aArgsBreakpoint), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3BreakpointClear", (void *)&pfnREMR3BreakpointClear, &g_aArgsBreakpoint[0], RT_ELEMENTS(g_aArgsBreakpoint), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3EmulateInstruction", (void *)&pfnREMR3EmulateInstruction, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3Run", (void *)&pfnREMR3Run, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3State", (void *)&pfnREMR3State, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3StateBack", (void *)&pfnREMR3StateBack, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3StateUpdate", (void *)&pfnREMR3StateUpdate, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3A20Set", (void *)&pfnREMR3A20Set, &g_aArgsA20Set[0], RT_ELEMENTS(g_aArgsA20Set), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3ReplayHandlerNotifications", (void *)&pfnREMR3ReplayHandlerNotifications, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyPhysRamRegister", (void *)&pfnREMR3NotifyPhysRamRegister, &g_aArgsNotifyPhysRamRegister[0], RT_ELEMENTS(g_aArgsNotifyPhysRamRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyPhysRamDeregister", (void *)&pfnREMR3NotifyPhysRamDeregister, &g_aArgsNotifyPhysRamDeregister[0], RT_ELEMENTS(g_aArgsNotifyPhysRamDeregister), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyPhysRomRegister", (void *)&pfnREMR3NotifyPhysRomRegister, &g_aArgsNotifyPhysRomRegister[0], RT_ELEMENTS(g_aArgsNotifyPhysRomRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyHandlerPhysicalModify", (void *)&pfnREMR3NotifyHandlerPhysicalModify, &g_aArgsNotifyHandlerPhysicalModify[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalModify), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyHandlerPhysicalRegister", (void *)&pfnREMR3NotifyHandlerPhysicalRegister, &g_aArgsNotifyHandlerPhysicalRegister[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyHandlerPhysicalDeregister", (void *)&pfnREMR3NotifyHandlerPhysicalDeregister, &g_aArgsNotifyHandlerPhysicalDeregister[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalDeregister), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyInterruptSet", (void *)&pfnREMR3NotifyInterruptSet, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyInterruptClear", (void *)&pfnREMR3NotifyInterruptClear, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyTimerPending", (void *)&pfnREMR3NotifyTimerPending, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyDmaPending", (void *)&pfnREMR3NotifyDmaPending, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyQueuePending", (void *)&pfnREMR3NotifyQueuePending, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyFF", (void *)&pfnREMR3NotifyFF, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "REMR3NotifyCodePageChanged", (void *)&pfnREMR3NotifyCodePageChanged, &g_aArgsNotifyCodePageChanged[0], RT_ELEMENTS(g_aArgsNotifyCodePageChanged), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3DisasEnableStepping", (void *)&pfnREMR3DisasEnableStepping, &g_aArgsDisasEnableStepping[0], RT_ELEMENTS(g_aArgsDisasEnableStepping), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "REMR3IsPageAccessHandled", (void *)&pfnREMR3IsPageAccessHandled, &g_aArgsIsPageAccessHandled[0], RT_ELEMENTS(g_aArgsIsPageAccessHandled), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }
+};
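+
+/* Each export entry pairs the symbol name with the address of the matching
+   pfnREMR3* variable above; the double indirection ((void *)&pfnREMR3Init)
+   presumably allows the address resolved from the REM module (g_ModREM2) to
+   be written back through the pointer once the module has been loaded. */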
+
+# ifndef VBOX_USE_BITNESS_SELECTOR
+
+# ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+# define VMM_FN(name) NULL
+# else
+# define VMM_FN(name) (void *)(uintptr_t)& name
+# endif
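+
+/* VMM_FN() yields the import address directly when VBoxVMM is linked in;
+   with VBOX_WITHOUT_REM_LDR_CYCLE it yields NULL and the address is
+   presumably resolved later through g_ModVMM instead. */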
+
+/**
+ * Descriptors for the functions imported from VBoxVMM.
+ */
+static REMFNDESC g_aVMMImports[] =
+{
+ { "APICUpdatePendingInterrupts", VMM_FN(APICUpdatePendingInterrupts), &g_aArgsAPICUpdatePendingInterrupts[0], RT_ELEMENTS(g_aArgsAPICUpdatePendingInterrupts), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "APICGetTpr", VMM_FN(APICGetTpr), &g_aArgsAPICGetTpr[0], RT_ELEMENTS(g_aArgsAPICGetTpr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "APICSetTpr", VMM_FN(APICSetTpr), &g_aArgsAPICSetTpr[0], RT_ELEMENTS(g_aArgsAPICSetTpr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "CPUMR3RemEnter", VMM_FN(CPUMR3RemEnter), &g_aArgsCPUMR3RemEnter[0], RT_ELEMENTS(g_aArgsCPUMR3RemEnter), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMR3RemLeave", VMM_FN(CPUMR3RemLeave), &g_aArgsCPUMR3RemLeave[0], RT_ELEMENTS(g_aArgsCPUMR3RemLeave), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "CPUMSetChangedFlags", VMM_FN(CPUMSetChangedFlags), &g_aArgsCPUMSetChangedFlags[0], RT_ELEMENTS(g_aArgsCPUMSetChangedFlags), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "CPUMGetGuestCPL", VMM_FN(CPUMGetGuestCPL), &g_aArgsCPUMGetGuestCpl[0], RT_ELEMENTS(g_aArgsCPUMGetGuestCpl), REMFNDESC_FLAGS_RET_INT, sizeof(unsigned), NULL },
+ { "CPUMQueryGuestMsr", VMM_FN(CPUMQueryGuestMsr), &g_aArgsCPUMQueryGuestMsr[0], RT_ELEMENTS(g_aArgsCPUMQueryGuestMsr), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL },
+ { "CPUMSetGuestMsr", VMM_FN(CPUMSetGuestMsr), &g_aArgsCPUMSetGuestMsr[0], RT_ELEMENTS(g_aArgsCPUMSetGuestMsr), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "CPUMGetGuestCpuId", VMM_FN(CPUMGetGuestCpuId), &g_aArgsCPUMGetGuestCpuId[0], RT_ELEMENTS(g_aArgsCPUMGetGuestCpuId), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "CPUMGetGuestEAX", VMM_FN(CPUMGetGuestEAX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestEBP", VMM_FN(CPUMGetGuestEBP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestEBX", VMM_FN(CPUMGetGuestEBX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestECX", VMM_FN(CPUMGetGuestECX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestEDI", VMM_FN(CPUMGetGuestEDI), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestEDX", VMM_FN(CPUMGetGuestEDX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestEIP", VMM_FN(CPUMGetGuestEIP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestESI", VMM_FN(CPUMGetGuestESI), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestESP", VMM_FN(CPUMGetGuestESP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "CPUMGetGuestCS", VMM_FN(CPUMGetGuestCS), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTSEL), NULL },
+ { "CPUMGetGuestSS", VMM_FN(CPUMGetGuestSS), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTSEL), NULL },
+ { "CPUMGetGuestCpuVendor", VMM_FN(CPUMGetGuestCpuVendor), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(CPUMCPUVENDOR), NULL },
+ { "CPUMQueryGuestCtxPtr", VMM_FN(CPUMQueryGuestCtxPtr), &g_aArgsCPUMQueryGuestCtxPtr[0], RT_ELEMENTS(g_aArgsCPUMQueryGuestCtxPtr), REMFNDESC_FLAGS_RET_INT, sizeof(PCPUMCTX), NULL },
+ { "CSAMR3MonitorPage", VMM_FN(CSAMR3MonitorPage), &g_aArgsCSAMR3MonitorPage[0], RT_ELEMENTS(g_aArgsCSAMR3MonitorPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "CSAMR3UnmonitorPage", VMM_FN(CSAMR3UnmonitorPage), &g_aArgsCSAMR3UnmonitorPage[0], RT_ELEMENTS(g_aArgsCSAMR3UnmonitorPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "CSAMR3RecordCallAddress", VMM_FN(CSAMR3RecordCallAddress), &g_aArgsCSAMR3RecordCallAddress[0], RT_ELEMENTS(g_aArgsCSAMR3RecordCallAddress), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+# if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */
+ { "DBGCRegisterCommands", VMM_FN(DBGCRegisterCommands), &g_aArgsDBGCRegisterCommands[0], RT_ELEMENTS(g_aArgsDBGCRegisterCommands), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+# endif
+ { "DBGFR3DisasInstrEx", VMM_FN(DBGFR3DisasInstrEx), &g_aArgsDBGFR3DisasInstrEx[0], RT_ELEMENTS(g_aArgsDBGFR3DisasInstrEx), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "DBGFR3DisasInstrCurrentLogInternal", VMM_FN(DBGFR3DisasInstrCurrentLogInternal), &g_aArgsDBGFR3DisasInstrCurrentLogInternal[0], RT_ELEMENTS(g_aArgsDBGFR3DisasInstrCurrentLogInternal),REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "DBGFR3Info", VMM_FN(DBGFR3Info), &g_aArgsDBGFR3Info[0], RT_ELEMENTS(g_aArgsDBGFR3Info), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "DBGFR3InfoLogRelHlp", VMM_FN(DBGFR3InfoLogRelHlp), NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "DBGFR3AsSymbolByAddr", VMM_FN(DBGFR3AsSymbolByAddr), &g_aArgsDBGFR3AsSymbolByAddr[0], RT_ELEMENTS(g_aArgsDBGFR3AsSymbolByAddr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "DBGFR3AddrFromFlat", VMM_FN(DBGFR3AddrFromFlat), &g_aArgsDBGFR3AddrFromFlat[0], RT_ELEMENTS(g_aArgsDBGFR3AddrFromFlat), REMFNDESC_FLAGS_RET_INT, sizeof(PDBGFADDRESS), NULL },
+ { "DISInstrToStr", VMM_FN(DISInstrToStr), &g_aArgsDISInstrToStr[0], RT_ELEMENTS(g_aArgsDISInstrToStr), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "EMR3FatalError", VMM_FN(EMR3FatalError), &g_aArgsEMR3FatalError[0], RT_ELEMENTS(g_aArgsEMR3FatalError), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "EMRemLock", VMM_FN(EMRemLock), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "EMRemUnlock", VMM_FN(EMRemUnlock), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "EMRemIsLockOwner", VMM_FN(EMRemIsLockOwner), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, sizeof(bool), NULL },
+ { "EMGetInhibitInterruptsPC", VMM_FN(EMGetInhibitInterruptsPC), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCPTR), NULL },
+ { "EMSetInhibitInterruptsPC", VMM_FN(EMSetInhibitInterruptsPC), &g_aArgsEMSetInhibitInterruptsPC[0], RT_ELEMENTS(g_aArgsEMSetInhibitInterruptsPC), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "HMIsEnabledNotMacro", VMM_FN(HMIsEnabledNotMacro), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "HMCanExecuteGuest", VMM_FN(HMCanExecuteGuest), &g_aArgsHMCanExecuteGuest[0], RT_ELEMENTS(g_aArgsHMCanExecuteGuest), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "IOMIOPortRead", VMM_FN(IOMIOPortRead), &g_aArgsIOMIOPortRead[0], RT_ELEMENTS(g_aArgsIOMIOPortRead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "IOMIOPortWrite", VMM_FN(IOMIOPortWrite), &g_aArgsIOMIOPortWrite[0], RT_ELEMENTS(g_aArgsIOMIOPortWrite), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "IOMMMIORead", VMM_FN(IOMMMIORead), &g_aArgsIOMMMIORead[0], RT_ELEMENTS(g_aArgsIOMMMIORead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "IOMMMIOWrite", VMM_FN(IOMMMIOWrite), &g_aArgsIOMMMIOWrite[0], RT_ELEMENTS(g_aArgsIOMMMIOWrite), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "MMR3HeapAlloc", VMM_FN(MMR3HeapAlloc), &g_aArgsMMR3HeapAlloc[0], RT_ELEMENTS(g_aArgsMMR3HeapAlloc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "MMR3HeapAllocZ", VMM_FN(MMR3HeapAllocZ), &g_aArgsMMR3HeapAllocZ[0], RT_ELEMENTS(g_aArgsMMR3HeapAllocZ), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "MMR3PhysGetRamSize", VMM_FN(MMR3PhysGetRamSize), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL },
+ { "PATMIsPatchGCAddr", VMM_FN(PATMIsPatchGCAddr), &g_aArgsPATMIsPatchGCAddr[0], RT_ELEMENTS(g_aArgsPATMIsPatchGCAddr), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "PATMR3QueryOpcode", VMM_FN(PATMR3QueryOpcode), &g_aArgsPATMR3QueryOpcode[0], RT_ELEMENTS(g_aArgsPATMR3QueryOpcode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PDMR3DmaRun", VMM_FN(PDMR3DmaRun), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PDMR3CritSectInit", VMM_FN(PDMR3CritSectInit), &g_aArgsPDMR3CritSectInit[0], RT_ELEMENTS(g_aArgsPDMR3CritSectInit), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PDMCritSectEnter", VMM_FN(PDMCritSectEnter), &g_aArgsPDMCritSectEnter[0], RT_ELEMENTS(g_aArgsPDMCritSectEnter), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PDMCritSectLeave", VMM_FN(PDMCritSectLeave), &g_aArgsPTR[0], RT_ELEMENTS(g_aArgsPTR), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+# ifdef VBOX_STRICT
+ { "PDMCritSectEnterDebug", VMM_FN(PDMCritSectEnterDebug), &g_aArgsPDMCritSectEnterDebug[0], RT_ELEMENTS(g_aArgsPDMCritSectEnterDebug), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+# endif
+ { "PDMGetInterrupt", VMM_FN(PDMGetInterrupt), &g_aArgsPDMGetInterrupt[0], RT_ELEMENTS(g_aArgsPDMGetInterrupt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PDMIsaSetIrq", VMM_FN(PDMIsaSetIrq), &g_aArgsPDMIsaSetIrq[0], RT_ELEMENTS(g_aArgsPDMIsaSetIrq), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMGetGuestMode", VMM_FN(PGMGetGuestMode), &g_aArgsPGMGetGuestMode[0], RT_ELEMENTS(g_aArgsPGMGetGuestMode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMGstGetPage", VMM_FN(PGMGstGetPage), &g_aArgsPGMGstGetPage[0], RT_ELEMENTS(g_aArgsPGMGstGetPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMInvalidatePage", VMM_FN(PGMInvalidatePage), &g_aArgsPGMInvalidatePage[0], RT_ELEMENTS(g_aArgsPGMInvalidatePage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMPhysIsGCPhysValid", VMM_FN(PGMPhysIsGCPhysValid), &g_aArgsPGMPhysIsGCPhysValid[0], RT_ELEMENTS(g_aArgsPGMPhysIsGCPhysValid), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "PGMPhysIsA20Enabled", VMM_FN(PGMPhysIsA20Enabled), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "PGMPhysRead", VMM_FN(PGMPhysRead), &g_aArgsPGMPhysRead[0], RT_ELEMENTS(g_aArgsPGMPhysRead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMPhysSimpleReadGCPtr", VMM_FN(PGMPhysSimpleReadGCPtr), &g_aArgsPGMPhysSimpleReadGCPtr[0], RT_ELEMENTS(g_aArgsPGMPhysSimpleReadGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMPhysWrite", VMM_FN(PGMPhysWrite), &g_aArgsPGMPhysWrite[0], RT_ELEMENTS(g_aArgsPGMPhysWrite), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMChangeMode", VMM_FN(PGMChangeMode), &g_aArgsPGMChangeMode[0], RT_ELEMENTS(g_aArgsPGMChangeMode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMFlushTLB", VMM_FN(PGMFlushTLB), &g_aArgsPGMFlushTLB[0], RT_ELEMENTS(g_aArgsPGMFlushTLB), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMCr0WpEnabled", VMM_FN(PGMCr0WpEnabled), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMR3PhysReadU8", VMM_FN(PGMR3PhysReadU8), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint8_t), NULL },
+ { "PGMR3PhysReadU16", VMM_FN(PGMR3PhysReadU16), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint16_t), NULL },
+ { "PGMR3PhysReadU32", VMM_FN(PGMR3PhysReadU32), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL },
+ { "PGMR3PhysReadU64", VMM_FN(PGMR3PhysReadU64), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL },
+ { "PGMR3PhysWriteU8", VMM_FN(PGMR3PhysWriteU8), &g_aArgsPGMR3PhysWriteU8[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU8), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMR3PhysWriteU16", VMM_FN(PGMR3PhysWriteU16), &g_aArgsPGMR3PhysWriteU16[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU16), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMR3PhysWriteU32", VMM_FN(PGMR3PhysWriteU32), &g_aArgsPGMR3PhysWriteU32[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMR3PhysWriteU64", VMM_FN(PGMR3PhysWriteU64), &g_aArgsPGMR3PhysWriteU64[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "PGMR3PhysTlbGCPhys2Ptr", VMM_FN(PGMR3PhysTlbGCPhys2Ptr), &g_aArgsPGMR3PhysTlbGCPhys2Ptr[0], RT_ELEMENTS(g_aArgsPGMR3PhysTlbGCPhys2Ptr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "PGMIsLockOwner", VMM_FN(PGMIsLockOwner), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "SSMR3GetGCPtr", VMM_FN(SSMR3GetGCPtr), &g_aArgsSSMR3GetGCPtr[0], RT_ELEMENTS(g_aArgsSSMR3GetGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3GetMem", VMM_FN(SSMR3GetMem), &g_aArgsSSMR3GetMem[0], RT_ELEMENTS(g_aArgsSSMR3GetMem), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3GetU32", VMM_FN(SSMR3GetU32), &g_aArgsSSMR3GetU32[0], RT_ELEMENTS(g_aArgsSSMR3GetU32), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3GetUInt", VMM_FN(SSMR3GetUInt), &g_aArgsSSMR3GetUInt[0], RT_ELEMENTS(g_aArgsSSMR3GetUInt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3PutGCPtr", VMM_FN(SSMR3PutGCPtr), &g_aArgsSSMR3PutGCPtr[0], RT_ELEMENTS(g_aArgsSSMR3PutGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3PutMem", VMM_FN(SSMR3PutMem), &g_aArgsSSMR3PutMem[0], RT_ELEMENTS(g_aArgsSSMR3PutMem), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3PutU32", VMM_FN(SSMR3PutU32), &g_aArgsSSMR3PutU32[0], RT_ELEMENTS(g_aArgsSSMR3PutU32), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3PutUInt", VMM_FN(SSMR3PutUInt), &g_aArgsSSMR3PutUInt[0], RT_ELEMENTS(g_aArgsSSMR3PutUInt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "SSMR3RegisterInternal", VMM_FN(SSMR3RegisterInternal), &g_aArgsSSMR3RegisterInternal[0], RT_ELEMENTS(g_aArgsSSMR3RegisterInternal), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "STAMR3Register", VMM_FN(STAMR3Register), &g_aArgsSTAMR3Register[0], RT_ELEMENTS(g_aArgsSTAMR3Register), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "STAMR3Deregister", VMM_FN(STAMR3Deregister), &g_aArgsSTAMR3Deregister[0], RT_ELEMENTS(g_aArgsSTAMR3Deregister), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TMCpuTickGet", VMM_FN(TMCpuTickGet), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL },
+ { "TMR3NotifySuspend", VMM_FN(TMR3NotifySuspend), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TMR3NotifyResume", VMM_FN(TMR3NotifyResume), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TMNotifyEndOfExecution", VMM_FN(TMNotifyEndOfExecution), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "TMNotifyStartOfExecution", VMM_FN(TMNotifyStartOfExecution), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "TMTimerPollBool", VMM_FN(TMTimerPollBool), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "TMR3TimerQueuesDo", VMM_FN(TMR3TimerQueuesDo), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "TRPMAssertTrap", VMM_FN(TRPMAssertTrap), &g_aArgsTRPMAssertTrap[0], RT_ELEMENTS(g_aArgsTRPMAssertTrap), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TRPMGetErrorCode", VMM_FN(TRPMGetErrorCode), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCUINT), NULL },
+ { "TRPMGetFaultAddress", VMM_FN(TRPMGetFaultAddress), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCUINTPTR),NULL },
+ { "TRPMQueryTrap", VMM_FN(TRPMQueryTrap), &g_aArgsTRPMQueryTrap[0], RT_ELEMENTS(g_aArgsTRPMQueryTrap), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TRPMResetTrap", VMM_FN(TRPMResetTrap), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "TRPMSetErrorCode", VMM_FN(TRPMSetErrorCode), &g_aArgsTRPMSetErrorCode[0], RT_ELEMENTS(g_aArgsTRPMSetErrorCode), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "TRPMSetFaultAddress", VMM_FN(TRPMSetFaultAddress), &g_aArgsTRPMSetFaultAddress[0], RT_ELEMENTS(g_aArgsTRPMSetFaultAddress), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "VMMGetCpu", VMM_FN(VMMGetCpu), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(PVMCPU), NULL },
+ { "VMR3ReqPriorityCallWait", VMM_FN(VMR3ReqPriorityCallWait), &g_aArgsVMR3ReqCallWait[0], RT_ELEMENTS(g_aArgsVMR3ReqCallWait), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(int), NULL },
+ { "VMR3ReqFree", VMM_FN(VMR3ReqFree), &g_aArgsVMR3ReqFree[0], RT_ELEMENTS(g_aArgsVMR3ReqFree), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+// { "", VMM_FN(), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+};
+
+
+/**
+ * Descriptors for the functions imported from VBoxRT.
+ */
+static REMFNDESC g_aRTImports[] =
+{
+ { "RTAssertMsg1", (void *)(uintptr_t)&RTAssertMsg1, &g_aArgsRTAssertMsg1[0], RT_ELEMENTS(g_aArgsRTAssertMsg1), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTAssertMsg1Weak", (void *)(uintptr_t)&RTAssertMsg1Weak, &g_aArgsRTAssertMsg1[0], RT_ELEMENTS(g_aArgsRTAssertMsg1), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTAssertMsg2", (void *)(uintptr_t)&RTAssertMsg2, &g_aArgsRTAssertMsg2[0], RT_ELEMENTS(g_aArgsRTAssertMsg2), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL },
+ { "RTAssertMsg2V", (void *)(uintptr_t)&RTAssertMsg2V, &g_aArgsRTAssertMsg2V[0], RT_ELEMENTS(g_aArgsRTAssertMsg2V), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_VALIST, 0, NULL },
+ { "RTAssertMsg2Weak", (void *)(uintptr_t)&RTAssertMsg2Weak, &g_aArgsRTAssertMsg2[0], RT_ELEMENTS(g_aArgsRTAssertMsg2), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL },
+ { "RTAssertShouldPanic", (void *)(uintptr_t)&RTAssertShouldPanic, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL },
+ { "RTLogDefaultInstance", (void *)(uintptr_t)&RTLogDefaultInstance, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL },
+ { "RTLogRelGetDefaultInstance", (void *)(uintptr_t)&RTLogRelGetDefaultInstance, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL },
+ { "RTLogDefaultInstanceEx", (void *)(uintptr_t)&RTLogDefaultInstance, &g_aArgsRTLogGetDefaultInstanceEx[0], RT_ELEMENTS(g_aArgsRTLogGetDefaultInstanceEx), REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL },
+ { "RTLogRelGetDefaultInstanceEx", (void *)(uintptr_t)&RTLogRelGetDefaultInstance, &g_aArgsRTLogGetDefaultInstanceEx[0], RT_ELEMENTS(g_aArgsRTLogGetDefaultInstanceEx), REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL },
+ { "RTLogFlags", (void *)(uintptr_t)&RTLogFlags, &g_aArgsRTLogFlags[0], RT_ELEMENTS(g_aArgsRTLogFlags), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "RTLogFlush", (void *)(uintptr_t)&RTLogFlush, &g_aArgsRTLogFlush[0], RT_ELEMENTS(g_aArgsRTLogFlush), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "RTLogLoggerEx", (void *)(uintptr_t)&RTLogLoggerEx, &g_aArgsRTLogLoggerEx[0], RT_ELEMENTS(g_aArgsRTLogLoggerEx), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL },
+ { "RTLogLoggerExV", (void *)(uintptr_t)&RTLogLoggerExV, &g_aArgsRTLogLoggerExV[0], RT_ELEMENTS(g_aArgsRTLogLoggerExV), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_VALIST, 0, NULL },
+ { "RTLogPrintf", (void *)(uintptr_t)&RTLogPrintf, &g_aArgsRTLogPrintf[0], RT_ELEMENTS(g_aArgsRTLogPrintf), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTLogRelPrintf", (void *)(uintptr_t)&RTLogRelPrintf, &g_aArgsRTLogPrintf[0], RT_ELEMENTS(g_aArgsRTLogPrintf), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTMemAllocTag", (void *)(uintptr_t)&RTMemAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemAllocZTag", (void *)(uintptr_t)&RTMemAllocZTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemReallocTag", (void *)(uintptr_t)&RTMemReallocTag, &g_aArgsRTMemReallocTag[0], RT_ELEMENTS(g_aArgsRTMemReallocTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemExecAllocTag", (void *)(uintptr_t)&RTMemExecAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemExecFree", (void *)(uintptr_t)&RTMemExecFree, &g_aArgsPTR_SIZE_T[0], RT_ELEMENTS(g_aArgsPTR_SIZE_T), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTMemFree", (void *)(uintptr_t)&RTMemFree, &g_aArgsPTR[0], RT_ELEMENTS(g_aArgsPTR), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTMemPageAllocTag", (void *)(uintptr_t)&RTMemPageAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemPageFree", (void *)(uintptr_t)&RTMemPageFree, &g_aArgsPTR_SIZE_T[0], RT_ELEMENTS(g_aArgsPTR_SIZE_T), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTMemProtect", (void *)(uintptr_t)&RTMemProtect, &g_aArgsRTMemProtect[0], RT_ELEMENTS(g_aArgsRTMemProtect), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL },
+ { "RTMemEfAlloc", (void *)(uintptr_t)&RTMemEfAlloc, &g_aArgsSIZE_TTagLoc[0], RT_ELEMENTS(g_aArgsSIZE_TTagLoc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemEfAllocZ", (void *)(uintptr_t)&RTMemEfAllocZ, &g_aArgsSIZE_TTagLoc[0], RT_ELEMENTS(g_aArgsSIZE_TTagLoc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemEfRealloc", (void *)(uintptr_t)&RTMemEfRealloc, &g_aArgsRTMemEfRealloc[0], RT_ELEMENTS(g_aArgsRTMemEfRealloc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "RTMemEfFree", (void *)(uintptr_t)&RTMemEfFree, &g_aArgsPTRLoc[0], RT_ELEMENTS(g_aArgsPTRLoc), REMFNDESC_FLAGS_RET_VOID, 0, NULL },
+ { "RTStrPrintf", (void *)(uintptr_t)&RTStrPrintf, &g_aArgsRTStrPrintf[0], RT_ELEMENTS(g_aArgsRTStrPrintf), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(size_t), NULL },
+ { "RTStrPrintfV", (void *)(uintptr_t)&RTStrPrintfV, &g_aArgsRTStrPrintfV[0], RT_ELEMENTS(g_aArgsRTStrPrintfV), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_VALIST, sizeof(size_t), NULL },
+ { "RTThreadSelf", (void *)(uintptr_t)&RTThreadSelf, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(RTTHREAD), NULL },
+ { "RTThreadNativeSelf", (void *)(uintptr_t)&RTThreadNativeSelf, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(RTNATIVETHREAD), NULL },
+ { "RTLockValidatorWriteLockGetCount", (void *)(uintptr_t)&RTLockValidatorWriteLockGetCount, &g_aArgsThread[0], 0, REMFNDESC_FLAGS_RET_INT, sizeof(int32_t), NULL },
+};
+
+
+/**
+ * Descriptors for the functions imported from the C runtime (CRT).
+ */
+static REMFNDESC g_aCRTImports[] =
+{
+ { "memcpy", (void *)(uintptr_t)&memcpy, &g_aArgsmemcpy[0], RT_ELEMENTS(g_aArgsmemcpy), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL },
+ { "memset", (void *)(uintptr_t)&memset, &g_aArgsmemset[0], RT_ELEMENTS(g_aArgsmemset), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }
+/*
+floor floor
+memcpy memcpy
+sqrt sqrt
+sqrtf sqrtf
+*/
+};
+
+
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+/** LIFO of read-write-executable memory chunks used for wrappers. */
+static PREMEXECMEM g_pExecMemHead;
+# endif
+# endif /* !VBOX_USE_BITNESS_SELECTOR */
+
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+# ifndef VBOX_USE_BITNESS_SELECTOR
+static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc);
+
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+DECLASM(int) WrapGCC2MSC0Int(void); DECLASM(int) WrapGCC2MSC0Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC1Int(void); DECLASM(int) WrapGCC2MSC1Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC2Int(void); DECLASM(int) WrapGCC2MSC2Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC3Int(void); DECLASM(int) WrapGCC2MSC3Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC4Int(void); DECLASM(int) WrapGCC2MSC4Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC5Int(void); DECLASM(int) WrapGCC2MSC5Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC6Int(void); DECLASM(int) WrapGCC2MSC6Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC7Int(void); DECLASM(int) WrapGCC2MSC7Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC8Int(void); DECLASM(int) WrapGCC2MSC8Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC9Int(void); DECLASM(int) WrapGCC2MSC9Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC10Int(void); DECLASM(int) WrapGCC2MSC10Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC11Int(void); DECLASM(int) WrapGCC2MSC11Int_EndProc(void);
+DECLASM(int) WrapGCC2MSC12Int(void); DECLASM(int) WrapGCC2MSC12Int_EndProc(void);
+DECLASM(int) WrapGCC2MSCVariadictInt(void); DECLASM(int) WrapGCC2MSCVariadictInt_EndProc(void);
+DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal(void); DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal_EndProc(void);
+
+DECLASM(int) WrapMSC2GCC0Int(void); DECLASM(int) WrapMSC2GCC0Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC1Int(void); DECLASM(int) WrapMSC2GCC1Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC2Int(void); DECLASM(int) WrapMSC2GCC2Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC3Int(void); DECLASM(int) WrapMSC2GCC3Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC4Int(void); DECLASM(int) WrapMSC2GCC4Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC5Int(void); DECLASM(int) WrapMSC2GCC5Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC6Int(void); DECLASM(int) WrapMSC2GCC6Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC7Int(void); DECLASM(int) WrapMSC2GCC7Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC8Int(void); DECLASM(int) WrapMSC2GCC8Int_EndProc(void);
+DECLASM(int) WrapMSC2GCC9Int(void); DECLASM(int) WrapMSC2GCC9Int_EndProc(void);
+# endif
+
+
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+/**
+ * Allocates a block of memory for glue code.
+ *
+ * The returned memory is padded with INT3s.
+ *
+ * @returns Pointer to the allocated memory, NULL on failure.
+ * @param cb The amount of memory to allocate.
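+ *
+ * @remarks Blocks handed out here are never freed individually; the whole
+ * chunk list is released in one go by remUnloadLinuxObj().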
+ */
+static void *remAllocGlue(size_t cb)
+{
+ PREMEXECMEM pCur = g_pExecMemHead;
+ uint32_t cbAligned = (uint32_t)RT_ALIGN_32(cb, 32);
+ while (pCur)
+ {
+ if (pCur->cb - pCur->off >= cbAligned)
+ {
+ void *pv = (uint8_t *)pCur + pCur->off;
+ pCur->off += cbAligned;
+ return memset(pv, 0xcc, cbAligned);
+ }
+ pCur = pCur->pNext;
+ }
+
+ /* add a new chunk */
+ AssertReturn(_64K - RT_ALIGN_Z(sizeof(*pCur), 32) > cbAligned, NULL);
+ pCur = (PREMEXECMEM)RTMemExecAlloc(_64K);
+ AssertReturn(pCur, NULL);
+ pCur->cb = _64K;
+ pCur->off = RT_ALIGN_32(sizeof(*pCur), 32) + cbAligned;
+ pCur->pNext = g_pExecMemHead;
+ g_pExecMemHead = pCur;
+ return memset((uint8_t *)pCur + RT_ALIGN_Z(sizeof(*pCur), 32), 0xcc, cbAligned);
+}
+# endif /* USE_REM_CALLING_CONVENTION_GLUE || USE_REM_IMPORT_JUMP_GLUE */
+
+
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+/**
+ * Checks if a function takes and returns nothing but straightforward integer types.
+ *
+ * @returns True if it's simple, false if it's bothersome.
+ * @param pDesc The function descriptor.
+ */
+static bool remIsFunctionAllInts(PCREMFNDESC pDesc)
+{
+ if ( ( (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_INT
+ || pDesc->cbReturn > sizeof(uint64_t))
+ && (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_VOID)
+ return false;
+ unsigned i = pDesc->cParams;
+ while (i-- > 0)
+ switch (pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK)
+ {
+ case REMPARMDESC_FLAGS_INT:
+ case REMPARMDESC_FLAGS_GCPTR:
+ case REMPARMDESC_FLAGS_GCPHYS:
+ case REMPARMDESC_FLAGS_HCPHYS:
+ break;
+
+ default:
+ AssertReleaseMsgFailed(("Invalid param flags %#x for #%d of %s!\n", pDesc->paParams[i].fFlags, i, pDesc->pszName));
+ case REMPARMDESC_FLAGS_VALIST:
+ case REMPARMDESC_FLAGS_ELLIPSIS:
+ case REMPARMDESC_FLAGS_FLOAT:
+ case REMPARMDESC_FLAGS_STRUCT:
+ case REMPARMDESC_FLAGS_PFN:
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * Checks if the function has an ellipsis (...) argument.
+ *
+ * @returns true if it has an ellipsis, otherwise false.
+ * @param pDesc The function descriptor.
+ */
+static bool remHasFunctionEllipsis(PCREMFNDESC pDesc)
+{
+ unsigned i = pDesc->cParams;
+ while (i-- > 0)
+ if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_ELLIPSIS)
+ return true;
+ return false;
+}
+
+
+/**
+ * Checks if the function uses floating point (FP) arguments or return value.
+ *
+ * @returns true if it uses floating point, otherwise false.
+ * @param pDesc The function descriptor.
+ */
+static bool remIsFunctionUsingFP(PCREMFNDESC pDesc)
+{
+ if ((pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) == REMFNDESC_FLAGS_RET_FLOAT)
+ return true;
+ unsigned i = pDesc->cParams;
+ while (i-- > 0)
+ if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_FLOAT)
+ return true;
+ return false;
+}
+
+
+/** @name The export and import fixups.
+ * @{ */
+# define REM_FIXUP_32_REAL_STUFF UINT32_C(0xdeadbeef)
+# define REM_FIXUP_64_REAL_STUFF UINT64_C(0xdeadf00df00ddead)
+# define REM_FIXUP_64_DESC UINT64_C(0xdead00010001dead)
+# define REM_FIXUP_64_LOG_ENTRY UINT64_C(0xdead00020002dead)
+# define REM_FIXUP_64_LOG_EXIT UINT64_C(0xdead00030003dead)
+# define REM_FIXUP_64_WRAP_GCC_CB UINT64_C(0xdead00040004dead)
+/** @} */
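+/* These magic values are embedded in the assembly templates in
+ VBoxREMWrapperA.asm; remGenerateExportGlueFixup and remGenerateImportGlueFixup
+ scan each copied template for them and patch in the real target address, the
+ descriptor pointer, or the logger / callback wrapper helpers. */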
+
+
+/**
+ * Entry logger function.
+ *
+ * @param pDesc The description.
+ */
+DECLASM(void) remLogEntry(PCREMFNDESC pDesc)
+{
+ RTPrintf("calling %s\n", pDesc->pszName);
+}
+
+
+/**
+ * Exit logger function.
+ *
+ * @param pDesc The description.
+ * @param pvRet The return code.
+ */
+DECLASM(void) remLogExit(PCREMFNDESC pDesc, void *pvRet)
+{
+ RTPrintf("returning %p from %s\n", pvRet, pDesc->pszName);
+}
+
+
+/**
+ * Creates a wrapper for the specified callback function at run time.
+ *
+ * @param pDesc The function descriptor.
+ * @param pValue Upon entry *pValue contains the address of the function to be wrapped.
+ * Upon return *pValue contains the address of the wrapper glue function.
+ * @param iParam The parameter index in the function descriptor (0 based).
+ * If UINT32_MAX pDesc is the descriptor for *pValue.
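+ *
+ * @remarks Reached from the generated glue via the REM_FIXUP_64_WRAP_GCC_CB
+ * marker; WrapGCC2MSC_SSMR3RegisterInternal, for instance, uses it to wrap
+ * each of the SSM callback parameters (iParam 5 thru 13) before forwarding
+ * the call.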
+ */
+DECLASM(void) remWrapGCCCallback(PCREMFNDESC pDesc, PRTUINTPTR pValue, uint32_t iParam)
+{
+ AssertPtr(pDesc);
+ AssertPtr(pValue);
+
+ /*
+ * Simple?
+ */
+ if (!*pValue)
+ return;
+
+ /*
+ * Locate the right function descriptor.
+ */
+ if (iParam != UINT32_MAX)
+ {
+ AssertRelease(iParam < pDesc->cParams);
+ pDesc = (PCREMFNDESC)pDesc->paParams[iParam].pvExtra;
+ AssertPtr(pDesc);
+ }
+
+ /*
+ * When we get serious, here is where to insert the hash table lookup.
+ */
+
+ /*
+ * Create a new glue patch.
+ */
+# ifdef RT_OS_WINDOWS
+ int rc = remGenerateExportGlue(pValue, pDesc);
+# else
+# error "port me"
+# endif
+ AssertReleaseRC(rc);
+
+ /*
+ * Add it to the hash (later)
+ */
+}
+
+
+/**
+ * Fixes export glue.
+ *
+ * @param pvGlue The glue code.
+ * @param cb The size of the glue code.
+ * @param uExport The address of the export we're wrapping.
+ * @param pDesc The export descriptor.
+ */
+static void remGenerateExportGlueFixup(void *pvGlue, size_t cb, uintptr_t uExport, PCREMFNDESC pDesc)
+{
+ union
+ {
+ uint8_t *pu8;
+ int32_t *pi32;
+ uint32_t *pu32;
+ uint64_t *pu64;
+ void *pv;
+ } u;
+ u.pv = pvGlue;
+
+ while (cb >= 4)
+ {
+ /** @todo add defines for the fixup constants... */
+ if (*u.pu32 == REM_FIXUP_32_REAL_STUFF)
+ {
+ /* 32-bit rel jmp/call to real export. */
+ *u.pi32 = uExport - (uintptr_t)(u.pi32 + 1);
+ Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == uExport);
+ u.pi32++;
+ cb -= 4;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF)
+ {
+ /* 64-bit address to the real export. */
+ *u.pu64++ = uExport;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC)
+ {
+ /* 64-bit address to the descriptor. */
+ *u.pu64++ = (uintptr_t)pDesc;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB)
+ {
+ /* 64-bit address to the GCC callback wrapper (remWrapGCCCallback). */
+ *u.pu64++ = (uintptr_t)remWrapGCCCallback;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY)
+ {
+ /* 64-bit address to the entry logger function. */
+ *u.pu64++ = (uintptr_t)remLogEntry;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT)
+ {
+ /* 64-bit address to the exit logger function. */
+ *u.pu64++ = (uintptr_t)remLogExit;
+ cb -= 8;
+ continue;
+ }
+
+ /* move on. */
+ u.pu8++;
+ cb--;
+ }
+}
+
+
+/**
+ * Fixes import glue.
+ *
+ * @param pvGlue The glue code.
+ * @param cb The size of the glue code.
+ * @param pDesc The import descriptor.
+ */
+static void remGenerateImportGlueFixup(void *pvGlue, size_t cb, PCREMFNDESC pDesc)
+{
+ union
+ {
+ uint8_t *pu8;
+ int32_t *pi32;
+ uint32_t *pu32;
+ uint64_t *pu64;
+ void *pv;
+ } u;
+ u.pv = pvGlue;
+
+ while (cb >= 4)
+ {
+ if (*u.pu32 == REM_FIXUP_32_REAL_STUFF)
+ {
+ /* 32-bit rel jmp/call to real function. */
+ *u.pi32 = (uintptr_t)pDesc->pv - (uintptr_t)(u.pi32 + 1);
+ Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == (uintptr_t)pDesc->pv);
+ u.pi32++;
+ cb -= 4;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF)
+ {
+ /* 64-bit address to the real function. */
+ *u.pu64++ = (uintptr_t)pDesc->pv;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC)
+ {
+ /* 64-bit address to the descriptor. */
+ *u.pu64++ = (uintptr_t)pDesc;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB)
+ {
+ /* 64-bit address to the GCC callback wrapper (remWrapGCCCallback). */
+ *u.pu64++ = (uintptr_t)remWrapGCCCallback;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY)
+ {
+ /* 64-bit address to the entry logger function. */
+ *u.pu64++ = (uintptr_t)remLogEntry;
+ cb -= 8;
+ continue;
+ }
+ if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT)
+ {
+ /* 64-bit address to the exit logger function. */
+ *u.pu64++ = (uintptr_t)remLogExit;
+ cb -= 8;
+ continue;
+ }
+
+ /* move on. */
+ u.pu8++;
+ cb--;
+ }
+}
+
+# endif /* USE_REM_CALLING_CONVENTION_GLUE */
+
+
+/**
+ * Generate wrapper glue code for an export.
+ *
+ * This is only used on win64 when loading a 64-bit linux module. So, on other
+ * platforms it will not do anything.
+ *
+ * @returns VBox status code.
+ * @param pValue IN: Where to get the address of the function to wrap.
+ * OUT: Where to store the glue address.
+ * @param pDesc The export descriptor.
+ */
+static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc)
+{
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+ uintptr_t *ppfn = (uintptr_t *)pDesc->pv;
+
+ uintptr_t pfn = 0; /* a little hack for the callback glue */
+ if (!ppfn)
+ ppfn = &pfn;
+
+ if (!*ppfn)
+ {
+ if (remIsFunctionAllInts(pDesc))
+ {
+ static const struct { void *pvStart, *pvEnd; } s_aTemplates[] =
+ {
+ { (void *)&WrapMSC2GCC0Int, (void *)&WrapMSC2GCC0Int_EndProc },
+ { (void *)&WrapMSC2GCC1Int, (void *)&WrapMSC2GCC1Int_EndProc },
+ { (void *)&WrapMSC2GCC2Int, (void *)&WrapMSC2GCC2Int_EndProc },
+ { (void *)&WrapMSC2GCC3Int, (void *)&WrapMSC2GCC3Int_EndProc },
+ { (void *)&WrapMSC2GCC4Int, (void *)&WrapMSC2GCC4Int_EndProc },
+ { (void *)&WrapMSC2GCC5Int, (void *)&WrapMSC2GCC5Int_EndProc },
+ { (void *)&WrapMSC2GCC6Int, (void *)&WrapMSC2GCC6Int_EndProc },
+ { (void *)&WrapMSC2GCC7Int, (void *)&WrapMSC2GCC7Int_EndProc },
+ { (void *)&WrapMSC2GCC8Int, (void *)&WrapMSC2GCC8Int_EndProc },
+ { (void *)&WrapMSC2GCC9Int, (void *)&WrapMSC2GCC9Int_EndProc },
+ };
+ const unsigned i = pDesc->cParams;
+ AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName));
+
+ /* duplicate the patch. */
+ const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+ uint8_t *pb = (uint8_t *)remAllocGlue(cb);
+ AssertReturn(pb, VERR_NO_MEMORY);
+ memcpy(pb, s_aTemplates[i].pvStart, cb);
+
+ /* fix it up. */
+ remGenerateExportGlueFixup(pb, cb, *pValue, pDesc);
+ *ppfn = (uintptr_t)pb;
+ }
+ else
+ {
+ /* custom hacks - it's simpler to make assembly templates than to write a more generic code generator... */
+ static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] =
+ {
+ { "somefunction", (PFNRT)&WrapMSC2GCC9Int, (PFNRT)&WrapMSC2GCC9Int_EndProc },
+ };
+ unsigned i;
+ for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++)
+ if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName))
+ break;
+ AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! %s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED);
+
+ /* duplicate the patch. */
+ const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+ uint8_t *pb = (uint8_t *)remAllocGlue(cb);
+ AssertReturn(pb, VERR_NO_MEMORY);
+ memcpy(pb, s_aTemplates[i].pvStart, cb);
+
+ /* fix it up. */
+ remGenerateExportGlueFixup(pb, cb, *pValue, pDesc);
+ *ppfn = (uintptr_t)pb;
+ }
+ }
+ *pValue = *ppfn;
+ return VINF_SUCCESS;
+# else /* !USE_REM_CALLING_CONVENTION_GLUE */
+ return VINF_SUCCESS;
+# endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+}
+
+
+/**
+ * Generate wrapper glue code for an import.
+ *
+ * This is only used on win64 when loading a 64-bit linux module. So, on other
+ * platforms it will simply return the address of the imported function
+ * without generating any glue code.
+ *
+ * @returns VBox status code.
+ * @param pValue Where to store the glue address.
+ * @param pDesc The import descriptor.
+ */
+static int remGenerateImportGlue(PRTUINTPTR pValue, PREMFNDESC pDesc)
+{
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+ if (!pDesc->pvWrapper)
+ {
+# ifdef USE_REM_CALLING_CONVENTION_GLUE
+ if (remIsFunctionAllInts(pDesc))
+ {
+ static const struct { void *pvStart, *pvEnd; } s_aTemplates[] =
+ {
+ { (void *)&WrapGCC2MSC0Int, (void *)&WrapGCC2MSC0Int_EndProc },
+ { (void *)&WrapGCC2MSC1Int, (void *)&WrapGCC2MSC1Int_EndProc },
+ { (void *)&WrapGCC2MSC2Int, (void *)&WrapGCC2MSC2Int_EndProc },
+ { (void *)&WrapGCC2MSC3Int, (void *)&WrapGCC2MSC3Int_EndProc },
+ { (void *)&WrapGCC2MSC4Int, (void *)&WrapGCC2MSC4Int_EndProc },
+ { (void *)&WrapGCC2MSC5Int, (void *)&WrapGCC2MSC5Int_EndProc },
+ { (void *)&WrapGCC2MSC6Int, (void *)&WrapGCC2MSC6Int_EndProc },
+ { (void *)&WrapGCC2MSC7Int, (void *)&WrapGCC2MSC7Int_EndProc },
+ { (void *)&WrapGCC2MSC8Int, (void *)&WrapGCC2MSC8Int_EndProc },
+ { (void *)&WrapGCC2MSC9Int, (void *)&WrapGCC2MSC9Int_EndProc },
+ { (void *)&WrapGCC2MSC10Int, (void *)&WrapGCC2MSC10Int_EndProc },
+ { (void *)&WrapGCC2MSC11Int, (void *)&WrapGCC2MSC11Int_EndProc },
+ { (void *)&WrapGCC2MSC12Int, (void *)&WrapGCC2MSC12Int_EndProc }
+ };
+ const unsigned i = pDesc->cParams;
+ AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName));
+
+ /* duplicate the patch. */
+ const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+ pDesc->pvWrapper = remAllocGlue(cb);
+ AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+ memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb);
+
+ /* fix it up. */
+ remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+ }
+ else if ( remHasFunctionEllipsis(pDesc)
+ && !remIsFunctionUsingFP(pDesc))
+ {
+ /* duplicate the patch. */
+ const size_t cb = (uintptr_t)&WrapGCC2MSCVariadictInt_EndProc - (uintptr_t)&WrapGCC2MSCVariadictInt;
+ pDesc->pvWrapper = remAllocGlue(cb);
+ AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+ memcpy(pDesc->pvWrapper, (void *)&WrapGCC2MSCVariadictInt, cb);
+
+ /* fix it up. */
+ remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+ }
+ else
+ {
+ /* custom hacks - it's simpler to make assembly templates than to write a more generic code generator... */
+ static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] =
+ {
+ { "SSMR3RegisterInternal", (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal, (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal_EndProc },
+ };
+ unsigned i;
+ for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++)
+ if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName))
+ break;
+ AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! %s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED);
+
+ /* duplicate the patch. */
+ const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart;
+ pDesc->pvWrapper = remAllocGlue(cb);
+ AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+ memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb);
+
+ /* fix it up. */
+ remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc);
+ }
+# else /* !USE_REM_CALLING_CONVENTION_GLUE */
+
+ /*
+ * Generate a jump patch.
+ */
+ uint8_t *pb;
+# ifdef RT_ARCH_AMD64
+ pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(32);
+ AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+ /**pb++ = 0xcc;*/
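+ /* The bytes below form "jmp qword [abs]" (ff 24 25 disp32), which reads
+ the 64-bit import address stored at offset 8 of this block; note the
+ encoding requires the block address to fit in a sign-extended 32-bit
+ operand. */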
+ *pb++ = 0xff;
+ *pb++ = 0x24;
+ *pb++ = 0x25;
+ *(uint32_t *)pb = (uintptr_t)pb + 5;
+ pb += 5;
+ *(uint64_t *)pb = (uint64_t)pDesc->pv;
+# else
+ pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(8);
+ AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY);
+ *pb++ = 0xea;
+ *(uint32_t *)pb = (uint32_t)pDesc->pv;
+# endif
+# endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+ }
+ *pValue = (uintptr_t)pDesc->pvWrapper;
+# else /* !USE_REM_CALLING_CONVENTION_GLUE */
+ *pValue = (uintptr_t)pDesc->pv;
+# endif /* !USE_REM_CALLING_CONVENTION_GLUE */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Resolve an external symbol during RTLdrGetBits().
+ *
+ * @returns iprt status code.
+ * @param hLdrMod The loader module handle.
+ * @param pszModule Module name.
+ * @param pszSymbol Symbol name, NULL if uSymbol should be used.
+ * @param uSymbol Symbol ordinal, ~0 if pszSymbol should be used.
+ * @param pValue Where to store the symbol value (address).
+ * @param pvUser User argument.
+ */
+static DECLCALLBACK(int) remGetImport(RTLDRMOD hLdrMod, const char *pszModule, const char *pszSymbol, unsigned uSymbol, RTUINTPTR *pValue, void *pvUser)
+{
+ unsigned i;
+ for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++)
+ if (!strcmp(g_aVMMImports[i].pszName, pszSymbol))
+ return remGenerateImportGlue(pValue, &g_aVMMImports[i]);
+ for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++)
+ if (!strcmp(g_aRTImports[i].pszName, pszSymbol))
+ return remGenerateImportGlue(pValue, &g_aRTImports[i]);
+ for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++)
+ if (!strcmp(g_aCRTImports[i].pszName, pszSymbol))
+ return remGenerateImportGlue(pValue, &g_aCRTImports[i]);
+ LogRel(("Missing REM Import: %s\n", pszSymbol));
+# if 1
+ *pValue = 0;
+ AssertMsgFailed(("%s.%s\n", pszModule, pszSymbol));
+ return VERR_SYMBOL_NOT_FOUND;
+# else
+ return remGenerateImportGlue(pValue, &g_aCRTImports[0]);
+# endif
+}
+
+/**
+ * Loads the linux object, resolves all imports and exports.
+ *
+ * @returns VBox status code.
+ */
+static int remLoadLinuxObj(void)
+{
+ size_t offFilename;
+ char szPath[RTPATH_MAX];
+ int rc = RTPathAppPrivateArch(szPath, sizeof(szPath) - 32);
+ AssertRCReturn(rc, rc);
+ offFilename = strlen(szPath);
+
+# ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+ /*
+ * Resolve all the VBoxVMM references.
+ */
+ if (g_ModVMM == NIL_RTLDRMOD)
+ {
+ rc = SUPR3HardenedLdrLoadAppPriv("VBoxVMM", &g_ModVMM, RTLDRLOAD_FLAGS_LOCAL, NULL);
+ AssertRCReturn(rc, rc);
+ for (size_t i = 0; i < RT_ELEMENTS(g_aVMMImports); i++)
+ {
+ rc = RTLdrGetSymbol(g_ModVMM, g_aVMMImports[i].pszName, &g_aVMMImports[i].pv);
+ AssertLogRelMsgRCReturn(rc, ("RTLdrGetSymbol(VBoxVMM,%s,) -> %Rrc\n", g_aVMMImports[i].pszName, rc), rc);
+ }
+ }
+# endif
+
+ /*
+ * Load the VBoxREM2.rel object/DLL.
+ */
+ strcpy(&szPath[offFilename], "/VBoxREM2.rel");
+ rc = RTLdrOpen(szPath, 0, RTLDRARCH_HOST, &g_ModREM2);
+ if (RT_SUCCESS(rc))
+ {
+ g_cbREM2 = RTLdrSize(g_ModREM2);
+ g_pvREM2 = RTMemExecAlloc(g_cbREM2);
+ if (g_pvREM2)
+ {
+ RTPathChangeToUnixSlashes(szPath, true);
+# ifdef DEBUG /* How to load the VBoxREM2.rel symbols into the GNU debugger. */
+ RTPrintf("VBoxREMWrapper: (gdb) add-symbol-file %s 0x%p\n", szPath, g_pvREM2);
+# endif
+ LogRel(("REM: Loading %s at 0x%p (%d bytes)\n"
+ "REM: (gdb) add-symbol-file %s 0x%p\n",
+ szPath, g_pvREM2, RTLdrSize(g_ModREM2), szPath, g_pvREM2));
+ rc = RTLdrGetBits(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, remGetImport, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Resolve exports.
+ */
+ unsigned i;
+ for (i = 0; i < RT_ELEMENTS(g_aExports); i++)
+ {
+ RTUINTPTR Value;
+ rc = RTLdrGetSymbolEx(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, UINT32_MAX, g_aExports[i].pszName, &Value);
+ AssertMsgRC(rc, ("%s rc=%Rrc\n", g_aExports[i].pszName, rc));
+ if (RT_FAILURE(rc))
+ break;
+ rc = remGenerateExportGlue(&Value, &g_aExports[i]);
+ if (RT_FAILURE(rc))
+ break;
+ *(void **)g_aExports[i].pv = (void *)(uintptr_t)Value;
+ }
+ return rc;
+ }
+
+ RTMemExecFree(g_pvREM2, g_cbREM2);
+ g_pvREM2 = NULL;
+ }
+ g_cbREM2 = 0;
+ RTLdrClose(g_ModREM2);
+ g_ModREM2 = NIL_RTLDRMOD;
+ }
+ LogRel(("REM: failed loading '%s', rc=%Rrc\n", szPath, rc));
+ return rc;
+}
+
+
+/**
+ * Unloads the linux object, freeing up all resources (dlls and
+ * import glue) we allocated during remLoadLinuxObj().
+ */
+static void remUnloadLinuxObj(void)
+{
+ unsigned i;
+
+ /* close modules. */
+ RTLdrClose(g_ModREM2);
+ g_ModREM2 = NIL_RTLDRMOD;
+ RTMemExecFree(g_pvREM2, g_cbREM2);
+ g_pvREM2 = NULL;
+ g_cbREM2 = 0;
+
+ /* clear the pointers. */
+ for (i = 0; i < RT_ELEMENTS(g_aExports); i++)
+ *(void **)g_aExports[i].pv = NULL;
+# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE)
+ for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++)
+ g_aVMMImports[i].pvWrapper = NULL;
+ for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++)
+ g_aRTImports[i].pvWrapper = NULL;
+ for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++)
+ g_aCRTImports[i].pvWrapper = NULL;
+
+ /* free wrapper memory. */
+ while (g_pExecMemHead)
+ {
+ PREMEXECMEM pCur = g_pExecMemHead;
+ g_pExecMemHead = pCur->pNext;
+ memset(pCur, 0xcc, pCur->cb);
+ RTMemExecFree(pCur, pCur->cb);
+ }
+# endif
+}
+
+# else /* VBOX_USE_BITNESS_SELECTOR */
+
+/**
+ * Checks if 64-bit support is enabled.
+ *
+ * @returns true / false.
+ * @param pVM Pointer to the shared VM structure.
+ */
+static bool remIs64bitEnabled(PVM pVM)
+{
+ bool f;
+ int rc;
+
+# ifdef VBOX_WITHOUT_REM_LDR_CYCLE
+ if (g_ModVMM == NIL_RTLDRMOD)
+ {
+ rc = SUPR3HardenedLdrLoadAppPriv("VBoxVMM", &g_ModVMM, RTLDRLOAD_FLAGS_LOCAL, NULL);
+ AssertRCReturn(rc, false);
+ }
+
+ DECLCALLBACKMEMBER(PCFGMNODE, pfnCFGMR3GetRoot)(PVM);
+ rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3GetRoot", (void **)&pfnCFGMR3GetRoot);
+ AssertRCReturn(rc, false);
+
+ DECLCALLBACKMEMBER(PCFGMNODE, pfnCFGMR3GetChild)(PCFGMNODE, const char *);
+ rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3GetChild", (void **)&pfnCFGMR3GetChild);
+ AssertRCReturn(rc, false);
+
+ DECLCALLBACKMEMBER(int, pfnCFGMR3QueryBoolDef)(PCFGMNODE, const char *, bool *, bool);
+ rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3QueryBoolDef", (void **)&pfnCFGMR3QueryBoolDef);
+ AssertRCReturn(rc, false);
+
+ rc = pfnCFGMR3QueryBoolDef(pfnCFGMR3GetChild(pfnCFGMR3GetRoot(pVM), "REM"), "64bitEnabled", &f, false);
+# else
+ rc = CFGMR3QueryBoolDef(CFGMR3GetChild(CFGMR3GetRoot(pVM), "REM"), "64bitEnabled", &f, false);
+# endif
+ AssertRCReturn(rc, false);
+ return f;
+}
+
+
+/**
+ * Loads the real REM object and resolves all exports (imports are done by the native loader).
+ *
+ * @returns VBox status code.
+ */
+static int remLoadProperObj(PVM pVM)
+{
+ /*
+ * Load the VBoxREM32/64 object/DLL.
+ */
+ const char *pszModule = remIs64bitEnabled(pVM) ? "VBoxREM64" : "VBoxREM32";
+ int rc = SUPR3HardenedLdrLoadAppPriv(pszModule, &g_ModREM2, RTLDRLOAD_FLAGS_LOCAL, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ LogRel(("REM: %s\n", pszModule));
+
+ /*
+ * Resolve exports.
+ */
+ unsigned i;
+ for (i = 0; i < RT_ELEMENTS(g_aExports); i++)
+ {
+ void *pvValue;
+ rc = RTLdrGetSymbol(g_ModREM2, g_aExports[i].pszName, &pvValue);
+ AssertLogRelMsgRCBreak(rc, ("%s rc=%Rrc\n", g_aExports[i].pszName, rc));
+ *(void **)g_aExports[i].pv = pvValue;
+ }
+ }
+
+ return rc;
+}
+
+
+/**
+ * Unloads the real REM object.
+ */
+static void remUnloadProperObj(void)
+{
+ /* close module. */
+ RTLdrClose(g_ModREM2);
+ g_ModREM2 = NIL_RTLDRMOD;
+}
+
+# endif /* VBOX_USE_BITNESS_SELECTOR */
+#endif /* USE_REM_STUBS */
+
+REMR3DECL(int) REMR3Init(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+ return VINF_SUCCESS;
+
+#elif defined(VBOX_USE_BITNESS_SELECTOR)
+ if (!pfnREMR3Init)
+ {
+ int rc = remLoadProperObj(pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ return pfnREMR3Init(pVM);
+
+#else
+ if (!pfnREMR3Init)
+ {
+ int rc = remLoadLinuxObj();
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ return pfnREMR3Init(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3InitFinalize(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+ return VINF_SUCCESS;
+#else
+ Assert(VALID_PTR(pfnREMR3InitFinalize));
+ return pfnREMR3InitFinalize(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3Term(PVM pVM)
+{
+#ifdef USE_REM_STUBS
+ return VINF_SUCCESS;
+
+#elif defined(VBOX_USE_BITNESS_SELECTOR)
+ int rc;
+ Assert(VALID_PTR(pfnREMR3Term));
+ rc = pfnREMR3Term(pVM);
+ remUnloadProperObj();
+ return rc;
+
+#else
+ int rc;
+ Assert(VALID_PTR(pfnREMR3Term));
+ rc = pfnREMR3Term(pVM);
+ remUnloadLinuxObj();
+ return rc;
+#endif
+}
+
+REMR3DECL(void) REMR3Reset(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3Reset));
+ pfnREMR3Reset(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3Step(PVM pVM, PVMCPU pVCpu)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3Step));
+ return pfnREMR3Step(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address)
+{
+#ifdef USE_REM_STUBS
+ return VERR_REM_NO_MORE_BP_SLOTS;
+#else
+ Assert(VALID_PTR(pfnREMR3BreakpointSet));
+ return pfnREMR3BreakpointSet(pVM, Address);
+#endif
+}
+
+REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3BreakpointClear));
+ return pfnREMR3BreakpointClear(pVM, Address);
+#endif
+}
+
+REMR3DECL(int) REMR3EmulateInstruction(PVM pVM, PVMCPU pVCpu)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3EmulateInstruction));
+ return pfnREMR3EmulateInstruction(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(int) REMR3Run(PVM pVM, PVMCPU pVCpu)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3Run));
+ return pfnREMR3Run(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(int) REMR3State(PVM pVM, PVMCPU pVCpu)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3State));
+ return pfnREMR3State(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(int) REMR3StateBack(PVM pVM, PVMCPU pVCpu)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3StateBack));
+ return pfnREMR3StateBack(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(void) REMR3StateUpdate(PVM pVM, PVMCPU pVCpu)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3StateUpdate));
+ pfnREMR3StateUpdate(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(void) REMR3A20Set(PVM pVM, PVMCPU pVCpu, bool fEnable)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3A20Set));
+ pfnREMR3A20Set(pVM, pVCpu, fEnable);
+#endif
+}
+
+REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3ReplayHandlerNotifications));
+ pfnREMR3ReplayHandlerNotifications(pVM);
+#endif
+}
+
+REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, PVMCPU pVCpu, RTGCPTR pvCodePage)
+{
+#ifdef USE_REM_STUBS
+ return VINF_SUCCESS;
+#else
+ Assert(VALID_PTR(pfnREMR3NotifyCodePageChanged));
+ return pfnREMR3NotifyCodePageChanged(pVM, pVCpu, pvCodePage);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, unsigned fFlags)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyPhysRamRegister));
+ pfnREMR3NotifyPhysRamRegister(pVM, GCPhys, cb, fFlags);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyPhysRomRegister));
+ pfnREMR3NotifyPhysRomRegister(pVM, GCPhys, cb, pvCopy, fShadow);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyPhysRamDeregister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyPhysRamDeregister));
+ pfnREMR3NotifyPhysRamDeregister(pVM, GCPhys, cb);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalRegister));
+ pfnREMR3NotifyHandlerPhysicalRegister(pVM, enmKind, GCPhys, cb, fHasHCHandler);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalDeregister));
+ pfnREMR3NotifyHandlerPhysicalDeregister(pVM, enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalModify));
+ pfnREMR3NotifyHandlerPhysicalModify(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM);
+#endif
+}
+
+REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys)
+{
+#ifdef USE_REM_STUBS
+ return false;
+#else
+ Assert(VALID_PTR(pfnREMR3IsPageAccessHandled));
+ return pfnREMR3IsPageAccessHandled(pVM, GCPhys);
+#endif
+}
+
+REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+#ifdef USE_REM_STUBS
+ return VERR_NOT_IMPLEMENTED;
+#else
+ Assert(VALID_PTR(pfnREMR3DisasEnableStepping));
+ return pfnREMR3DisasEnableStepping(pVM, fEnable);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM, PVMCPU pVCpu)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyInterruptSet));
+ pfnREMR3NotifyInterruptSet(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM, PVMCPU pVCpu)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyInterruptClear));
+ pfnREMR3NotifyInterruptClear(pVM, pVCpu);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM, PVMCPU pVCpuDst)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyTimerPending));
+ pfnREMR3NotifyTimerPending(pVM, pVCpuDst);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyDmaPending));
+ pfnREMR3NotifyDmaPending(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+ Assert(VALID_PTR(pfnREMR3NotifyQueuePending));
+ pfnREMR3NotifyQueuePending(pVM);
+#endif
+}
+
+REMR3DECL(void) REMR3NotifyFF(PVM pVM)
+{
+#ifndef USE_REM_STUBS
+ /* the timer can call this early on, so don't be picky. */
+ if (pfnREMR3NotifyFF)
+ pfnREMR3NotifyFF(pVM);
+#endif
+}
diff --git a/src/recompiler/VBoxREMWrapperA.asm b/src/recompiler/VBoxREMWrapperA.asm
new file mode 100644
index 00000000..a947031f
--- /dev/null
+++ b/src/recompiler/VBoxREMWrapperA.asm
@@ -0,0 +1,912 @@
+; $Id: VBoxREMWrapperA.asm $
+;; @file
+; VBoxREM Wrapper, Assembly routines and wrapper Templates.
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "iprt/asmdefs.mac"
+
+%define REM_FIXUP_32_REAL_STUFF 0deadbeefh
+%define REM_FIXUP_64_REAL_STUFF 0deadf00df00ddeadh
+%define REM_FIXUP_64_DESC 0dead00010001deadh
+%define REM_FIXUP_64_LOG_ENTRY 0dead00020002deadh
+%define REM_FIXUP_64_LOG_EXIT 0dead00030003deadh
+%define REM_FIXUP_64_WRAP_GCC_CB 0dead00040004deadh
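+
+; Note: keep these in sync with the REM_FIXUP_* defines in VBoxREMWrapper.cpp,
+; which scans the copied wrapper templates for these exact byte patterns.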
+
+;%define ENTRY_LOGGING 1
+;%define EXIT_LOGGING 1
+
+
+%ifdef RT_ARCH_AMD64
+ ;;
+ ; 64-bit pushad
+ %macro MY_PUSHAQ 0
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ push rdi
+ push rbp
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+
+ ;;
+ ; 64-bit popad
+ %macro MY_POPAQ 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop r11
+ pop r10
+ pop r9
+ pop r8
+ pop rbp
+ pop rdi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ %endmacro
+
+ ;;
+ ; Entry logging
+ %ifdef ENTRY_LOGGING
+ %macro LOG_ENTRY 0
+ MY_PUSHAQ
+ push rbp
+ mov rbp, rsp
+ and rsp, ~0fh
+ sub rsp, 20h ; shadow space
+
+ %ifdef RT_OS_WINDOWS
+ mov rcx, REM_FIXUP_64_DESC
+ %else
+ mov rdi, REM_FIXUP_64_DESC
+ %endif
+ mov rax, REM_FIXUP_64_LOG_ENTRY
+ call rax
+
+ leave
+ MY_POPAQ
+ %endmacro
+ %else
+ %define LOG_ENTRY
+ %endif
+
+ ;;
+ ; Exit logging
+ %ifdef EXIT_LOGGING
+ %macro LOG_EXIT 0
+ MY_PUSHAQ
+ push rbp
+ mov rbp, rsp
+ and rsp, ~0fh
+ sub rsp, 20h ; shadow space
+
+ %ifdef RT_OS_WINDOWS
+ mov rdx, rax
+ mov rcx, REM_FIXUP_64_DESC
+ %else
+ mov rsi, rax
+ mov rdi, REM_FIXUP_64_DESC
+ %endif
+ mov rax, REM_FIXUP_64_LOG_EXIT
+ call rax
+
+ leave
+ MY_POPAQ
+ %endmacro
+ %else
+ %define LOG_EXIT
+ %endif
+
+%else
+ %define LOG_ENTRY
+ %define LOG_EXIT
+%endif
+
+
+BEGINCODE
+
+%ifdef RT_OS_WINDOWS
+ %ifdef RT_ARCH_AMD64
+
+
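+;;
+; The WrapGCC2MSC<N>Int templates convert a call made in the System V AMD64
+; (GCC) convention into one following the Microsoft x64 convention: the
+; integer arguments are moved from rdi, rsi, rdx, rcx, r8 and r9 into rcx,
+; rdx, r8, r9 and stack slots above the 20h byte shadow area, and the real
+; target is then reached through one of the REM_FIXUP_* markers patched in
+; by VBoxREMWrapper.cpp.
+;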
+BEGINPROC WrapGCC2MSC0Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC0Int
+
+
+BEGINPROC WrapGCC2MSC1Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC1Int
+
+
+BEGINPROC WrapGCC2MSC2Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC2Int
+
+
+BEGINPROC WrapGCC2MSC3Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC3Int
+
+
+BEGINPROC WrapGCC2MSC4Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC4Int
+
+
+BEGINPROC WrapGCC2MSC5Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 30h
+
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC5Int
+
+
+BEGINPROC WrapGCC2MSC6Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 30h
+
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC6Int
+
+
+BEGINPROC WrapGCC2MSC7Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 40h
+
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC7Int
+
+
+BEGINPROC WrapGCC2MSC8Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 40h
+
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC8Int
+
+
+BEGINPROC WrapGCC2MSC9Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 50h
+
+ mov rax, [rbp + 20h]
+ mov [rsp + 40h], rax
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC9Int
+
+
+BEGINPROC WrapGCC2MSC10Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 50h
+
+ mov r11, [rbp + 28h]
+ mov [rsp + 48h], r11
+ mov rax, [rbp + 20h]
+ mov [rsp + 40h], rax
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC10Int
+
+
+BEGINPROC WrapGCC2MSC11Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 60h
+
+ mov r10, [rbp + 30h]
+ mov [rsp + 50h], r10
+ mov r11, [rbp + 28h]
+ mov [rsp + 48h], r11
+ mov rax, [rbp + 20h]
+ mov [rsp + 40h], rax
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC11Int
+
+
+BEGINPROC WrapGCC2MSC12Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 60h
+
+ mov rax, [rbp + 38h]
+ mov [rsp + 58h], rax
+ mov r10, [rbp + 30h]
+ mov [rsp + 50h], r10
+ mov r11, [rbp + 28h]
+ mov [rsp + 48h], r11
+ mov rax, [rbp + 20h]
+ mov [rsp + 40h], rax
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov r8, rdx
+ mov rdx, rsi
+ mov rcx, rdi
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC12Int
+
+
+
+BEGINPROC WrapGCC2MSCVariadictInt
+ LOG_ENTRY
+%ifdef DEBUG
+ ; check that there are NO floating point arguments in XMM registers!
+ or rax, rax
+ jz .ok
+ int3
+.ok:
+%endif
+ sub rsp, 28h
+ mov r11, [rsp + 28h] ; r11 = return address.
+ mov [rsp + 28h], r9
+ mov [rsp + 20h], r8
+ mov r9, rcx
+ mov [rsp + 18h], r9 ; (*)
+ mov r8, rdx
+ mov [rsp + 10h], r8 ; (*)
+ mov rdx, rsi
+ mov [rsp + 8h], rdx ; (*)
+ mov rcx, rdi
+ mov [rsp], rcx ; (*)
+ mov rsi, r11 ; rsi is preserved by the callee.
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ add rsp, 30h
+ LOG_EXIT
+ jmp rsi
+ ; (*) unconditionally spill the registers, just in case '...' implies weird stuff on MSC. Check this out!
+ENDPROC WrapGCC2MSCVariadictInt
+
+
+;;
+; Custom template for SSMR3RegisterInternal.
+;
+; (This is based on the WrapGCC2MSC11Int template.)
+;
+; @cproto
+;
+; SSMR3DECL(int) SSMR3RegisterInternal(PVM pVM, const char *pszName, uint32_t u32Instance, uint32_t u32Version, size_t cbGuess,
+; PFNSSMINTLIVEPREP pfnLivePrep, PFNSSMINTLIVEEXEC pfnLiveExec, PFNSSMINTLIVEVOTE pfnLiveVote,
+; PFNSSMINTSAVEPREP pfnSavePrep, PFNSSMINTSAVEEXEC pfnSaveExec, PFNSSMINTSAVEDONE pfnSaveDone,
+; PFNSSMINTLOADPREP pfnLoadPrep, PFNSSMINTLOADEXEC pfnLoadExec, PFNSSMINTLOADDONE pfnLoadDone);
+;
+; @param pVM rdi 0
+; @param pszName rsi 1
+; @param u32Instance rdx 2
+; @param u32Version rcx 3
+; @param cbGuess r8 4
+; @param pfnLivePrep r9 5
+; @param pfnLiveExec rbp + 10h 6
+; @param pfnLiveVote rbp + 18h 7
+; @param pfnSavePrep rbp + 20h 8
+; @param pfnSaveExec rbp + 28h 9
+; @param pfnSaveDone rbp + 30h 10
+; @param pfnLoadPrep rbp + 38h 11
+; @param pfnLoadExec rbp + 40h 12
+; @param pfnLoadDone rbp + 48h 13
+;
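+; The Microsoft x64 stack slots filled below are rsp+20h..68h for parameters
+; 4 thru 13, with the first four parameters going in rcx, rdx, r8 and r9.
+;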
+BEGINPROC WrapGCC2MSC_SSMR3RegisterInternal
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+
+ sub rsp, 80h
+
+ mov r10, [rbp + 48h]
+ mov [rsp + 68h], r10 ; pfnLoadDone
+ mov r11, [rbp + 40h]
+ mov [rsp + 60h], r11 ; pfnLoadExec
+ mov rax, [rbp + 38h]
+ mov [rsp + 58h], rax ; pfnLoadPrep
+ mov r10, [rbp + 30h]
+ mov [rsp + 50h], r10 ; pfnSaveDone
+ mov r11, [rbp + 28h]
+ mov [rsp + 48h], r11 ; pfnSaveExec
+ mov rax, [rbp + 20h]
+ mov [rsp + 40h], rax ; pfnSavePrep
+ mov r10, [rbp + 18h]
+ mov [rsp + 38h], r10 ; pfnLiveVote
+ mov r11, [rbp + 10h]
+ mov [rsp + 30h], r11 ; pfnLiveExec
+ mov [rsp + 28h], r9 ; pfnLivePrep
+ mov [rsp + 20h], r8
+ mov [rsp + 18h], rcx ; -> r9
+ mov [rsp + 10h], rdx ; -> r8
+ mov [rsp + 08h], rsi ; -> rdx
+ mov [rsp], rdi ; -> rcx
+
+ ; Now convert the function pointers. Have to setup a new shadow
+ ; space here since the SSMR3RegisterInternal one is already in use.
+ sub rsp, 20h
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 28h + 20h] ; pValue
+ mov r8d, 5 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 30h + 20h] ; pValue
+ mov r8d, 6 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 38h + 20h] ; pValue
+ mov r8d, 7 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 40h + 20h] ; pValue
+ mov r8d, 8 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 48h + 20h] ; pValue
+ mov r8d, 9 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 50h + 20h] ; pValue
+ mov r8d, 10 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 58h + 20h] ; pValue
+ mov r8d, 11 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 60h + 20h] ; pValue
+ mov r8d, 12 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ mov rcx, REM_FIXUP_64_DESC ; pDesc
+ lea rdx, [rsp + 68h + 20h] ; pValue
+ mov r8d, 13 ; iParam
+ mov rax, REM_FIXUP_64_WRAP_GCC_CB
+ call rax
+
+ add rsp, 20h
+
+ ; finally do the call.
+ mov r9, [rsp + 18h]
+ mov r8, [rsp + 10h]
+ mov rdx, [rsp + 08h]
+ mov rcx, [rsp]
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapGCC2MSC_SSMR3RegisterInternal
+
+
+;
+; The other way around:
+;
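+; Note that rsi and rdi are callee-saved in the Microsoft convention but
+; volatile in the System V one, so each template spills them to the frame
+; and restores them after calling the GCC-convention code.
+;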
+
+
+BEGINPROC WrapMSC2GCC0Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 10h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC0Int
+
+
+BEGINPROC WrapMSC2GCC1Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC1Int
+
+
+BEGINPROC WrapMSC2GCC2Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+%ifdef USE_DIRECT_CALLS
+ call $+5+REM_FIXUP_32_REAL_STUFF
+%else
+ mov rax, REM_FIXUP_64_REAL_STUFF
+ call rax
+%endif
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC2Int
+
+
+BEGINPROC WrapMSC2GCC3Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC3Int
+
+
+BEGINPROC WrapMSC2GCC4Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC4Int
+
+
+BEGINPROC WrapMSC2GCC5Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rbp + 30h]
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC5Int
+
+
+BEGINPROC WrapMSC2GCC6Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 20h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rbp + 30h]
+ mov r9, [rbp + 38h]
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC6Int
+
+
+BEGINPROC WrapMSC2GCC7Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 30h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rbp + 30h]
+ mov r9, [rbp + 38h]
+ mov r10, [rbp + 40h]
+ mov [rsp], r10
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC7Int
+
+
+BEGINPROC WrapMSC2GCC8Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 30h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rbp + 30h]
+ mov r9, [rbp + 38h]
+ mov r10, [rbp + 40h]
+ mov [rsp], r10
+ mov r11, [rbp + 48h]
+ mov [rsp + 8], r11
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC8Int
+
+
+BEGINPROC WrapMSC2GCC9Int
+ LOG_ENTRY
+ push rbp
+ mov rbp, rsp
+ sub rsp, 40h
+ mov [rbp - 10h], rsi
+ mov [rbp - 18h], rdi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, [rbp + 30h]
+ mov r9, [rbp + 38h]
+ mov r10, [rbp + 40h]
+ mov [rsp], r10
+ mov r11, [rbp + 48h]
+ mov [rsp + 8], r11
+ mov rax, [rbp + 50h]
+ mov [rsp + 10h], rax
+ call $+5+REM_FIXUP_32_REAL_STUFF
+
+ mov rdi, [rbp - 18h]
+ mov rsi, [rbp - 10h]
+ leave
+ LOG_EXIT
+ ret
+ENDPROC WrapMSC2GCC9Int
+
+ %endif ; RT_ARCH_AMD64
+%endif ; RT_OS_WINDOWS
+
diff --git a/src/recompiler/VBoxRecompiler.c b/src/recompiler/VBoxRecompiler.c
new file mode 100644
index 00000000..fd19df22
--- /dev/null
+++ b/src/recompiler/VBoxRecompiler.c
@@ -0,0 +1,5481 @@
+/* $Id: VBoxRecompiler.c $ */
+/** @file
+ * VBox Recompiler - QEMU.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/** @page pg_rem REM - Recompiled Execution Manager.
+ *
+ * The recompiled execution manager (REM) serves as the final fallback for
+ * guest execution after HM / raw-mode and IEM have given up.
+ *
+ * The REM is QEMU with a whole bunch of VBox-specific customizations for
+ * interfacing with PATM, CSAM, PGM and other components.
+ *
+ * @sa @ref grp_rem
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_REM
+#include <stdio.h> /* FILE */
+#include "osdep.h"
+#include "config.h"
+#include "cpu.h"
+#include "exec-all.h"
+#include "ioport.h"
+
+#include <VBox/vmm/rem.h>
+#include <VBox/vmm/vmapi.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/ssm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/trpm.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/dbg.h>
+#include <VBox/vmm/apic.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/patm.h>
+#include <VBox/vmm/csam.h>
+#include "REMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/uvm.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+
+#include <VBox/log.h>
+#include <iprt/alloca.h>
+#include <iprt/semaphore.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+#include <iprt/string.h>
+
+/* Don't wanna include everything. */
+extern void cpu_exec_init_all(uintptr_t tb_size);
+extern void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+extern void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+extern void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+extern void tlb_flush_page(CPUX86State *env, target_ulong addr);
+extern void tlb_flush(CPUX86State *env, int flush_global);
+extern void sync_seg(CPUX86State *env1, int seg_reg, int selector);
+extern void sync_ldtr(CPUX86State *env1, int selector);
+
+#ifdef VBOX_STRICT
+ram_addr_t get_phys_page_offset(target_ulong addr);
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+
+/** Copy 80-bit fpu register at pSrc to pDst.
+ * This is probably faster than *calling* memcpy.
+ */
+#define REM_COPY_FPU_REG(pDst, pSrc) \
+ do { *(PX86FPUMMX)(pDst) = *(const X86FPUMMX *)(pSrc); } while (0)
+
+/** How remR3RunLoggingStep operates. */
+#define REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+
+
+/** Selector flag shift between qemu and VBox.
+ * VBox shifts the qemu bits to the right. */
+#define SEL_FLAGS_SHIFT (8)
+/** Mask applied to the shifted qemu selector flags to get the attributes VBox
+ * (VT-x) needs. */
+#define SEL_FLAGS_SMASK UINT32_C(0x1F0FF)
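+/* I.e. the conversion used when syncing a segment register back to the VBox
+ * context is expected to be essentially:
+ * Attr.u = (env.segs[x].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; */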
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM);
+static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass);
+static DECLCALLBACK(int) remR3LoadDone(PVM pVM, PSSMHANDLE pSSM);
+static void remR3StateUpdate(PVM pVM, PVMCPU pVCpu);
+static int remR3InitPhysRamSizeAndDirtyMap(PVM pVM, bool fGuarded);
+
+static uint32_t remR3MMIOReadU8(void *pvEnv, target_phys_addr_t GCPhys);
+static uint32_t remR3MMIOReadU16(void *pvEnv, target_phys_addr_t GCPhys);
+static uint32_t remR3MMIOReadU32(void *pvEnv, target_phys_addr_t GCPhys);
+static void remR3MMIOWriteU8(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32);
+static void remR3MMIOWriteU16(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32);
+static void remR3MMIOWriteU32(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32);
+
+static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys);
+static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys);
+static void remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+static void remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32);
+
+static void remR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM);
+static void remR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler);
+static void remR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM);
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+
+/** @todo Move stats to REM::s some rainy day we have nothing to do. */
+#ifdef VBOX_WITH_STATISTICS
+static STAMPROFILEADV gStatExecuteSingleInstr;
+static STAMPROFILEADV gStatCompilationQEmu;
+static STAMPROFILEADV gStatRunCodeQEmu;
+static STAMPROFILEADV gStatTotalTimeQEmu;
+static STAMPROFILEADV gStatTimers;
+static STAMPROFILEADV gStatTBLookup;
+static STAMPROFILEADV gStatIRQ;
+static STAMPROFILEADV gStatRawCheck;
+static STAMPROFILEADV gStatMemRead;
+static STAMPROFILEADV gStatMemWrite;
+static STAMPROFILE gStatGCPhys2HCVirt;
+static STAMCOUNTER gStatCpuGetTSC;
+static STAMCOUNTER gStatRefuseTFInhibit;
+static STAMCOUNTER gStatRefuseVM86;
+static STAMCOUNTER gStatRefusePaging;
+static STAMCOUNTER gStatRefusePAE;
+static STAMCOUNTER gStatRefuseIOPLNot0;
+static STAMCOUNTER gStatRefuseIF0;
+static STAMCOUNTER gStatRefuseCode16;
+static STAMCOUNTER gStatRefuseWP0;
+static STAMCOUNTER gStatRefuseRing1or2;
+static STAMCOUNTER gStatRefuseCanExecute;
+static STAMCOUNTER gaStatRefuseStale[6];
+static STAMCOUNTER gStatREMGDTChange;
+static STAMCOUNTER gStatREMIDTChange;
+static STAMCOUNTER gStatREMLDTRChange;
+static STAMCOUNTER gStatREMTRChange;
+static STAMCOUNTER gStatSelOutOfSync[6];
+static STAMCOUNTER gStatSelOutOfSyncStateBack[6];
+static STAMCOUNTER gStatFlushTBs;
+#endif
+/* in exec.c */
+extern uint32_t tlb_flush_count;
+extern uint32_t tb_flush_count;
+extern uint32_t tb_phys_invalidate_count;
+
+/*
+ * Global stuff.
+ */
+
+/** MMIO read callbacks. */
+CPUReadMemoryFunc *g_apfnMMIORead[3] =
+{
+ remR3MMIOReadU8,
+ remR3MMIOReadU16,
+ remR3MMIOReadU32
+};
+
+/** MMIO write callbacks. */
+CPUWriteMemoryFunc *g_apfnMMIOWrite[3] =
+{
+ remR3MMIOWriteU8,
+ remR3MMIOWriteU16,
+ remR3MMIOWriteU32
+};
+
+/** Handler read callbacks. */
+CPUReadMemoryFunc *g_apfnHandlerRead[3] =
+{
+ remR3HandlerReadU8,
+ remR3HandlerReadU16,
+ remR3HandlerReadU32
+};
+
+/** Handler write callbacks. */
+CPUWriteMemoryFunc *g_apfnHandlerWrite[3] =
+{
+ remR3HandlerWriteU8,
+ remR3HandlerWriteU16,
+ remR3HandlerWriteU32
+};
+
+
+#ifdef VBOX_WITH_DEBUGGER
+/*
+ * Debugger commands.
+ */
+static FNDBGCCMD remR3CmdDisasEnableStepping;
+
+/** '.remstep' arguments. */
+static const DBGCVARDESC g_aArgRemStep[] =
+{
+ /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */
+ { 0, ~0U, DBGCVAR_CAT_NUMBER, 0, "on/off", "Boolean value/mnemonic indicating the new state." },
+};
+
+/** Command descriptors. */
+static const DBGCCMD g_aCmds[] =
+{
+ {
+ .pszCmd ="remstep",
+ .cArgsMin = 0,
+ .cArgsMax = 1,
+ .paArgDescs = &g_aArgRemStep[0],
+ .cArgDescs = RT_ELEMENTS(g_aArgRemStep),
+ .fFlags = 0,
+ .pfnHandler = remR3CmdDisasEnableStepping,
+ .pszSyntax = "[on/off]",
+ .pszDescription = "Enable or disable single stepping with logged disassembly. "
+ "If no argument is given, the current state is shown."
+ }
+};
+#endif
+
+/** Prologue code, must be in lower 4G to simplify jumps to/from generated code.
+ * @todo huh??? That cannot be the case on the mac... So, this
+ * point is probably not valid any longer. */
+uint8_t *code_gen_prologue;
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+void remAbort(int rc, const char *pszTip);
+extern int testmath(void);
+
+/* Put them here to avoid unused variable warning. */
+AssertCompile(RT_SIZEOFMEMB(VM, rem.padding) >= RT_SIZEOFMEMB(VM, rem.s));
+#if !defined(IPRT_NO_CRT) && (defined(RT_OS_LINUX) || defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS))
+//AssertCompileMemberSize(REM, Env, REM_ENV_SIZE);
+/* Why did this have to be identical?? */
+AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE);
+#else
+AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE);
+#endif
+
+
+/**
+ * Initializes the REM.
+ *
+ * @returns VBox status code.
+ * @param pVM The VM to operate on.
+ */
+REMR3DECL(int) REMR3Init(PVM pVM)
+{
+ PREMHANDLERNOTIFICATION pCur;
+ uint32_t u32Dummy;
+ int rc;
+ unsigned i;
+
+#ifdef VBOX_ENABLE_VBOXREM64
+ LogRel(("Using 64-bit aware REM\n"));
+#endif
+
+ /*
+ * Assert sanity.
+ */
+ AssertReleaseMsg(sizeof(pVM->rem.padding) >= sizeof(pVM->rem.s), ("%#x >= %#x; sizeof(Env)=%#x\n", sizeof(pVM->rem.padding), sizeof(pVM->rem.s), sizeof(pVM->rem.s.Env)));
+ AssertReleaseMsg(sizeof(pVM->rem.s.Env) <= REM_ENV_SIZE, ("%#x == %#x\n", sizeof(pVM->rem.s.Env), REM_ENV_SIZE));
+ AssertReleaseMsg(!(RT_UOFFSETOF(VM, rem) & 31), ("off=%#zx\n", RT_UOFFSETOF(VM, rem)));
+#if 0 /* just an annoyance at the moment. */
+#if defined(DEBUG) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD) /// @todo fix the solaris and freebsd math stuff.
+ Assert(!testmath());
+#endif
+#endif
+
+ /*
+ * Init some internal data members.
+ */
+ pVM->rem.s.offVM = RT_UOFFSETOF(VM, rem.s);
+ pVM->rem.s.Env.pVM = pVM;
+#ifdef CPU_RAW_MODE_INIT
+ pVM->rem.s.state |= CPU_RAW_MODE_INIT;
+#endif
+
+ /*
+ * Initialize the REM critical section.
+ *
+ * Note: This is not a 100% safe solution as updating the internal memory state while another VCPU
+ * is executing code could be dangerous. Taking the REM lock is not an option due to the danger of
+ * deadlocks. (mostly pgm vs rem locking)
+ */
+ rc = PDMR3CritSectInit(pVM, &pVM->rem.s.CritSectRegister, RT_SRC_POS, "REM-Register");
+ AssertRCReturn(rc, rc);
+
+ /* ctx. */
+ pVM->rem.s.pCtx = NULL; /* set when executing code. */
+ AssertMsg(MMR3PhysGetRamSize(pVM) == 0, ("Init order has changed! REM depends on notification about ALL physical memory registrations\n"));
+
+ /* ignore all notifications */
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ code_gen_prologue = RTMemExecAlloc(_1K);
+ AssertLogRelReturn(code_gen_prologue, VERR_NO_MEMORY);
+
+ cpu_exec_init_all(0);
+
+ /*
+ * Init the recompiler.
+ */
+ if (!cpu_x86_init(&pVM->rem.s.Env, "vbox"))
+ {
+ AssertMsgFailed(("cpu_x86_init failed - impossible!\n"));
+ return VERR_GENERAL_FAILURE;
+ }
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+ CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+ CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext3_features, &pVM->rem.s.Env.cpuid_ext2_features);
+
+ EMRemLock(pVM);
+ cpu_reset(&pVM->rem.s.Env);
+ EMRemUnlock(pVM);
+
+ /* allocate code buffer for single instruction emulation. */
+ pVM->rem.s.Env.cbCodeBuffer = 4096;
+ pVM->rem.s.Env.pvCodeBuffer = RTMemExecAlloc(pVM->rem.s.Env.cbCodeBuffer);
+ AssertMsgReturn(pVM->rem.s.Env.pvCodeBuffer, ("Failed to allocate code buffer!\n"), VERR_NO_MEMORY);
+
+ /* Finally, set the cpu_single_env global. */
+ cpu_single_env = &pVM->rem.s.Env;
+
+ /* Nothing is pending by default */
+ pVM->rem.s.uStateLoadPendingInterrupt = REM_NO_PENDING_IRQ;
+
+ /*
+ * Register ram types.
+ */
+ pVM->rem.s.iMMIOMemType = cpu_register_io_memory(g_apfnMMIORead, g_apfnMMIOWrite, &pVM->rem.s.Env);
+ AssertReleaseMsg(pVM->rem.s.iMMIOMemType >= 0, ("pVM->rem.s.iMMIOMemType=%d\n", pVM->rem.s.iMMIOMemType));
+ pVM->rem.s.iHandlerMemType = cpu_register_io_memory(g_apfnHandlerRead, g_apfnHandlerWrite, pVM);
+ AssertReleaseMsg(pVM->rem.s.iHandlerMemType >= 0, ("pVM->rem.s.iHandlerMemType=%d\n", pVM->rem.s.iHandlerMemType));
+ Log2(("REM: iMMIOMemType=%d iHandlerMemType=%d\n", pVM->rem.s.iMMIOMemType, pVM->rem.s.iHandlerMemType));
+
+ /* stop ignoring. */
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+
+ /*
+ * Register the saved state data unit.
+ */
+ rc = SSMR3RegisterInternal(pVM, "rem", 1, REM_SAVED_STATE_VERSION, sizeof(uint32_t) * 10,
+ NULL, NULL, NULL,
+ NULL, remR3Save, NULL,
+ NULL, remR3Load, remR3LoadDone);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef VBOX_WITH_DEBUGGER
+ /*
+ * Debugger commands.
+ */
+ static bool fRegisteredCmds = false;
+ if (!fRegisteredCmds)
+ {
+ int rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds));
+ if (RT_SUCCESS(rc))
+ fRegisteredCmds = true;
+ }
+#endif
+
+#ifdef VBOX_WITH_STATISTICS
+ /*
+ * Statistics.
+ */
+ STAM_REG(pVM, &gStatExecuteSingleInstr, STAMTYPE_PROFILE, "/PROF/REM/SingleInstr",STAMUNIT_TICKS_PER_CALL, "Profiling single instruction emulation.");
+ STAM_REG(pVM, &gStatCompilationQEmu, STAMTYPE_PROFILE, "/PROF/REM/Compile", STAMUNIT_TICKS_PER_CALL, "Profiling QEmu compilation.");
+ STAM_REG(pVM, &gStatRunCodeQEmu, STAMTYPE_PROFILE, "/PROF/REM/Runcode", STAMUNIT_TICKS_PER_CALL, "Profiling QEmu code execution.");
+ STAM_REG(pVM, &gStatTotalTimeQEmu, STAMTYPE_PROFILE, "/PROF/REM/Emulate", STAMUNIT_TICKS_PER_CALL, "Profiling code emulation.");
+ STAM_REG(pVM, &gStatTimers, STAMTYPE_PROFILE, "/PROF/REM/Timers", STAMUNIT_TICKS_PER_CALL, "Profiling timer queue processing.");
+ STAM_REG(pVM, &gStatTBLookup, STAMTYPE_PROFILE, "/PROF/REM/TBLookup", STAMUNIT_TICKS_PER_CALL, "Profiling translation block lookup.");
+ STAM_REG(pVM, &gStatIRQ, STAMTYPE_PROFILE, "/PROF/REM/IRQ", STAMUNIT_TICKS_PER_CALL, "Profiling IRQ delivery.");
+ STAM_REG(pVM, &gStatRawCheck, STAMTYPE_PROFILE, "/PROF/REM/RawCheck", STAMUNIT_TICKS_PER_CALL, "Profiling remR3CanExecuteRaw calls.");
+ STAM_REG(pVM, &gStatMemRead, STAMTYPE_PROFILE, "/PROF/REM/MemRead", STAMUNIT_TICKS_PER_CALL, "Profiling memory access.");
+ STAM_REG(pVM, &gStatMemWrite, STAMTYPE_PROFILE, "/PROF/REM/MemWrite", STAMUNIT_TICKS_PER_CALL, "Profiling memory access.");
+ STAM_REG(pVM, &gStatGCPhys2HCVirt, STAMTYPE_PROFILE, "/PROF/REM/GCPhys2HCVirt", STAMUNIT_TICKS_PER_CALL, "Profiling memory conversion (PGMR3PhysTlbGCPhys2Ptr).");
+
+ STAM_REG(pVM, &gStatCpuGetTSC, STAMTYPE_COUNTER, "/REM/CpuGetTSC", STAMUNIT_OCCURENCES, "cpu_get_tsc calls");
+
+ STAM_REG(pVM, &gStatRefuseTFInhibit, STAMTYPE_COUNTER, "/REM/Refuse/TFInibit", STAMUNIT_OCCURENCES, "Raw mode refused because of TF or irq inhibit");
+ STAM_REG(pVM, &gStatRefuseVM86, STAMTYPE_COUNTER, "/REM/Refuse/VM86", STAMUNIT_OCCURENCES, "Raw mode refused because of VM86");
+ STAM_REG(pVM, &gStatRefusePaging, STAMTYPE_COUNTER, "/REM/Refuse/Paging", STAMUNIT_OCCURENCES, "Raw mode refused because of disabled paging/pm");
+ STAM_REG(pVM, &gStatRefusePAE, STAMTYPE_COUNTER, "/REM/Refuse/PAE", STAMUNIT_OCCURENCES, "Raw mode refused because of PAE");
+ STAM_REG(pVM, &gStatRefuseIOPLNot0, STAMTYPE_COUNTER, "/REM/Refuse/IOPLNot0", STAMUNIT_OCCURENCES, "Raw mode refused because of IOPL != 0");
+ STAM_REG(pVM, &gStatRefuseIF0, STAMTYPE_COUNTER, "/REM/Refuse/IF0", STAMUNIT_OCCURENCES, "Raw mode refused because of IF=0");
+ STAM_REG(pVM, &gStatRefuseCode16, STAMTYPE_COUNTER, "/REM/Refuse/Code16", STAMUNIT_OCCURENCES, "Raw mode refused because of 16 bit code");
+ STAM_REG(pVM, &gStatRefuseWP0, STAMTYPE_COUNTER, "/REM/Refuse/WP0", STAMUNIT_OCCURENCES, "Raw mode refused because of WP=0");
+ STAM_REG(pVM, &gStatRefuseRing1or2, STAMTYPE_COUNTER, "/REM/Refuse/Ring1or2", STAMUNIT_OCCURENCES, "Raw mode refused because of ring 1/2 execution");
+ STAM_REG(pVM, &gStatRefuseCanExecute, STAMTYPE_COUNTER, "/REM/Refuse/CanExecuteRaw", STAMUNIT_OCCURENCES, "Raw mode refused because of cCanExecuteRaw");
+ STAM_REG(pVM, &gaStatRefuseStale[R_ES], STAMTYPE_COUNTER, "/REM/Refuse/StaleES", STAMUNIT_OCCURENCES, "Raw mode refused because of stale ES");
+ STAM_REG(pVM, &gaStatRefuseStale[R_CS], STAMTYPE_COUNTER, "/REM/Refuse/StaleCS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale CS");
+ STAM_REG(pVM, &gaStatRefuseStale[R_SS], STAMTYPE_COUNTER, "/REM/Refuse/StaleSS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale SS");
+ STAM_REG(pVM, &gaStatRefuseStale[R_DS], STAMTYPE_COUNTER, "/REM/Refuse/StaleDS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale DS");
+ STAM_REG(pVM, &gaStatRefuseStale[R_FS], STAMTYPE_COUNTER, "/REM/Refuse/StaleFS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale FS");
+ STAM_REG(pVM, &gaStatRefuseStale[R_GS], STAMTYPE_COUNTER, "/REM/Refuse/StaleGS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale GS");
+ STAM_REG(pVM, &gStatFlushTBs, STAMTYPE_COUNTER, "/REM/FlushTB", STAMUNIT_OCCURENCES, "Number of TB flushes");
+
+ STAM_REG(pVM, &gStatREMGDTChange, STAMTYPE_COUNTER, "/REM/Change/GDTBase", STAMUNIT_OCCURENCES, "GDT base changes");
+ STAM_REG(pVM, &gStatREMLDTRChange, STAMTYPE_COUNTER, "/REM/Change/LDTR", STAMUNIT_OCCURENCES, "LDTR changes");
+ STAM_REG(pVM, &gStatREMIDTChange, STAMTYPE_COUNTER, "/REM/Change/IDTBase", STAMUNIT_OCCURENCES, "IDT base changes");
+ STAM_REG(pVM, &gStatREMTRChange, STAMTYPE_COUNTER, "/REM/Change/TR", STAMUNIT_OCCURENCES, "TR selector changes");
+
+ STAM_REG(pVM, &gStatSelOutOfSync[0], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/ES", STAMUNIT_OCCURENCES, "ES out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSync[1], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/CS", STAMUNIT_OCCURENCES, "CS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSync[2], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/SS", STAMUNIT_OCCURENCES, "SS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSync[3], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/DS", STAMUNIT_OCCURENCES, "DS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSync[4], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/FS", STAMUNIT_OCCURENCES, "FS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSync[5], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/GS", STAMUNIT_OCCURENCES, "GS out of sync");
+
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[0], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/ES", STAMUNIT_OCCURENCES, "ES out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[1], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/CS", STAMUNIT_OCCURENCES, "CS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[2], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/SS", STAMUNIT_OCCURENCES, "SS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[3], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/DS", STAMUNIT_OCCURENCES, "DS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[4], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/FS", STAMUNIT_OCCURENCES, "FS out of sync");
+ STAM_REG(pVM, &gStatSelOutOfSyncStateBack[5], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/GS", STAMUNIT_OCCURENCES, "GS out of sync");
+
+ STAM_REG(pVM, &pVM->rem.s.Env.StatTbFlush, STAMTYPE_PROFILE, "/REM/TbFlush", STAMUNIT_TICKS_PER_CALL, "profiling tb_flush().");
+#endif /* VBOX_WITH_STATISTICS */
+ AssertCompileMemberAlignment(CPUX86State, StatTbFlush, 4);
+ AssertCompileMemberAlignment(CPUX86State, StatTbFlush, 8);
+
+ STAM_REL_REG(pVM, &tb_flush_count, STAMTYPE_U32_RESET, "/REM/TbFlushCount", STAMUNIT_OCCURENCES, "tb_flush() calls");
+ STAM_REL_REG(pVM, &tb_phys_invalidate_count, STAMTYPE_U32_RESET, "/REM/TbPhysInvldCount", STAMUNIT_OCCURENCES, "tb_phys_invalidate() calls");
+ STAM_REL_REG(pVM, &tlb_flush_count, STAMTYPE_U32_RESET, "/REM/TlbFlushCount", STAMUNIT_OCCURENCES, "tlb_flush() calls");
+
+
+#ifdef DEBUG_ALL_LOGGING
+ loglevel = ~0;
+#endif
+
+ /*
+ * Init the handler notification lists.
+ */
+ pVM->rem.s.idxPendingList = UINT32_MAX;
+ pVM->rem.s.idxFreeList = 0;
+
+ for (i = 0 ; i < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications); i++)
+ {
+ pCur = &pVM->rem.s.aHandlerNotifications[i];
+ pCur->idxNext = i + 1;
+ pCur->idxSelf = i;
+ }
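+    /* pCur still refers to the last array element here, so the free list is terminated on it. */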
+ pCur->idxNext = UINT32_MAX; /* the last record. */
+
+ return rc;
+}
+
+
+/**
+ * Finalizes the REM initialization.
+ *
+ * This is called after all components, devices and drivers have
+ * been initialized. Its main purpose is to finish the RAM related
+ * initialization.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The VM handle.
+ */
+REMR3DECL(int) REMR3InitFinalize(PVM pVM)
+{
+ int rc;
+
+ /*
+ * Ram size & dirty bit map.
+ */
+ Assert(!pVM->rem.s.fGCPhysLastRamFixed);
+ pVM->rem.s.fGCPhysLastRamFixed = true;
+#ifdef RT_STRICT
+ rc = remR3InitPhysRamSizeAndDirtyMap(pVM, true /* fGuarded */);
+#else
+ rc = remR3InitPhysRamSizeAndDirtyMap(pVM, false /* fGuarded */);
+#endif
+ return rc;
+}
+
+/**
+ * Initializes ram_list.phys_dirty and ram_list.phys_dirty_size.
+ *
+ * @returns VBox status code.
+ * @param pVM The VM handle.
+ * @param fGuarded Whether to guard the map.
+ */
+static int remR3InitPhysRamSizeAndDirtyMap(PVM pVM, bool fGuarded)
+{
+ int rc = VINF_SUCCESS;
+ RTGCPHYS cb;
+
+ AssertLogRelReturn(QLIST_EMPTY(&ram_list.blocks), VERR_INTERNAL_ERROR_2);
+
+ cb = pVM->rem.s.GCPhysLastRam + 1;
+ AssertLogRelMsgReturn(cb > pVM->rem.s.GCPhysLastRam,
+ ("GCPhysLastRam=%RGp - out of range\n", pVM->rem.s.GCPhysLastRam),
+ VERR_OUT_OF_RANGE);
+
+ ram_list.phys_dirty_size = cb >> PAGE_SHIFT;
+ AssertMsg(((RTGCPHYS)ram_list.phys_dirty_size << PAGE_SHIFT) == cb, ("%RGp\n", cb));
+
+ if (!fGuarded)
+ {
+ ram_list.phys_dirty = MMR3HeapAlloc(pVM, MM_TAG_REM, ram_list.phys_dirty_size);
+ AssertLogRelMsgReturn(ram_list.phys_dirty, ("Failed to allocate %u bytes of dirty page map bytes\n", ram_list.phys_dirty_size), VERR_NO_MEMORY);
+ }
+ else
+ {
+ /*
+         * Round the allocation up to cover the nearest whole 4GB of RAM and leave at least 64KB of guard space after the used part.
+ */
+ uint32_t cbBitmapAligned = RT_ALIGN_32(ram_list.phys_dirty_size, PAGE_SIZE);
+ uint32_t cbBitmapFull = RT_ALIGN_32(ram_list.phys_dirty_size, (_4G >> PAGE_SHIFT));
+ if (cbBitmapFull == cbBitmapAligned)
+ cbBitmapFull += _4G >> PAGE_SHIFT;
+ else if (cbBitmapFull - cbBitmapAligned < _64K)
+ cbBitmapFull += _64K;
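+        /* Illustrative example (sizes assumed, not taken from any configuration): with 512MB of
+           guest RAM phys_dirty_size is 128K bytes (one byte per 4K page); cbBitmapAligned keeps
+           that page aligned while cbBitmapFull rounds it up to 1MB, i.e. the bitmap size covering
+           4GB of RAM, leaving the surplus to become the guard area below. */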
+
+ ram_list.phys_dirty = RTMemPageAlloc(cbBitmapFull);
+ AssertLogRelMsgReturn(ram_list.phys_dirty, ("Failed to allocate %u bytes of dirty page map bytes\n", cbBitmapFull), VERR_NO_MEMORY);
+
+ rc = RTMemProtect(ram_list.phys_dirty + cbBitmapAligned, cbBitmapFull - cbBitmapAligned, RTMEM_PROT_NONE);
+ if (RT_FAILURE(rc))
+ {
+ RTMemPageFree(ram_list.phys_dirty, cbBitmapFull);
+ AssertLogRelRCReturn(rc, rc);
+ }
+
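+        /* Advance the pointer so the used part of the bitmap ends exactly where the
+           RTMEM_PROT_NONE guard region starts; any overrun will then fault immediately. */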
+ ram_list.phys_dirty += cbBitmapAligned - ram_list.phys_dirty_size;
+ }
+
+ /* initialize it. */
+ memset(ram_list.phys_dirty, 0xff, ram_list.phys_dirty_size);
+ return rc;
+}
+
+
+/**
+ * Terminates the REM.
+ *
+ * Termination means cleaning up and freeing all resources; the VM
+ * itself is at this point powered off or suspended.
+ *
+ * @returns VBox status code.
+ * @param pVM The VM to operate on.
+ */
+REMR3DECL(int) REMR3Term(PVM pVM)
+{
+ /*
+ * Statistics.
+ */
+ STAMR3Deregister(pVM->pUVM, "/PROF/REM/*");
+ STAMR3Deregister(pVM->pUVM, "/REM/*");
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * The VM is being reset.
+ *
+ * For the REM component this means to call the cpu_reset() and
+ * reinitialize some state variables.
+ *
+ * @param pVM VM handle.
+ */
+REMR3DECL(void) REMR3Reset(PVM pVM)
+{
+ EMRemLock(pVM); /* Only pro forma, we're in a rendezvous. */
+
+ /*
+ * Reset the REM cpu.
+ */
+ Assert(pVM->rem.s.cIgnoreAll == 0);
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+ cpu_reset(&pVM->rem.s.Env);
+ pVM->rem.s.cInvalidatedPages = 0;
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+ Assert(pVM->rem.s.cIgnoreAll == 0);
+
+ /* Clear raw ring 0 init state */
+ pVM->rem.s.Env.state &= ~CPU_RAW_RING0;
+
+ /* Flush the TBs the next time we execute code here. */
+ pVM->rem.s.fFlushTBs = true;
+
+ EMRemUnlock(pVM);
+}
+
+
+/**
+ * Execute state save operation.
+ *
+ * @returns VBox status code.
+ * @param pVM VM Handle.
+ * @param pSSM SSM operation handle.
+ */
+static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM)
+{
+ PREM pRem = &pVM->rem.s;
+
+ /*
+ * Save the required CPU Env bits.
+ * (Not much because we're never in REM when doing the save.)
+ */
+ LogFlow(("remR3Save:\n"));
+ Assert(!pRem->fInREM);
+ SSMR3PutU32(pSSM, pRem->Env.hflags);
+ SSMR3PutU32(pSSM, ~0); /* separator */
+
+ /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */
+ SSMR3PutU32(pSSM, !!(pRem->Env.state & CPU_RAW_RING0));
+ SSMR3PutU32(pSSM, REM_NO_PENDING_IRQ);
+
+ return SSMR3PutU32(pSSM, ~0); /* terminator */
+}
+
+
+/**
+ * Execute state load operation.
+ *
+ * @returns VBox status code.
+ * @param pVM VM Handle.
+ * @param pSSM SSM operation handle.
+ * @param uVersion Data layout version.
+ * @param uPass The data pass.
+ */
+static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
+{
+ uint32_t u32Dummy;
+ uint32_t fRawRing0 = false;
+ uint32_t u32Sep;
+ uint32_t i;
+ int rc;
+ PREM pRem;
+
+ LogFlow(("remR3Load:\n"));
+ Assert(uPass == SSM_PASS_FINAL); NOREF(uPass);
+
+ /*
+ * Validate version.
+ */
+ if ( uVersion != REM_SAVED_STATE_VERSION
+ && uVersion != REM_SAVED_STATE_VERSION_VER1_6)
+ {
+ AssertMsgFailed(("remR3Load: Invalid version uVersion=%d!\n", uVersion));
+ return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
+ }
+
+ /*
+ * Do a reset to be on the safe side...
+ */
+ REMR3Reset(pVM);
+
+ /*
+ * Ignore all ignorable notifications.
+ * (Not doing this will cause serious trouble.)
+ */
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ /*
+ * Load the required CPU Env bits.
+ * (Not much because we're never in REM when doing the save.)
+ */
+ pRem = &pVM->rem.s;
+ Assert(!pRem->fInREM);
+ SSMR3GetU32(pSSM, &pRem->Env.hflags);
+ if (uVersion == REM_SAVED_STATE_VERSION_VER1_6)
+ {
+ /* Redundant REM CPU state has to be loaded, but can be ignored. */
+ CPUX86State_Ver16 temp;
+ SSMR3GetMem(pSSM, &temp, RT_UOFFSETOF(CPUX86State_Ver16, jmp_env));
+ }
+
+ rc = SSMR3GetU32(pSSM, &u32Sep); /* separator */
+ if (RT_FAILURE(rc))
+ return rc;
+ if (u32Sep != ~0U)
+ {
+ AssertMsgFailed(("u32Sep=%#x\n", u32Sep));
+ return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+ }
+
+ /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */
+ SSMR3GetUInt(pSSM, &fRawRing0);
+ if (fRawRing0)
+ pRem->Env.state |= CPU_RAW_RING0;
+
+ if (uVersion == REM_SAVED_STATE_VERSION_VER1_6)
+ {
+ /*
+ * Load the REM stuff.
+ */
+ /** @todo r=bird: We should just drop all these items, restoring doesn't make
+ * sense. */
+ rc = SSMR3GetU32(pSSM, (uint32_t *)&pRem->cInvalidatedPages);
+ if (RT_FAILURE(rc))
+ return rc;
+ if (pRem->cInvalidatedPages > RT_ELEMENTS(pRem->aGCPtrInvalidatedPages))
+ {
+ AssertMsgFailed(("cInvalidatedPages=%#x\n", pRem->cInvalidatedPages));
+ return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+ }
+ for (i = 0; i < pRem->cInvalidatedPages; i++)
+ SSMR3GetGCPtr(pSSM, &pRem->aGCPtrInvalidatedPages[i]);
+ }
+
+ rc = SSMR3GetUInt(pSSM, &pVM->rem.s.uStateLoadPendingInterrupt);
+ AssertRCReturn(rc, rc);
+ AssertLogRelMsgReturn( pVM->rem.s.uStateLoadPendingInterrupt == REM_NO_PENDING_IRQ
+ || pVM->rem.s.uStateLoadPendingInterrupt < 256,
+ ("uStateLoadPendingInterrupt=%#x\n", pVM->rem.s.uStateLoadPendingInterrupt),
+ VERR_SSM_UNEXPECTED_DATA);
+
+ /* check the terminator. */
+ rc = SSMR3GetU32(pSSM, &u32Sep);
+ if (RT_FAILURE(rc))
+ return rc;
+ if (u32Sep != ~0U)
+ {
+ AssertMsgFailed(("u32Sep=%#x (term)\n", u32Sep));
+ return VERR_SSM_DATA_UNIT_FORMAT_CHANGED;
+ }
+
+ /*
+ * Get the CPUID features.
+ */
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+ CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+ CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features);
+
+ /*
+ * Stop ignoring ignorable notifications.
+ */
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+
+ /*
+ * Sync the whole CPU state when executing code in the recompiler.
+ */
+ for (i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * @callback_method_impl{FNSSMINTLOADDONE,
+ * For pushing misdesigned pending-interrupt mess to TRPM where it belongs. }
+ */
+static DECLCALLBACK(int) remR3LoadDone(PVM pVM, PSSMHANDLE pSSM)
+{
+ if (pVM->rem.s.uStateLoadPendingInterrupt != REM_NO_PENDING_IRQ)
+ {
+ int rc = TRPMAssertTrap(&pVM->aCpus[0], pVM->rem.s.uStateLoadPendingInterrupt, TRPM_HARDWARE_INT);
+        AssertLogRelMsgReturn(RT_SUCCESS(rc), ("uStateLoadPendingInterrupt=%#x rc=%Rrc\n", pVM->rem.s.uStateLoadPendingInterrupt, rc), rc);
+ pVM->rem.s.uStateLoadPendingInterrupt = REM_NO_PENDING_IRQ;
+ }
+ return VINF_SUCCESS;
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_RUN
+
+/**
+ * Single steps an instruction in recompiled mode.
+ *
+ * Before calling this function the REM state needs to be in sync with
+ * the VM. Call REMR3State() to perform the sync. It's only necessary
+ * (and permitted) to sync at the first call to REMR3Step()/REMR3Run()
+ * and after calling REMR3StateBack().
+ *
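+ * Illustrative call sequence (a sketch, not lifted from any particular caller):
+ * @code
+ *      rc = REMR3State(pVM, pVCpu);            // sync the guest context into REM
+ *      if (RT_SUCCESS(rc))
+ *      {
+ *          rc = REMR3Step(pVM, pVCpu);         // execute exactly one instruction
+ *          rc2 = REMR3StateBack(pVM, pVCpu);   // write the REM state back to the VM
+ *      }
+ * @endcode
+ *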
+ * @returns VBox status code.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ */
+REMR3DECL(int) REMR3Step(PVM pVM, PVMCPU pVCpu)
+{
+ int rc, interrupt_request;
+ RTGCPTR GCPtrPC;
+ bool fBp;
+
+ /*
+     * Lock the REM - we don't want anyone interrupting us
+     * while stepping - and enable single stepping. We also ignore
+ * pending interrupts and suchlike.
+ */
+ interrupt_request = pVM->rem.s.Env.interrupt_request;
+ Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER)));
+ pVM->rem.s.Env.interrupt_request = 0;
+ cpu_single_step(&pVM->rem.s.Env, 1);
+
+ /*
+     * If we're standing at a breakpoint, it has to be disabled before we start stepping.
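+     * (cpu_breakpoint_remove returns 0 when it actually removed one, so fBp below ends up true
+     * when a breakpoint existed at PC and must be re-inserted after the step.)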
+ */
+ GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+ fBp = !cpu_breakpoint_remove(&pVM->rem.s.Env, GCPtrPC, BP_GDB);
+
+ /*
+ * Execute and handle the return code.
+ * We execute without enabling the cpu tick, so on success we'll
+     * just flip it on and off to make sure it moves.
+ */
+ rc = cpu_exec(&pVM->rem.s.Env);
+ if (rc == EXCP_DEBUG)
+ {
+ TMR3NotifyResume(pVM, pVCpu);
+ TMR3NotifySuspend(pVM, pVCpu);
+ rc = VINF_EM_DBG_STEPPED;
+ }
+ else
+ {
+ switch (rc)
+ {
+ case EXCP_INTERRUPT: rc = VINF_SUCCESS; break;
+ case EXCP_HLT:
+ case EXCP_HALTED: rc = VINF_EM_HALT; break;
+ case EXCP_RC:
+ rc = pVM->rem.s.rc;
+ pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+ break;
+ case EXCP_EXECUTE_RAW:
+ case EXCP_EXECUTE_HM:
+ /** @todo is it correct? No! */
+ rc = VINF_SUCCESS;
+ break;
+ default:
+ AssertReleaseMsgFailed(("This really shouldn't happen, rc=%d!\n", rc));
+ rc = VERR_INTERNAL_ERROR;
+ break;
+ }
+ }
+
+ /*
+ * Restore the stuff we changed to prevent interruption.
+ * Unlock the REM.
+ */
+ if (fBp)
+ {
+ int rc2 = cpu_breakpoint_insert(&pVM->rem.s.Env, GCPtrPC, BP_GDB, NULL);
+ Assert(rc2 == 0); NOREF(rc2);
+ }
+ cpu_single_step(&pVM->rem.s.Env, 0);
+ pVM->rem.s.Env.interrupt_request = interrupt_request;
+
+ return rc;
+}
+
+
+/**
+ * Set a breakpoint using the REM facilities.
+ *
+ * @returns VBox status code.
+ * @param pVM The VM handle.
+ * @param Address The breakpoint address.
+ * @thread The emulation thread.
+ */
+REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address)
+{
+ VM_ASSERT_EMT(pVM);
+ if (!cpu_breakpoint_insert(&pVM->rem.s.Env, Address, BP_GDB, NULL))
+ {
+ LogFlow(("REMR3BreakpointSet: Address=%RGv\n", Address));
+ return VINF_SUCCESS;
+ }
+ LogFlow(("REMR3BreakpointSet: Address=%RGv - failed!\n", Address));
+ return VERR_REM_NO_MORE_BP_SLOTS;
+}
+
+
+/**
+ * Clears a breakpoint set by REMR3BreakpointSet().
+ *
+ * @returns VBox status code.
+ * @param pVM The VM handle.
+ * @param Address The breakpoint address.
+ * @thread The emulation thread.
+ */
+REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address)
+{
+ VM_ASSERT_EMT(pVM);
+ if (!cpu_breakpoint_remove(&pVM->rem.s.Env, Address, BP_GDB))
+ {
+ LogFlow(("REMR3BreakpointClear: Address=%RGv\n", Address));
+ return VINF_SUCCESS;
+ }
+ LogFlow(("REMR3BreakpointClear: Address=%RGv - not found!\n", Address));
+ return VERR_REM_BP_NOT_FOUND;
+}
+
+
+/**
+ * Emulate an instruction.
+ *
+ * This function executes one instruction without letting anyone
+ * interrupt it. This is intended for being called while being in
+ * raw mode and thus will take care of all the state syncing between
+ * REM and the rest.
+ *
+ * @returns VBox status code.
+ * @param pVM VM handle.
+ * @param pVCpu VMCPU Handle.
+ */
+REMR3DECL(int) REMR3EmulateInstruction(PVM pVM, PVMCPU pVCpu)
+{
+ bool fFlushTBs;
+
+ int rc, rc2;
+ Log2(("REMR3EmulateInstruction: (cs:eip=%04x:%08x)\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu)));
+
+ /* Make sure this flag is set; we might never execute remR3CanExecuteRaw in the AMD-V case.
+ * CPU_RAW_HM makes sure we never execute interrupt handlers in the recompiler.
+ */
+ if (!VM_IS_RAW_MODE_ENABLED(pVM))
+ pVM->rem.s.Env.state |= CPU_RAW_HM;
+
+ /* Skip the TB flush as that's rather expensive and not necessary for single instruction emulation. */
+ fFlushTBs = pVM->rem.s.fFlushTBs;
+ pVM->rem.s.fFlushTBs = false;
+
+ /*
+ * Sync the state and enable single instruction / single stepping.
+ */
+ rc = REMR3State(pVM, pVCpu);
+ pVM->rem.s.fFlushTBs = fFlushTBs;
+ if (RT_SUCCESS(rc))
+ {
+ int interrupt_request = pVM->rem.s.Env.interrupt_request;
+ Assert(!( interrupt_request
+ & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD
+ | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER
+ | CPU_INTERRUPT_EXTERNAL_DMA)));
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ cpu_single_step(&pVM->rem.s.Env, 0);
+#endif
+ Assert(!pVM->rem.s.Env.singlestep_enabled);
+
+ /*
+ * Now we set the execute single instruction flag and enter the cpu_exec loop.
+ */
+ TMNotifyStartOfExecution(pVCpu);
+ pVM->rem.s.Env.interrupt_request = CPU_INTERRUPT_SINGLE_INSTR;
+ rc = cpu_exec(&pVM->rem.s.Env);
+ TMNotifyEndOfExecution(pVCpu);
+ switch (rc)
+ {
+ /*
+ * Executed without anything out of the way happening.
+ */
+ case EXCP_SINGLE_INSTR:
+ rc = VINF_EM_RESCHEDULE;
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_SINGLE_INSTR\n"));
+ break;
+
+ /*
+ * If we take a trap or start servicing a pending interrupt, we might end up here.
+ * (Timer thread or some other thread wishing EMT's attention.)
+ */
+ case EXCP_INTERRUPT:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_INTERRUPT\n"));
+ rc = VINF_EM_RESCHEDULE;
+ break;
+
+ /*
+ * Single step, we assume!
+ * If there was a breakpoint there we're fucked now.
+ */
+ case EXCP_DEBUG:
+ if (pVM->rem.s.Env.watchpoint_hit)
+ {
+ /** @todo deal with watchpoints */
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc));
+ rc = VINF_EM_DBG_BREAKPOINT;
+ }
+ else
+ {
+ CPUBreakpoint *pBP;
+ RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+ QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry)
+ if (pBP->pc == GCPtrPC)
+ break;
+ rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED;
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC));
+ }
+ break;
+
+ /*
+ * hlt instruction.
+ */
+ case EXCP_HLT:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HLT\n"));
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * The VM has halted.
+ */
+ case EXCP_HALTED:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HALTED\n"));
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * Switch to RAW-mode.
+ */
+ case EXCP_EXECUTE_RAW:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_RAW\n"));
+ rc = VINF_EM_RESCHEDULE_RAW;
+ break;
+
+ /*
+ * Switch to hardware accelerated RAW-mode.
+ */
+ case EXCP_EXECUTE_HM:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_HM\n"));
+ rc = VINF_EM_RESCHEDULE_HM;
+ break;
+
+ /*
+ * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+ */
+ case EXCP_RC:
+ Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_RC\n"));
+ rc = pVM->rem.s.rc;
+ pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+ break;
+
+ /*
+ * Figure out the rest when they arrive....
+ */
+ default:
+ AssertMsgFailed(("rc=%d\n", rc));
+ Log2(("REMR3EmulateInstruction: cpu_exec -> %d\n", rc));
+ rc = VINF_EM_RESCHEDULE;
+ break;
+ }
+
+ /*
+ * Switch back the state.
+ */
+ pVM->rem.s.Env.interrupt_request = interrupt_request;
+ rc2 = REMR3StateBack(pVM, pVCpu);
+ AssertRC(rc2);
+ }
+
+ Log2(("REMR3EmulateInstruction: returns %Rrc (cs:eip=%04x:%RGv)\n",
+ rc, pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip));
+ return rc;
+}
+
+
+/**
+ * Used by REMR3Run to handle the case where CPU_EMULATE_SINGLE_STEP is set.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The VM handle.
+ * @param pVCpu The Virtual CPU handle.
+ */
+static int remR3RunLoggingStep(PVM pVM, PVMCPU pVCpu)
+{
+ int rc;
+
+ Assert(pVM->rem.s.fInREM);
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ cpu_single_step(&pVM->rem.s.Env, 1);
+#else
+ Assert(!pVM->rem.s.Env.singlestep_enabled);
+#endif
+
+ /*
+ * Now we set the execute single instruction flag and enter the cpu_exec loop.
+ */
+ for (;;)
+ {
+ char szBuf[256];
+
+ /*
+ * Log the current registers state and instruction.
+ */
+ remR3StateUpdate(pVM, pVCpu);
+ DBGFR3Info(pVM->pUVM, "cpumguest", NULL, NULL);
+ szBuf[0] = '\0';
+ rc = DBGFR3DisasInstrEx(pVM->pUVM,
+ pVCpu->idCpu,
+ 0, /* Sel */ 0, /* GCPtr */
+ DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
+ szBuf,
+ sizeof(szBuf),
+ NULL);
+ if (RT_FAILURE(rc))
+ RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrEx failed with rc=%Rrc\n", rc);
+ RTLogPrintf("CPU%d: %s\n", pVCpu->idCpu, szBuf);
+
+ /*
+ * Execute the instruction.
+ */
+ TMNotifyStartOfExecution(pVCpu);
+
+ if ( pVM->rem.s.Env.exception_index < 0
+ || pVM->rem.s.Env.exception_index > 256)
+ pVM->rem.s.Env.exception_index = -1; /** @todo We need to do similar stuff elsewhere, I think. */
+
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ pVM->rem.s.Env.interrupt_request = 0;
+#else
+ pVM->rem.s.Env.interrupt_request = CPU_INTERRUPT_SINGLE_INSTR;
+#endif
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
+ pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD;
+ RTLogPrintf("remR3RunLoggingStep: interrupt_request=%#x halted=%d exception_index=%#x\n",
+ pVM->rem.s.Env.interrupt_request,
+ pVM->rem.s.Env.halted,
+ pVM->rem.s.Env.exception_index
+ );
+
+ rc = cpu_exec(&pVM->rem.s.Env);
+
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> %#x interrupt_request=%#x halted=%d exception_index=%#x\n", rc,
+ pVM->rem.s.Env.interrupt_request,
+ pVM->rem.s.Env.halted,
+ pVM->rem.s.Env.exception_index
+ );
+
+ TMNotifyEndOfExecution(pVCpu);
+
+ switch (rc)
+ {
+#ifndef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ /*
+ * The normal exit.
+ */
+ case EXCP_SINGLE_INSTR:
+ if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK))
+ continue;
+ RTLogPrintf("remR3RunLoggingStep: rc=VINF_SUCCESS w/ FFs (%#x/%#RX64)\n",
+ pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions);
+ rc = VINF_SUCCESS;
+ break;
+
+#else
+ /*
+ * The normal exit, check for breakpoints at PC just to be sure.
+ */
+#endif
+ case EXCP_DEBUG:
+ if (pVM->rem.s.Env.watchpoint_hit)
+ {
+ /** @todo deal with watchpoints */
+ Log2(("remR3RunLoggingStep: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc));
+ rc = VINF_EM_DBG_BREAKPOINT;
+ }
+ else
+ {
+ CPUBreakpoint *pBP;
+ RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+ QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry)
+ if (pBP->pc == GCPtrPC)
+ break;
+ rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED;
+ Log2(("remR3RunLoggingStep: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC));
+ }
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ if (rc == VINF_EM_DBG_STEPPED)
+ {
+ if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK))
+ continue;
+
+ RTLogPrintf("remR3RunLoggingStep: rc=VINF_SUCCESS w/ FFs (%#x/%#RX64)\n",
+ pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions);
+ rc = VINF_SUCCESS;
+ }
+#endif
+ break;
+
+ /*
+ * If we take a trap or start servicing a pending interrupt, we might end up here.
+ * (Timer thread or some other thread wishing EMT's attention.)
+ */
+ case EXCP_INTERRUPT:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_INTERRUPT rc=VINF_SUCCESS\n");
+ rc = VINF_SUCCESS;
+ break;
+
+ /*
+ * hlt instruction.
+ */
+ case EXCP_HLT:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_HLT rc=VINF_EM_HALT\n");
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * The VM has halted.
+ */
+ case EXCP_HALTED:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_HALTED rc=VINF_EM_HALT\n");
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * Switch to RAW-mode.
+ */
+ case EXCP_EXECUTE_RAW:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_EXECUTE_RAW rc=VINF_EM_RESCHEDULE_RAW\n");
+ rc = VINF_EM_RESCHEDULE_RAW;
+ break;
+
+ /*
+ * Switch to hardware accelerated RAW-mode.
+ */
+ case EXCP_EXECUTE_HM:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_EXECUTE_HM rc=VINF_EM_RESCHEDULE_HM\n");
+ rc = VINF_EM_RESCHEDULE_HM;
+ break;
+
+ /*
+ * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+ */
+ case EXCP_RC:
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_RC rc=%Rrc\n", pVM->rem.s.rc);
+ rc = pVM->rem.s.rc;
+ pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+ break;
+
+ /*
+ * Figure out the rest when they arrive....
+ */
+ default:
+ AssertMsgFailed(("rc=%d\n", rc));
+ RTLogPrintf("remR3RunLoggingStep: cpu_exec -> %d rc=VINF_EM_RESCHEDULE\n", rc);
+ rc = VINF_EM_RESCHEDULE;
+ break;
+ }
+ break;
+ }
+
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+// cpu_single_step(&pVM->rem.s.Env, 0);
+#else
+ pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_SINGLE_INSTR | CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT);
+#endif
+ return rc;
+}
+
+
+/**
+ * Runs code in recompiled mode.
+ *
+ * Before calling this function the REM state needs to be in sync with
+ * the VM. Call REMR3State() to perform the sync. It's only necessary
+ * (and permitted) to sync at the first call to REMR3Step()/REMR3Run()
+ * and after calling REMR3StateBack().
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ */
+REMR3DECL(int) REMR3Run(PVM pVM, PVMCPU pVCpu)
+{
+ int rc;
+
+ if (RT_UNLIKELY(pVM->rem.s.Env.state & CPU_EMULATE_SINGLE_STEP))
+ return remR3RunLoggingStep(pVM, pVCpu);
+
+ Assert(pVM->rem.s.fInREM);
+ Log2(("REMR3Run: (cs:eip=%04x:%RGv)\n", pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip));
+
+ TMNotifyStartOfExecution(pVCpu);
+ rc = cpu_exec(&pVM->rem.s.Env);
+ TMNotifyEndOfExecution(pVCpu);
+ switch (rc)
+ {
+ /*
+ * This happens when the execution was interrupted
+ * by an external event, like pending timers.
+ */
+ case EXCP_INTERRUPT:
+ Log2(("REMR3Run: cpu_exec -> EXCP_INTERRUPT\n"));
+ rc = VINF_SUCCESS;
+ break;
+
+ /*
+ * hlt instruction.
+ */
+ case EXCP_HLT:
+ Log2(("REMR3Run: cpu_exec -> EXCP_HLT\n"));
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * The VM has halted.
+ */
+ case EXCP_HALTED:
+ Log2(("REMR3Run: cpu_exec -> EXCP_HALTED\n"));
+ rc = VINF_EM_HALT;
+ break;
+
+ /*
+ * Breakpoint/single step.
+ */
+ case EXCP_DEBUG:
+ if (pVM->rem.s.Env.watchpoint_hit)
+ {
+ /** @todo deal with watchpoints */
+ Log2(("REMR3Run: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc));
+ rc = VINF_EM_DBG_BREAKPOINT;
+ }
+ else
+ {
+ CPUBreakpoint *pBP;
+ RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base;
+ QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry)
+ if (pBP->pc == GCPtrPC)
+ break;
+ rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED;
+ Log2(("REMR3Run: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC));
+ }
+ break;
+
+ /*
+ * Switch to RAW-mode.
+ */
+ case EXCP_EXECUTE_RAW:
+ Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_RAW pc=%RGv\n", pVM->rem.s.Env.eip));
+ rc = VINF_EM_RESCHEDULE_RAW;
+ break;
+
+ /*
+ * Switch to hardware accelerated RAW-mode.
+ */
+ case EXCP_EXECUTE_HM:
+ Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_HM\n"));
+ rc = VINF_EM_RESCHEDULE_HM;
+ break;
+
+ /*
+ * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error).
+ */
+ case EXCP_RC:
+ Log2(("REMR3Run: cpu_exec -> EXCP_RC rc=%Rrc\n", pVM->rem.s.rc));
+ rc = pVM->rem.s.rc;
+ pVM->rem.s.rc = VERR_INTERNAL_ERROR;
+ break;
+
+ /*
+ * Figure out the rest when they arrive....
+ */
+ default:
+ AssertMsgFailed(("rc=%d\n", rc));
+ Log2(("REMR3Run: cpu_exec -> %d\n", rc));
+ rc = VINF_SUCCESS;
+ break;
+ }
+
+ Log2(("REMR3Run: returns %Rrc (cs:eip=%04x:%RGv)\n", rc, pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip));
+ return rc;
+}
+
+
+/**
+ * Check if the cpu state is suitable for Raw execution.
+ *
+ * @returns true if RAW/HWACC mode is ok, false if we should stay in REM.
+ *
+ * @param env The CPU env struct.
+ * @param eip The EIP to check this for (might differ from env->eip).
+ * @param fFlags hflags OR'ed with IOPL, TF and VM from eflags.
+ * @param piException Stores EXCP_EXECUTE_RAW/HWACC in case raw mode is supported in this context
+ *
+ * @remark This function must be kept in perfect sync with the scheduler in EM.cpp!
+ */
+bool remR3CanExecuteRaw(CPUX86State *env, RTGCPTR eip, unsigned fFlags, int *piException)
+{
+ /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+ /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+ /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */
+ uint32_t u32CR0;
+
+ /* Update counter. */
+ env->pVM->rem.s.cCanExecuteRaw++;
+
+ /* Never when single stepping+logging guest code. */
+ if (env->state & CPU_EMULATE_SINGLE_STEP)
+ return false;
+
+ if (!VM_IS_RAW_MODE_ENABLED(env->pVM))
+ {
+#ifdef RT_OS_WINDOWS
+ PCPUMCTX pCtx = alloca(sizeof(*pCtx));
+#else
+ CPUMCTX Ctx;
+ PCPUMCTX pCtx = &Ctx;
+#endif
+ /** @todo NEM: scheduling. */
+
+ env->state |= CPU_RAW_HM;
+
+ /*
+ * The simple check first...
+ */
+ if (!EMIsHwVirtExecutionEnabled(env->pVM))
+ return false;
+
+ /*
+ * Create partial context for HMCanExecuteGuest.
+ */
+ pCtx->cr0 = env->cr[0];
+ pCtx->cr3 = env->cr[3];
+ pCtx->cr4 = env->cr[4];
+
+ pCtx->tr.Sel = env->tr.selector;
+ pCtx->tr.ValidSel = env->tr.selector;
+ pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->tr.u64Base = env->tr.base;
+ pCtx->tr.u32Limit = env->tr.limit;
+ pCtx->tr.Attr.u = (env->tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
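+        /* Note: QEMU keeps the descriptor attribute bits shifted up inside 'flags'; shifting down
+           by SEL_FLAGS_SHIFT and masking with SEL_FLAGS_SMASK recovers the packed attribute format
+           that CPUM expects.  The same conversion is applied to all the selectors below. */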
+
+ pCtx->ldtr.Sel = env->ldt.selector;
+ pCtx->ldtr.ValidSel = env->ldt.selector;
+ pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->ldtr.u64Base = env->ldt.base;
+ pCtx->ldtr.u32Limit = env->ldt.limit;
+ pCtx->ldtr.Attr.u = (env->ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->idtr.cbIdt = env->idt.limit;
+ pCtx->idtr.pIdt = env->idt.base;
+
+ pCtx->gdtr.cbGdt = env->gdt.limit;
+ pCtx->gdtr.pGdt = env->gdt.base;
+
+ pCtx->rsp = env->regs[R_ESP];
+ pCtx->rip = env->eip;
+
+ pCtx->eflags.u32 = env->eflags;
+
+ pCtx->cs.Sel = env->segs[R_CS].selector;
+ pCtx->cs.ValidSel = env->segs[R_CS].selector;
+ pCtx->cs.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->cs.u64Base = env->segs[R_CS].base;
+ pCtx->cs.u32Limit = env->segs[R_CS].limit;
+ pCtx->cs.Attr.u = (env->segs[R_CS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->ds.Sel = env->segs[R_DS].selector;
+ pCtx->ds.ValidSel = env->segs[R_DS].selector;
+ pCtx->ds.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->ds.u64Base = env->segs[R_DS].base;
+ pCtx->ds.u32Limit = env->segs[R_DS].limit;
+ pCtx->ds.Attr.u = (env->segs[R_DS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->es.Sel = env->segs[R_ES].selector;
+ pCtx->es.ValidSel = env->segs[R_ES].selector;
+ pCtx->es.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->es.u64Base = env->segs[R_ES].base;
+ pCtx->es.u32Limit = env->segs[R_ES].limit;
+ pCtx->es.Attr.u = (env->segs[R_ES].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->fs.Sel = env->segs[R_FS].selector;
+ pCtx->fs.ValidSel = env->segs[R_FS].selector;
+ pCtx->fs.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->fs.u64Base = env->segs[R_FS].base;
+ pCtx->fs.u32Limit = env->segs[R_FS].limit;
+ pCtx->fs.Attr.u = (env->segs[R_FS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->gs.Sel = env->segs[R_GS].selector;
+ pCtx->gs.ValidSel = env->segs[R_GS].selector;
+ pCtx->gs.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->gs.u64Base = env->segs[R_GS].base;
+ pCtx->gs.u32Limit = env->segs[R_GS].limit;
+ pCtx->gs.Attr.u = (env->segs[R_GS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->ss.Sel = env->segs[R_SS].selector;
+ pCtx->ss.ValidSel = env->segs[R_SS].selector;
+ pCtx->ss.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->ss.u64Base = env->segs[R_SS].base;
+ pCtx->ss.u32Limit = env->segs[R_SS].limit;
+ pCtx->ss.Attr.u = (env->segs[R_SS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+
+ pCtx->msrEFER = env->efer;
+
+ /*
+ * Hardware accelerated mode:
+ * Typically only 32-bits protected mode, with paging enabled, code is allowed here.
+ */
+ PVMCPU pVCpu = &env->pVM->aCpus[0];
+ if (HMCanExecuteGuest(pVCpu, pCtx))
+ {
+ *piException = EXCP_EXECUTE_HM;
+ return true;
+ }
+ return false;
+ }
+
+ /*
+     * Here we only support 16 and 32-bit protected mode ring-3 code without I/O privileges,
+     * or 32-bit protected mode ring-0 code.
+ *
+ * The tests are ordered by the likelihood of being true during normal execution.
+ */
+ if (fFlags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))
+ {
+ STAM_COUNTER_INC(&gStatRefuseTFInhibit);
+ Log2(("raw mode refused: fFlags=%#x\n", fFlags));
+ return false;
+ }
+
+#ifndef VBOX_RAW_V86
+ if (fFlags & VM_MASK) {
+ STAM_COUNTER_INC(&gStatRefuseVM86);
+ Log2(("raw mode refused: VM_MASK\n"));
+ return false;
+ }
+#endif
+
+ if (env->state & CPU_EMULATE_SINGLE_INSTR)
+ {
+#ifndef DEBUG_bird
+ Log2(("raw mode refused: CPU_EMULATE_SINGLE_INSTR\n"));
+#endif
+ return false;
+ }
+
+ if (env->singlestep_enabled)
+ {
+ //Log2(("raw mode refused: Single step\n"));
+ return false;
+ }
+
+ if (!QTAILQ_EMPTY(&env->breakpoints))
+ {
+ //Log2(("raw mode refused: Breakpoints\n"));
+ return false;
+ }
+
+ if (!QTAILQ_EMPTY(&env->watchpoints))
+ {
+ //Log2(("raw mode refused: Watchpoints\n"));
+ return false;
+ }
+
+ u32CR0 = env->cr[0];
+ if ((u32CR0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
+ {
+ STAM_COUNTER_INC(&gStatRefusePaging);
+ //Log2(("raw mode refused: %s%s%s\n", (u32CR0 & X86_CR0_PG) ? "" : " !PG", (u32CR0 & X86_CR0_PE) ? "" : " !PE", (u32CR0 & X86_CR0_AM) ? "" : " !AM"));
+ return false;
+ }
+
+ if (env->cr[4] & CR4_PAE_MASK)
+ {
+ if (!(env->cpuid_features & X86_CPUID_FEATURE_EDX_PAE))
+ {
+ STAM_COUNTER_INC(&gStatRefusePAE);
+ return false;
+ }
+ }
+
+ if (((fFlags >> HF_CPL_SHIFT) & 3) == 3)
+ {
+ if (!EMIsRawRing3Enabled(env->pVM))
+ return false;
+
+ if (!(env->eflags & IF_MASK))
+ {
+ STAM_COUNTER_INC(&gStatRefuseIF0);
+ Log2(("raw mode refused: IF (RawR3)\n"));
+ return false;
+ }
+
+ if (!(u32CR0 & CR0_WP_MASK) && EMIsRawRing0Enabled(env->pVM))
+ {
+ STAM_COUNTER_INC(&gStatRefuseWP0);
+ Log2(("raw mode refused: CR0.WP + RawR0\n"));
+ return false;
+ }
+ }
+ else
+ {
+ if (!EMIsRawRing0Enabled(env->pVM))
+ return false;
+
+ // Let's start with pure 32 bits ring 0 code first
+ if ((fFlags & (HF_SS32_MASK | HF_CS32_MASK)) != (HF_SS32_MASK | HF_CS32_MASK))
+ {
+ STAM_COUNTER_INC(&gStatRefuseCode16);
+ Log2(("raw r0 mode refused: HF_[S|C]S32_MASK fFlags=%#x\n", fFlags));
+ return false;
+ }
+
+ if (EMIsRawRing1Enabled(env->pVM))
+ {
+ /* Only ring 0 and 1 supervisor code. */
+ if (((fFlags >> HF_CPL_SHIFT) & 3) == 2) /* ring 1 code is moved into ring 2, so we can't support ring-2 in that case. */
+ {
+ Log2(("raw r0 mode refused: CPL %d\n", (fFlags >> HF_CPL_SHIFT) & 3));
+ return false;
+ }
+ }
+ /* Only R0. */
+ else if (((fFlags >> HF_CPL_SHIFT) & 3) != 0)
+ {
+ STAM_COUNTER_INC(&gStatRefuseRing1or2);
+ Log2(("raw r0 mode refused: CPL %d\n", ((fFlags >> HF_CPL_SHIFT) & 3) ));
+ return false;
+ }
+
+ if (!(u32CR0 & CR0_WP_MASK))
+ {
+ STAM_COUNTER_INC(&gStatRefuseWP0);
+ Log2(("raw r0 mode refused: CR0.WP=0!\n"));
+ return false;
+ }
+
+#ifdef VBOX_WITH_RAW_MODE
+ if (PATMIsPatchGCAddr(env->pVM, eip))
+ {
+ Log2(("raw r0 mode forced: patch code\n"));
+ *piException = EXCP_EXECUTE_RAW;
+ return true;
+ }
+#endif
+
+#if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS)
+ if (!(env->eflags & IF_MASK))
+ {
+ STAM_COUNTER_INC(&gStatRefuseIF0);
+ ////Log2(("R0: IF=0 VIF=%d %08X\n", eip, *env->pVMeflags));
+ //Log2(("RR0: Interrupts turned off; fall back to emulation\n"));
+ return false;
+ }
+#endif
+
+#ifndef VBOX_WITH_RAW_RING1
+ if (((env->eflags >> IOPL_SHIFT) & 3) != 0)
+ {
+ Log2(("raw r0 mode refused: IOPL %d\n", ((env->eflags >> IOPL_SHIFT) & 3)));
+ return false;
+ }
+#endif
+ env->state |= CPU_RAW_RING0;
+ }
+
+ /*
+ * Don't reschedule the first time we're called, because there might be
+     * special reasons why we're here that are not covered by the above checks.
+ */
+ if (env->pVM->rem.s.cCanExecuteRaw == 1)
+ {
+ Log2(("raw mode refused: first scheduling\n"));
+ STAM_COUNTER_INC(&gStatRefuseCanExecute);
+ return false;
+ }
+
+ /*
+     * Stale hidden selectors mean raw-mode is unsafe (being very careful).
+ */
+ if (env->segs[R_CS].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale CS (%#x)\n", env->segs[R_CS].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_CS]);
+ return false;
+ }
+ if (env->segs[R_SS].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale SS (%#x)\n", env->segs[R_SS].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_SS]);
+ return false;
+ }
+ if (env->segs[R_DS].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale DS (%#x)\n", env->segs[R_DS].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_DS]);
+ return false;
+ }
+ if (env->segs[R_ES].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale ES (%#x)\n", env->segs[R_ES].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_ES]);
+ return false;
+ }
+ if (env->segs[R_FS].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale FS (%#x)\n", env->segs[R_FS].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_FS]);
+ return false;
+ }
+ if (env->segs[R_GS].fVBoxFlags & CPUMSELREG_FLAGS_STALE)
+ {
+ Log2(("raw mode refused: stale GS (%#x)\n", env->segs[R_GS].selector));
+ STAM_COUNTER_INC(&gaStatRefuseStale[R_GS]);
+ return false;
+ }
+
+/* Assert(env->pVCpu && PGMPhysIsA20Enabled(env->pVCpu));*/
+ *piException = EXCP_EXECUTE_RAW;
+ return true;
+}
+
+
+#ifdef VBOX_WITH_RAW_MODE
+/**
+ * Fetches a code byte.
+ *
+ * @returns Success indicator (bool) for ease of use.
+ * @param env The CPU environment structure.
+ * @param GCPtrInstr Where to fetch code.
+ * @param pu8Byte Where to store the byte on success
+ */
+bool remR3GetOpcode(CPUX86State *env, RTGCPTR GCPtrInstr, uint8_t *pu8Byte)
+{
+ int rc = PATMR3QueryOpcode(env->pVM, GCPtrInstr, pu8Byte);
+ if (RT_SUCCESS(rc))
+ return true;
+ return false;
+}
+#endif /* VBOX_WITH_RAW_MODE */
+
+
+/**
+ * Flush (or invalidate if you like) page table/dir entry.
+ *
+ * (invlpg instruction; tlb_flush_page)
+ *
+ * @param env Pointer to cpu environment.
+ * @param   GCPtr       The virtual address whose page table/directory entry should be invalidated.
+ */
+void remR3FlushPage(CPUX86State *env, RTGCPTR GCPtr)
+{
+ PVM pVM = env->pVM;
+ PCPUMCTX pCtx;
+ int rc;
+
+ Assert(EMRemIsLockOwner(env->pVM));
+
+ /*
+ * When we're replaying invlpg instructions or restoring a saved
+ * state we disable this path.
+ */
+ if (pVM->rem.s.fIgnoreInvlPg || pVM->rem.s.cIgnoreAll)
+ return;
+ LogFlow(("remR3FlushPage: GCPtr=%RGv\n", GCPtr));
+ Assert(pVM->rem.s.fInREM || pVM->rem.s.fInStateSync);
+
+ //RAWEx_ProfileStop(env, STATS_QEMU_TOTAL);
+
+ /*
+ * Update the control registers before calling PGMFlushPage.
+ */
+ pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+ Assert(pCtx);
+ pCtx->cr0 = env->cr[0];
+ pCtx->cr3 = env->cr[3];
+#ifdef VBOX_WITH_RAW_MODE
+ if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ pCtx->cr4 = env->cr[4];
+
+ /*
+ * Let PGM do the rest.
+ */
+ Assert(env->pVCpu);
+ rc = PGMInvalidatePage(env->pVCpu, GCPtr);
+ if (RT_FAILURE(rc))
+ {
+ AssertMsgFailed(("remR3FlushPage %RGv failed with %d!!\n", GCPtr, rc));
+ VMCPU_FF_SET(env->pVCpu, VMCPU_FF_PGM_SYNC_CR3);
+ }
+ //RAWEx_ProfileStart(env, STATS_QEMU_TOTAL);
+}
+
+
+#ifndef REM_PHYS_ADDR_IN_TLB
+/** Wrapper for PGMR3PhysTlbGCPhys2Ptr. */
+void *remR3TlbGCPhys2Ptr(CPUX86State *env1, target_ulong physAddr, int fWritable)
+{
+ void *pv;
+ int rc;
+
+
+ /* Address must be aligned enough to fiddle with lower bits */
+ Assert((physAddr & 0x3) == 0);
+ /*AssertMsg((env1->a20_mask & physAddr) == physAddr, ("%llx\n", (uint64_t)physAddr));*/
+
+ STAM_PROFILE_START(&gStatGCPhys2HCVirt, a);
+ rc = PGMR3PhysTlbGCPhys2Ptr(env1->pVM, physAddr, true /*fWritable*/, &pv);
+ STAM_PROFILE_STOP(&gStatGCPhys2HCVirt, a);
+ Assert( rc == VINF_SUCCESS
+ || rc == VINF_PGM_PHYS_TLB_CATCH_WRITE
+ || rc == VERR_PGM_PHYS_TLB_CATCH_ALL
+ || rc == VERR_PGM_PHYS_TLB_UNASSIGNED);
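+    /* The two low bits of the returned pointer are used as tags (possible because physAddr is at
+       least 4 byte aligned): (void *)1 signals a page that cannot be accessed directly, while
+       bit 1 marks a page whose writes must go through the access handler path. */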
+ if (RT_FAILURE(rc))
+ return (void *)1;
+ if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
+ return (void *)((uintptr_t)pv | 2);
+ return pv;
+}
+#endif /* REM_PHYS_ADDR_IN_TLB */
+
+
+/**
+ * Called from tlb_protect_code in order to write monitor a code page.
+ *
+ * @param env Pointer to the CPU environment.
+ * @param GCPtr Code page to monitor
+ */
+void remR3ProtectCode(CPUX86State *env, RTGCPTR GCPtr)
+{
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+ Assert(env->pVM->rem.s.fInREM);
+ if ( (env->cr[0] & X86_CR0_PG) /* paging must be enabled */
+ && !(env->state & CPU_EMULATE_SINGLE_INSTR) /* ignore during single instruction execution */
+ && (((env->hflags >> HF_CPL_SHIFT) & 3) == 0) /* supervisor mode only */
+ && !(env->eflags & VM_MASK) /* no V86 mode */
+ && VM_IS_RAW_MODE_ENABLED(env->pVM))
+ CSAMR3MonitorPage(env->pVM, GCPtr, CSAM_TAG_REM);
+#endif
+}
+
+
+/**
+ * Called from tlb_unprotect_code in order to clear write monitoring for a code page.
+ *
+ * @param env Pointer to the CPU environment.
+ * @param   GCPtr       Code page to stop monitoring.
+ */
+void remR3UnprotectCode(CPUX86State *env, RTGCPTR GCPtr)
+{
+ Assert(env->pVM->rem.s.fInREM);
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+ if ( (env->cr[0] & X86_CR0_PG) /* paging must be enabled */
+ && !(env->state & CPU_EMULATE_SINGLE_INSTR) /* ignore during single instruction execution */
+ && (((env->hflags >> HF_CPL_SHIFT) & 3) == 0) /* supervisor mode only */
+ && !(env->eflags & VM_MASK) /* no V86 mode */
+ && VM_IS_RAW_MODE_ENABLED(env->pVM))
+ CSAMR3UnmonitorPage(env->pVM, GCPtr, CSAM_TAG_REM);
+#endif
+}
+
+
+/**
+ * Called when the CPU is initialized, any of the CRx registers are changed or
+ * when the A20 line is modified.
+ *
+ * @param env Pointer to the CPU environment.
+ * @param fGlobal Set if the flush is global.
+ */
+void remR3FlushTLB(CPUX86State *env, bool fGlobal)
+{
+ PVM pVM = env->pVM;
+ PCPUMCTX pCtx;
+ Assert(EMRemIsLockOwner(pVM));
+
+ /*
+ * When we're replaying invlpg instructions or restoring a saved
+ * state we disable this path.
+ */
+ if (pVM->rem.s.fIgnoreCR3Load || pVM->rem.s.cIgnoreAll)
+ return;
+ Assert(pVM->rem.s.fInREM);
+
+ /*
+ * The caller doesn't check cr4, so we have to do that for ourselves.
+ */
+ if (!fGlobal && !(env->cr[4] & X86_CR4_PGE))
+ fGlobal = true;
+ Log(("remR3FlushTLB: CR0=%08RX64 CR3=%08RX64 CR4=%08RX64 %s\n", (uint64_t)env->cr[0], (uint64_t)env->cr[3], (uint64_t)env->cr[4], fGlobal ? " global" : ""));
+
+ /*
+ * Update the control registers before calling PGMR3FlushTLB.
+ */
+ pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+ Assert(pCtx);
+ pCtx->cr0 = env->cr[0];
+ pCtx->cr3 = env->cr[3];
+#ifdef VBOX_WITH_RAW_MODE
+ if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ pCtx->cr4 = env->cr[4];
+
+ /*
+ * Let PGM do the rest.
+ */
+ Assert(env->pVCpu);
+ PGMFlushTLB(env->pVCpu, env->cr[3], fGlobal);
+}
+
+
+/**
+ * Called when any of the cr0, cr4 or efer registers is updated.
+ *
+ * @param env Pointer to the CPU environment.
+ */
+void remR3ChangeCpuMode(CPUX86State *env)
+{
+ PVM pVM = env->pVM;
+ uint64_t efer;
+ PCPUMCTX pCtx;
+ int rc;
+
+ /*
+ * When we're replaying loads or restoring a saved
+ * state this path is disabled.
+ */
+ if (pVM->rem.s.fIgnoreCpuMode || pVM->rem.s.cIgnoreAll)
+ return;
+ Assert(pVM->rem.s.fInREM);
+
+ pCtx = (PCPUMCTX)pVM->rem.s.pCtx;
+ Assert(pCtx);
+
+ /*
+ * Notify PGM about WP0 being enabled (like CPUSetGuestCR0 does).
+ */
+ if (((env->cr[0] ^ pCtx->cr0) & X86_CR0_WP) && (env->cr[0] & X86_CR0_WP))
+ PGMCr0WpEnabled(env->pVCpu);
+
+ /*
+ * Update the control registers before calling PGMChangeMode()
+ * as it may need to map whatever cr3 is pointing to.
+ */
+ pCtx->cr0 = env->cr[0];
+ pCtx->cr3 = env->cr[3];
+#ifdef VBOX_WITH_RAW_MODE
+ if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ pCtx->cr4 = env->cr[4];
+#ifdef TARGET_X86_64
+ efer = env->efer;
+ pCtx->msrEFER = efer;
+#else
+ efer = 0;
+#endif
+ Assert(env->pVCpu);
+ rc = PGMChangeMode(env->pVCpu, env->cr[0], env->cr[4], efer);
+ if (rc != VINF_SUCCESS)
+ {
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("PGMChangeMode(, %RX64, %RX64, %RX64) -> %Rrc -> remR3RaiseRC\n", env->cr[0], env->cr[4], efer, rc));
+ remR3RaiseRC(env->pVM, rc);
+ }
+ else
+ cpu_abort(env, "PGMChangeMode(, %RX64, %RX64, %RX64) -> %Rrc\n", env->cr[0], env->cr[4], efer, rc);
+ }
+}
+
+
+/**
+ * Called from compiled code to run dma.
+ *
+ * @param env Pointer to the CPU environment.
+ */
+void remR3DmaRun(CPUX86State *env)
+{
+ remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE);
+ PDMR3DmaRun(env->pVM);
+ remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE);
+}
+
+
+/**
+ * Called from compiled code to schedule pending timers in VMM
+ *
+ * @param env Pointer to the CPU environment.
+ */
+void remR3TimersRun(CPUX86State *env)
+{
+ LogFlow(("remR3TimersRun:\n"));
+ LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("remR3TimersRun\n"));
+ remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE);
+ remR3ProfileStart(STATS_QEMU_RUN_TIMERS);
+ TMR3TimerQueuesDo(env->pVM);
+ remR3ProfileStop(STATS_QEMU_RUN_TIMERS);
+ remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE);
+}
+
+
+/**
+ * Record trap occurrence
+ *
+ * @returns VBox status code
+ * @param env Pointer to the CPU environment.
+ * @param uTrap Trap nr
+ * @param uErrorCode Error code
+ * @param pvNextEIP Next EIP
+ */
+int remR3NotifyTrap(CPUX86State *env, uint32_t uTrap, uint32_t uErrorCode, RTGCPTR pvNextEIP)
+{
+ PVM pVM = env->pVM;
+#ifdef VBOX_WITH_STATISTICS
+ static STAMCOUNTER s_aStatTrap[255];
+ static bool s_aRegisters[RT_ELEMENTS(s_aStatTrap)];
+#endif
+
+#ifdef VBOX_WITH_STATISTICS
+ if (uTrap < 255)
+ {
+ if (!s_aRegisters[uTrap])
+ {
+ char szStatName[64];
+ s_aRegisters[uTrap] = true;
+ RTStrPrintf(szStatName, sizeof(szStatName), "/REM/Trap/0x%02X", uTrap);
+ STAM_REG(env->pVM, &s_aStatTrap[uTrap], STAMTYPE_COUNTER, szStatName, STAMUNIT_OCCURENCES, "Trap stats.");
+ }
+ STAM_COUNTER_INC(&s_aStatTrap[uTrap]);
+ }
+#endif
+ Log(("remR3NotifyTrap: uTrap=%x error=%x next_eip=%RGv eip=%RGv cr2=%RGv\n", uTrap, uErrorCode, pvNextEIP, (RTGCPTR)env->eip, (RTGCPTR)env->cr[2]));
+ if( uTrap < 0x20
+ && (env->cr[0] & X86_CR0_PE)
+ && !(env->eflags & X86_EFL_VM))
+ {
+#ifdef DEBUG
+ remR3DisasInstr(env, 1, "remR3NotifyTrap: ");
+#endif
+        if (pVM->rem.s.uPendingException == uTrap && ++pVM->rem.s.cPendingExceptions > 512)
+ {
+ LogRel(("VERR_REM_TOO_MANY_TRAPS -> uTrap=%x error=%x next_eip=%RGv eip=%RGv cr2=%RGv\n", uTrap, uErrorCode, pvNextEIP, (RTGCPTR)env->eip, (RTGCPTR)env->cr[2]));
+ remR3RaiseRC(env->pVM, VERR_REM_TOO_MANY_TRAPS);
+ return VERR_REM_TOO_MANY_TRAPS;
+ }
+        if (pVM->rem.s.uPendingException != uTrap || pVM->rem.s.uPendingExcptEIP != env->eip || pVM->rem.s.uPendingExcptCR2 != env->cr[2])
+ {
+ Log(("remR3NotifyTrap: uTrap=%#x set as pending\n", uTrap));
+ pVM->rem.s.cPendingExceptions = 1;
+ }
+ pVM->rem.s.uPendingException = uTrap;
+ pVM->rem.s.uPendingExcptEIP = env->eip;
+ pVM->rem.s.uPendingExcptCR2 = env->cr[2];
+ }
+ else
+ {
+ pVM->rem.s.cPendingExceptions = 0;
+ pVM->rem.s.uPendingException = uTrap;
+ pVM->rem.s.uPendingExcptEIP = env->eip;
+ pVM->rem.s.uPendingExcptCR2 = env->cr[2];
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Clear current active trap
+ *
+ * @param pVM VM Handle.
+ */
+void remR3TrapClear(PVM pVM)
+{
+ pVM->rem.s.cPendingExceptions = 0;
+ pVM->rem.s.uPendingException = 0;
+ pVM->rem.s.uPendingExcptEIP = 0;
+ pVM->rem.s.uPendingExcptCR2 = 0;
+}
+
+
+/**
+ * Record previous call instruction addresses
+ *
+ * @param env Pointer to the CPU environment.
+ */
+void remR3RecordCall(CPUX86State *env)
+{
+#ifdef VBOX_WITH_RAW_MODE
+ CSAMR3RecordCallAddress(env->pVM, env->eip);
+#endif
+}
+
+
+/**
+ * Syncs the internal REM state with the VM.
+ *
+ * This must be called before REMR3Run() is invoked whenever the REM
+ * state is not up to date. Calling it several times in a row is not
+ * permitted.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ *
+ * @remark  The caller has to check for important FFs before calling REMR3Run. REMR3State will
+ *          not do this since the majority of the callers don't want any unnecessary events
+ *          pending that would immediately interrupt execution.
+ */
+REMR3DECL(int) REMR3State(PVM pVM, PVMCPU pVCpu)
+{
+ register const CPUMCTX *pCtx;
+ register unsigned fFlags;
+ unsigned i;
+ TRPMEVENT enmType;
+ uint8_t u8TrapNo;
+ uint32_t uCpl;
+ int rc;
+
+ STAM_PROFILE_START(&pVM->rem.s.StatsState, a);
+ Log2(("REMR3State:\n"));
+
+ pVM->rem.s.Env.pVCpu = pVCpu;
+ pCtx = pVM->rem.s.pCtx = CPUMQueryGuestCtxPtr(pVCpu);
+
+ Assert(pCtx);
+ if ( CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ || CPUMIsGuestInVmxNonRootMode(pCtx))
+ {
+ AssertMsgFailed(("Bad scheduling - can't exec. nested-guest in REM!\n"));
+ return VERR_EM_CANNOT_EXEC_GUEST;
+ }
+
+ Assert(!pVM->rem.s.fInREM);
+ pVM->rem.s.fInStateSync = true;
+
+ /*
+ * If we have to flush TBs, do that immediately.
+ */
+ if (pVM->rem.s.fFlushTBs)
+ {
+ STAM_COUNTER_INC(&gStatFlushTBs);
+ tb_flush(&pVM->rem.s.Env);
+ pVM->rem.s.fFlushTBs = false;
+ }
+
+ /*
+ * Copy the registers which require no special handling.
+ */
+#ifdef TARGET_X86_64
+    /* Note that the high dwords of the 64-bit registers are undefined in 32-bit mode and after a mode change. */
+ Assert(R_EAX == 0);
+ pVM->rem.s.Env.regs[R_EAX] = pCtx->rax;
+ Assert(R_ECX == 1);
+ pVM->rem.s.Env.regs[R_ECX] = pCtx->rcx;
+ Assert(R_EDX == 2);
+ pVM->rem.s.Env.regs[R_EDX] = pCtx->rdx;
+ Assert(R_EBX == 3);
+ pVM->rem.s.Env.regs[R_EBX] = pCtx->rbx;
+ Assert(R_ESP == 4);
+ pVM->rem.s.Env.regs[R_ESP] = pCtx->rsp;
+ Assert(R_EBP == 5);
+ pVM->rem.s.Env.regs[R_EBP] = pCtx->rbp;
+ Assert(R_ESI == 6);
+ pVM->rem.s.Env.regs[R_ESI] = pCtx->rsi;
+ Assert(R_EDI == 7);
+ pVM->rem.s.Env.regs[R_EDI] = pCtx->rdi;
+ pVM->rem.s.Env.regs[8] = pCtx->r8;
+ pVM->rem.s.Env.regs[9] = pCtx->r9;
+ pVM->rem.s.Env.regs[10] = pCtx->r10;
+ pVM->rem.s.Env.regs[11] = pCtx->r11;
+ pVM->rem.s.Env.regs[12] = pCtx->r12;
+ pVM->rem.s.Env.regs[13] = pCtx->r13;
+ pVM->rem.s.Env.regs[14] = pCtx->r14;
+ pVM->rem.s.Env.regs[15] = pCtx->r15;
+
+ pVM->rem.s.Env.eip = pCtx->rip;
+
+ pVM->rem.s.Env.eflags = pCtx->rflags.u64;
+#else
+ Assert(R_EAX == 0);
+ pVM->rem.s.Env.regs[R_EAX] = pCtx->eax;
+ Assert(R_ECX == 1);
+ pVM->rem.s.Env.regs[R_ECX] = pCtx->ecx;
+ Assert(R_EDX == 2);
+ pVM->rem.s.Env.regs[R_EDX] = pCtx->edx;
+ Assert(R_EBX == 3);
+ pVM->rem.s.Env.regs[R_EBX] = pCtx->ebx;
+ Assert(R_ESP == 4);
+ pVM->rem.s.Env.regs[R_ESP] = pCtx->esp;
+ Assert(R_EBP == 5);
+ pVM->rem.s.Env.regs[R_EBP] = pCtx->ebp;
+ Assert(R_ESI == 6);
+ pVM->rem.s.Env.regs[R_ESI] = pCtx->esi;
+ Assert(R_EDI == 7);
+ pVM->rem.s.Env.regs[R_EDI] = pCtx->edi;
+ pVM->rem.s.Env.eip = pCtx->eip;
+
+ pVM->rem.s.Env.eflags = pCtx->eflags.u32;
+#endif
+
+ pVM->rem.s.Env.cr[2] = pCtx->cr2;
+
+ /** @todo we could probably benefit from using a CPUM_CHANGED_DRx flag too! */
+    for (i = 0; i < 8; i++)
+ pVM->rem.s.Env.dr[i] = pCtx->dr[i];
+
+#ifdef HF_HALTED_MASK /** @todo remove me when we're up to date again. */
+ /*
+ * Clear the halted hidden flag (the interrupt waking up the CPU can
+ * have been dispatched in raw mode).
+ */
+ pVM->rem.s.Env.hflags &= ~HF_HALTED_MASK;
+#endif
+
+ /*
+ * Replay invlpg? Only if we're not flushing the TLB.
+ */
+ fFlags = CPUMR3RemEnter(pVCpu, &uCpl);
+ LogFlow(("CPUMR3RemEnter %x %x\n", fFlags, uCpl));
+ if (pVM->rem.s.cInvalidatedPages)
+ {
+ if (!(fFlags & CPUM_CHANGED_GLOBAL_TLB_FLUSH))
+ {
+ RTUINT i;
+
+ pVM->rem.s.fIgnoreCR3Load = true;
+ pVM->rem.s.fIgnoreInvlPg = true;
+ for (i = 0; i < pVM->rem.s.cInvalidatedPages; i++)
+ {
+ Log2(("REMR3State: invlpg %RGv\n", pVM->rem.s.aGCPtrInvalidatedPages[i]));
+ tlb_flush_page(&pVM->rem.s.Env, pVM->rem.s.aGCPtrInvalidatedPages[i]);
+ }
+ pVM->rem.s.fIgnoreInvlPg = false;
+ pVM->rem.s.fIgnoreCR3Load = false;
+ }
+ pVM->rem.s.cInvalidatedPages = 0;
+ }
+
+ /* Replay notification changes. */
+ REMR3ReplayHandlerNotifications(pVM);
+
+ /* Update MSRs; before CRx registers! */
+ pVM->rem.s.Env.efer = pCtx->msrEFER;
+ pVM->rem.s.Env.star = pCtx->msrSTAR;
+ pVM->rem.s.Env.pat = pCtx->msrPAT;
+#ifdef TARGET_X86_64
+ pVM->rem.s.Env.lstar = pCtx->msrLSTAR;
+ pVM->rem.s.Env.cstar = pCtx->msrCSTAR;
+ pVM->rem.s.Env.fmask = pCtx->msrSFMASK;
+ pVM->rem.s.Env.kernelgsbase = pCtx->msrKERNELGSBASE;
+
+ /* Update the internal long mode activate flag according to the new EFER value. */
+ if (pCtx->msrEFER & MSR_K6_EFER_LMA)
+ pVM->rem.s.Env.hflags |= HF_LMA_MASK;
+ else
+ pVM->rem.s.Env.hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+#endif
+
+ /* Update the inhibit IRQ mask. */
+ pVM->rem.s.Env.hflags &= ~HF_INHIBIT_IRQ_MASK;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ RTGCPTR InhibitPC = EMGetInhibitInterruptsPC(pVCpu);
+ if (InhibitPC == pCtx->rip)
+ pVM->rem.s.Env.hflags |= HF_INHIBIT_IRQ_MASK;
+ else
+ {
+ Log(("Clearing VMCPU_FF_INHIBIT_INTERRUPTS at %RGv - successor %RGv (REM#1)\n", (RTGCPTR)pCtx->rip, InhibitPC));
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+ }
+
+ /* Update the inhibit NMI mask. */
+ pVM->rem.s.Env.hflags2 &= ~HF2_NMI_MASK;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ pVM->rem.s.Env.hflags2 |= HF2_NMI_MASK;
+
+ /*
+ * Sync the A20 gate.
+ */
+ bool fA20State = PGMPhysIsA20Enabled(pVCpu);
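+    /* Bit 20 of the recompiler's a20_mask mirrors the gate state; only call cpu_x86_set_a20() when it differs from the PGM view. */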
+ if (fA20State != RT_BOOL(pVM->rem.s.Env.a20_mask & RT_BIT(20)))
+ {
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+ cpu_x86_set_a20(&pVM->rem.s.Env, fA20State);
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+ }
+
+ /*
+ * Registers which are rarely changed and require special handling / order when changed.
+ */
+ if (fFlags & ( CPUM_CHANGED_GLOBAL_TLB_FLUSH
+ | CPUM_CHANGED_CR4
+ | CPUM_CHANGED_CR0
+ | CPUM_CHANGED_CR3
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_CPUID
+ | CPUM_CHANGED_FPU_REM
+ )
+ )
+ {
+ if (fFlags & CPUM_CHANGED_GLOBAL_TLB_FLUSH)
+ {
+ pVM->rem.s.fIgnoreCR3Load = true;
+ tlb_flush(&pVM->rem.s.Env, true);
+ pVM->rem.s.fIgnoreCR3Load = false;
+ }
+
+ /* CR4 before CR0! */
+ if (fFlags & CPUM_CHANGED_CR4)
+ {
+ pVM->rem.s.fIgnoreCR3Load = true;
+ pVM->rem.s.fIgnoreCpuMode = true;
+ cpu_x86_update_cr4(&pVM->rem.s.Env, pCtx->cr4);
+ pVM->rem.s.fIgnoreCpuMode = false;
+ pVM->rem.s.fIgnoreCR3Load = false;
+ }
+
+ if (fFlags & CPUM_CHANGED_CR0)
+ {
+ pVM->rem.s.fIgnoreCR3Load = true;
+ pVM->rem.s.fIgnoreCpuMode = true;
+ cpu_x86_update_cr0(&pVM->rem.s.Env, pCtx->cr0);
+ pVM->rem.s.fIgnoreCpuMode = false;
+ pVM->rem.s.fIgnoreCR3Load = false;
+ }
+
+ if (fFlags & CPUM_CHANGED_CR3)
+ {
+ pVM->rem.s.fIgnoreCR3Load = true;
+ cpu_x86_update_cr3(&pVM->rem.s.Env, pCtx->cr3);
+ pVM->rem.s.fIgnoreCR3Load = false;
+ }
+
+ if (fFlags & CPUM_CHANGED_GDTR)
+ {
+ pVM->rem.s.Env.gdt.base = pCtx->gdtr.pGdt;
+ pVM->rem.s.Env.gdt.limit = pCtx->gdtr.cbGdt;
+ }
+
+ if (fFlags & CPUM_CHANGED_IDTR)
+ {
+ pVM->rem.s.Env.idt.base = pCtx->idtr.pIdt;
+ pVM->rem.s.Env.idt.limit = pCtx->idtr.cbIdt;
+ }
+
+ if (fFlags & CPUM_CHANGED_SYSENTER_MSR)
+ {
+ pVM->rem.s.Env.sysenter_cs = pCtx->SysEnter.cs;
+ pVM->rem.s.Env.sysenter_eip = pCtx->SysEnter.eip;
+ pVM->rem.s.Env.sysenter_esp = pCtx->SysEnter.esp;
+ }
+
+ if (fFlags & CPUM_CHANGED_LDTR)
+ {
+ if (pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID)
+ {
+ pVM->rem.s.Env.ldt.selector = pCtx->ldtr.Sel;
+ pVM->rem.s.Env.ldt.newselector = 0;
+ pVM->rem.s.Env.ldt.fVBoxFlags = pCtx->ldtr.fFlags;
+ pVM->rem.s.Env.ldt.base = pCtx->ldtr.u64Base;
+ pVM->rem.s.Env.ldt.limit = pCtx->ldtr.u32Limit;
+ pVM->rem.s.Env.ldt.flags = (pCtx->ldtr.Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT;
+ }
+ else
+ {
+ AssertFailed(); /* Shouldn't happen, see cpumR3LoadExec. */
+ sync_ldtr(&pVM->rem.s.Env, pCtx->ldtr.Sel);
+ }
+ }
+
+ if (fFlags & CPUM_CHANGED_CPUID)
+ {
+ uint32_t u32Dummy;
+
+ /*
+ * Get the CPUID features.
+ */
+ CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features);
+ CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features);
+ }
+
+ /* Sync FPU state after CR4, CPUID and EFER (!). */
+ if (fFlags & CPUM_CHANGED_FPU_REM)
+ save_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->pXStateR3->x87); /* 'save' is an excellent name. */
+ }
+
+ /*
+ * Sync TR unconditionally to make life simpler.
+ */
+ pVM->rem.s.Env.tr.selector = pCtx->tr.Sel;
+ pVM->rem.s.Env.tr.newselector = 0;
+ pVM->rem.s.Env.tr.fVBoxFlags = pCtx->tr.fFlags;
+ pVM->rem.s.Env.tr.base = pCtx->tr.u64Base;
+ pVM->rem.s.Env.tr.limit = pCtx->tr.u32Limit;
+ pVM->rem.s.Env.tr.flags = (pCtx->tr.Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT;
+
+ /*
+ * Update selector registers.
+ *
+ * This must be done *after* we've synced gdt, ldt and crX registers
+     * since we're reading the GDT/LDT in sync_seg. This will happen with a
+     * saved state which takes a quick dip into raw mode, for instance.
+ *
+     * CPL/Stack: Note! Check this one first as the CPL might have changed.
+ * The wrong CPL can cause QEmu to raise an exception in sync_seg!!
+ */
+ cpu_x86_set_cpl(&pVM->rem.s.Env, uCpl);
+ /* Note! QEmu saves the 2nd dword of the descriptor; we should convert the attribute word back! */
+#define SYNC_IN_SREG(a_pEnv, a_SReg, a_pRemSReg, a_pVBoxSReg) \
+ do \
+ { \
+ if (CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, a_pVBoxSReg)) \
+ { \
+ cpu_x86_load_seg_cache(a_pEnv, R_##a_SReg, \
+ (a_pVBoxSReg)->Sel, \
+ (a_pVBoxSReg)->u64Base, \
+ (a_pVBoxSReg)->u32Limit, \
+ ((a_pVBoxSReg)->Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT); \
+ (a_pRemSReg)->fVBoxFlags = (a_pVBoxSReg)->fFlags; \
+ } \
+ /* This only-reload-if-changed stuff is the old approach, we should ditch it. */ \
+ else if ((a_pRemSReg)->selector != (a_pVBoxSReg)->Sel) \
+ { \
+ Log2(("REMR3State: " #a_SReg " changed from %04x to %04x!\n", \
+ (a_pRemSReg)->selector, (a_pVBoxSReg)->Sel)); \
+ sync_seg(a_pEnv, R_##a_SReg, (a_pVBoxSReg)->Sel); \
+ if ((a_pRemSReg)->newselector) \
+ STAM_COUNTER_INC(&gStatSelOutOfSync[R_##a_SReg]); \
+ } \
+ else \
+ (a_pRemSReg)->newselector = 0; \
+ } while (0)
+
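+    /* Load the guest segment registers, taking the hidden parts directly from CPUM when they are valid and falling back on sync_seg() otherwise. */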
+ SYNC_IN_SREG(&pVM->rem.s.Env, CS, &pVM->rem.s.Env.segs[R_CS], &pCtx->cs);
+ SYNC_IN_SREG(&pVM->rem.s.Env, SS, &pVM->rem.s.Env.segs[R_SS], &pCtx->ss);
+ SYNC_IN_SREG(&pVM->rem.s.Env, DS, &pVM->rem.s.Env.segs[R_DS], &pCtx->ds);
+ SYNC_IN_SREG(&pVM->rem.s.Env, ES, &pVM->rem.s.Env.segs[R_ES], &pCtx->es);
+ SYNC_IN_SREG(&pVM->rem.s.Env, FS, &pVM->rem.s.Env.segs[R_FS], &pCtx->fs);
+ SYNC_IN_SREG(&pVM->rem.s.Env, GS, &pVM->rem.s.Env.segs[R_GS], &pCtx->gs);
+ /** @todo need to find a way to communicate potential GDT/LDT changes and thread switches. The selector might
+ * be the same but not the base/limit. */
+
+ /*
+ * Check for traps.
+ */
+ pVM->rem.s.Env.exception_index = -1; /** @todo this won't work :/ */
+ rc = TRPMQueryTrap(pVCpu, &u8TrapNo, &enmType);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef DEBUG
+ if (u8TrapNo == 0x80)
+ {
+ remR3DumpLnxSyscall(pVCpu);
+ remR3DumpOBsdSyscall(pVCpu);
+ }
+#endif
+
+ pVM->rem.s.Env.exception_index = u8TrapNo;
+ if (enmType != TRPM_SOFTWARE_INT)
+ {
+ pVM->rem.s.Env.exception_is_int = enmType == TRPM_HARDWARE_INT
+ ? EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ : 0; /* HACK ALERT! */
+ pVM->rem.s.Env.exception_next_eip = pVM->rem.s.Env.eip;
+ }
+ else
+ {
+ /*
+             * There are two 1-byte opcodes and one 2-byte opcode for software interrupts.
+             * We ASSUME that there are no prefixes, set the default to 2 bytes, and check
+             * for int3 and into.
+ */
+ pVM->rem.s.Env.exception_is_int = 1;
+ pVM->rem.s.Env.exception_next_eip = pCtx->rip + 2;
+ /* int 3 may be generated by one-byte 0xcc */
+ if (u8TrapNo == 3)
+ {
+ if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xcc)
+ pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1;
+ }
+ /* int 4 may be generated by one-byte 0xce */
+ else if (u8TrapNo == 4)
+ {
+ if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xce)
+ pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1;
+ }
+ }
+
+ /* get error code and cr2 if needed. */
+ if (enmType == TRPM_TRAP)
+ {
+ switch (u8TrapNo)
+ {
+ case X86_XCPT_PF:
+ pVM->rem.s.Env.cr[2] = TRPMGetFaultAddress(pVCpu);
+ /* fallthru */
+ case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP:
+ pVM->rem.s.Env.error_code = TRPMGetErrorCode(pVCpu);
+ break;
+
+ case X86_XCPT_AC: case X86_XCPT_DF:
+ default:
+ pVM->rem.s.Env.error_code = 0;
+ break;
+ }
+ }
+ else
+ pVM->rem.s.Env.error_code = 0;
+
+ /*
+ * We can now reset the active trap since the recompiler is gonna have a go at it.
+ */
+ rc = TRPMResetTrap(pVCpu);
+ AssertRC(rc);
+ Log2(("REMR3State: trap=%02x errcd=%RGv cr2=%RGv nexteip=%RGv%s\n", pVM->rem.s.Env.exception_index, (RTGCPTR)pVM->rem.s.Env.error_code,
+ (RTGCPTR)pVM->rem.s.Env.cr[2], (RTGCPTR)pVM->rem.s.Env.exception_next_eip, pVM->rem.s.Env.exception_is_int ? " software" : ""));
+ }
+
+ /*
+ * Clear old interrupt request flags; Check for pending hardware interrupts.
+ * (See @remark for why we don't check for other FFs.)
+ */
+ pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER);
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
+ pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD;
+
+ /*
+ * We're now in REM mode.
+ */
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_REM);
+ pVM->rem.s.fInREM = true;
+ pVM->rem.s.fInStateSync = false;
+ pVM->rem.s.cCanExecuteRaw = 0;
+ STAM_PROFILE_STOP(&pVM->rem.s.StatsState, a);
+ Log2(("REMR3State: returns VINF_SUCCESS\n"));
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Syncs back changes in the REM state to the VM state.
+ *
+ * This must be called after invoking REMR3Run().
+ * Calling it several times in a row is not permitted.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ */
+REMR3DECL(int) REMR3StateBack(PVM pVM, PVMCPU pVCpu)
+{
+ register PCPUMCTX pCtx = pVM->rem.s.pCtx;
+ Assert(pCtx);
+ unsigned i;
+
+ STAM_PROFILE_START(&pVM->rem.s.StatsStateBack, a);
+ Log2(("REMR3StateBack:\n"));
+ Assert(pVM->rem.s.fInREM);
+
+ /*
+ * Copy back the registers.
+ * This is done in the order they are declared in the CPUMCTX structure.
+ */
+
+ /** @todo FOP */
+ /** @todo FPUIP */
+ /** @todo CS */
+ /** @todo FPUDP */
+ /** @todo DS */
+
+ /** @todo check if FPU/XMM was actually used in the recompiler */
+ restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->pXStateR3->x87);
+//// dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW));
+
+#ifdef TARGET_X86_64
+    /* Note that the high dwords of the 64-bit registers are undefined in 32-bit mode and after a mode change. */
+ pCtx->rdi = pVM->rem.s.Env.regs[R_EDI];
+ pCtx->rsi = pVM->rem.s.Env.regs[R_ESI];
+ pCtx->rbp = pVM->rem.s.Env.regs[R_EBP];
+ pCtx->rax = pVM->rem.s.Env.regs[R_EAX];
+ pCtx->rbx = pVM->rem.s.Env.regs[R_EBX];
+ pCtx->rdx = pVM->rem.s.Env.regs[R_EDX];
+ pCtx->rcx = pVM->rem.s.Env.regs[R_ECX];
+ pCtx->r8 = pVM->rem.s.Env.regs[8];
+ pCtx->r9 = pVM->rem.s.Env.regs[9];
+ pCtx->r10 = pVM->rem.s.Env.regs[10];
+ pCtx->r11 = pVM->rem.s.Env.regs[11];
+ pCtx->r12 = pVM->rem.s.Env.regs[12];
+ pCtx->r13 = pVM->rem.s.Env.regs[13];
+ pCtx->r14 = pVM->rem.s.Env.regs[14];
+ pCtx->r15 = pVM->rem.s.Env.regs[15];
+
+ pCtx->rsp = pVM->rem.s.Env.regs[R_ESP];
+
+#else
+ pCtx->edi = pVM->rem.s.Env.regs[R_EDI];
+ pCtx->esi = pVM->rem.s.Env.regs[R_ESI];
+ pCtx->ebp = pVM->rem.s.Env.regs[R_EBP];
+ pCtx->eax = pVM->rem.s.Env.regs[R_EAX];
+ pCtx->ebx = pVM->rem.s.Env.regs[R_EBX];
+ pCtx->edx = pVM->rem.s.Env.regs[R_EDX];
+ pCtx->ecx = pVM->rem.s.Env.regs[R_ECX];
+
+ pCtx->esp = pVM->rem.s.Env.regs[R_ESP];
+#endif
+
+#define SYNC_BACK_SREG(a_sreg, a_SREG) \
+ do \
+ { \
+ pCtx->a_sreg.Sel = pVM->rem.s.Env.segs[R_##a_SREG].selector; \
+        if (!pVM->rem.s.Env.segs[R_##a_SREG].newselector) \
+ { \
+ pCtx->a_sreg.ValidSel = pVM->rem.s.Env.segs[R_##a_SREG].selector; \
+ pCtx->a_sreg.fFlags = CPUMSELREG_FLAGS_VALID; \
+ pCtx->a_sreg.u64Base = pVM->rem.s.Env.segs[R_##a_SREG].base; \
+ pCtx->a_sreg.u32Limit = pVM->rem.s.Env.segs[R_##a_SREG].limit; \
+ /* Note! QEmu saves the 2nd dword of the descriptor; we (VT-x/AMD-V) keep only the attributes! */ \
+ pCtx->a_sreg.Attr.u = (pVM->rem.s.Env.segs[R_##a_SREG].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; \
+ } \
+ else \
+ { \
+ pCtx->a_sreg.fFlags = 0; \
+ STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_##a_SREG]); \
+ } \
+ } while (0)
+
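+    /* Copy the segment registers back, flagging the hidden parts valid unless REM still has a selector load pending. */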
+ SYNC_BACK_SREG(es, ES);
+ SYNC_BACK_SREG(cs, CS);
+ SYNC_BACK_SREG(ss, SS);
+ SYNC_BACK_SREG(ds, DS);
+ SYNC_BACK_SREG(fs, FS);
+ SYNC_BACK_SREG(gs, GS);
+
+#ifdef TARGET_X86_64
+ pCtx->rip = pVM->rem.s.Env.eip;
+ pCtx->rflags.u64 = pVM->rem.s.Env.eflags;
+#else
+ pCtx->eip = pVM->rem.s.Env.eip;
+ pCtx->eflags.u32 = pVM->rem.s.Env.eflags;
+#endif
+
+ pCtx->cr0 = pVM->rem.s.Env.cr[0];
+ pCtx->cr2 = pVM->rem.s.Env.cr[2];
+ pCtx->cr3 = pVM->rem.s.Env.cr[3];
+#ifdef VBOX_WITH_RAW_MODE
+ if (((pVM->rem.s.Env.cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ pCtx->cr4 = pVM->rem.s.Env.cr[4];
+
+ for (i = 0; i < 8; i++)
+ pCtx->dr[i] = pVM->rem.s.Env.dr[i];
+
+ pCtx->gdtr.cbGdt = pVM->rem.s.Env.gdt.limit;
+ if (pCtx->gdtr.pGdt != pVM->rem.s.Env.gdt.base)
+ {
+ pCtx->gdtr.pGdt = pVM->rem.s.Env.gdt.base;
+ STAM_COUNTER_INC(&gStatREMGDTChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT);
+#endif
+ }
+
+ pCtx->idtr.cbIdt = pVM->rem.s.Env.idt.limit;
+ if (pCtx->idtr.pIdt != pVM->rem.s.Env.idt.base)
+ {
+ pCtx->idtr.pIdt = pVM->rem.s.Env.idt.base;
+ STAM_COUNTER_INC(&gStatREMIDTChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT);
+#endif
+ }
+
+ if ( pCtx->ldtr.Sel != pVM->rem.s.Env.ldt.selector
+ || pCtx->ldtr.ValidSel != pVM->rem.s.Env.ldt.selector
+ || pCtx->ldtr.u64Base != pVM->rem.s.Env.ldt.base
+ || pCtx->ldtr.u32Limit != pVM->rem.s.Env.ldt.limit
+ || pCtx->ldtr.Attr.u != ((pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK)
+ || !(pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID)
+ )
+ {
+ pCtx->ldtr.Sel = pVM->rem.s.Env.ldt.selector;
+ pCtx->ldtr.ValidSel = pVM->rem.s.Env.ldt.selector;
+ pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->ldtr.u64Base = pVM->rem.s.Env.ldt.base;
+ pCtx->ldtr.u32Limit = pVM->rem.s.Env.ldt.limit;
+ pCtx->ldtr.Attr.u = (pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+ STAM_COUNTER_INC(&gStatREMLDTRChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT);
+#endif
+ }
+
+ if ( pCtx->tr.Sel != pVM->rem.s.Env.tr.selector
+ || pCtx->tr.ValidSel != pVM->rem.s.Env.tr.selector
+ || pCtx->tr.u64Base != pVM->rem.s.Env.tr.base
+ || pCtx->tr.u32Limit != pVM->rem.s.Env.tr.limit
+ || pCtx->tr.Attr.u != ((pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK)
+ || !(pCtx->tr.fFlags & CPUMSELREG_FLAGS_VALID)
+ )
+ {
+ Log(("REM: TR changed! %#x{%#llx,%#x,%#x} -> %#x{%llx,%#x,%#x}\n",
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pVM->rem.s.Env.tr.selector, (uint64_t)pVM->rem.s.Env.tr.base, pVM->rem.s.Env.tr.limit,
+ pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT));
+ pCtx->tr.Sel = pVM->rem.s.Env.tr.selector;
+ pCtx->tr.ValidSel = pVM->rem.s.Env.tr.selector;
+ pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->tr.u64Base = pVM->rem.s.Env.tr.base;
+ pCtx->tr.u32Limit = pVM->rem.s.Env.tr.limit;
+ pCtx->tr.Attr.u = (pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+ Assert(pCtx->tr.Attr.u & ~DESC_INTEL_UNUSABLE);
+ STAM_COUNTER_INC(&gStatREMTRChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ }
+
+ /* Sysenter MSR */
+ pCtx->SysEnter.cs = pVM->rem.s.Env.sysenter_cs;
+ pCtx->SysEnter.eip = pVM->rem.s.Env.sysenter_eip;
+ pCtx->SysEnter.esp = pVM->rem.s.Env.sysenter_esp;
+
+ /* System MSRs. */
+ pCtx->msrEFER = pVM->rem.s.Env.efer;
+ pCtx->msrSTAR = pVM->rem.s.Env.star;
+ pCtx->msrPAT = pVM->rem.s.Env.pat;
+#ifdef TARGET_X86_64
+ pCtx->msrLSTAR = pVM->rem.s.Env.lstar;
+ pCtx->msrCSTAR = pVM->rem.s.Env.cstar;
+ pCtx->msrSFMASK = pVM->rem.s.Env.fmask;
+ pCtx->msrKERNELGSBASE = pVM->rem.s.Env.kernelgsbase;
+#endif
+
+ /* Inhibit interrupt flag. */
+ if (pVM->rem.s.Env.hflags & HF_INHIBIT_IRQ_MASK)
+ {
+ Log(("Settings VMCPU_FF_INHIBIT_INTERRUPTS at %RGv (REM)\n", (RTGCPTR)pCtx->rip));
+ EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ Log(("Clearing VMCPU_FF_INHIBIT_INTERRUPTS at %RGv - successor %RGv (REM#2)\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ /* Inhibit NMI flag. */
+ if (pVM->rem.s.Env.hflags2 & HF2_NMI_MASK)
+ {
+ Log(("Settings VMCPU_FF_BLOCK_NMIS at %RGv (REM)\n", (RTGCPTR)pCtx->rip));
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ {
+ Log(("Clearing VMCPU_FF_BLOCK_NMIS at %RGv (REM)\n", (RTGCPTR)pCtx->rip));
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ remR3TrapClear(pVM);
+
+ /*
+ * Check for traps.
+ */
+ if ( pVM->rem.s.Env.exception_index >= 0
+ && pVM->rem.s.Env.exception_index < 256)
+ {
+ /* This cannot be a hardware-interrupt because exception_index < EXCP_INTERRUPT. */
+ int rc;
+
+ Log(("REMR3StateBack: Pending trap %x %d\n", pVM->rem.s.Env.exception_index, pVM->rem.s.Env.exception_is_int));
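+        /* exception_is_int was set up by REMR3State: 0 for traps/exceptions, EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ for external interrupts, and 1 for software interrupts. */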
+ TRPMEVENT enmType = pVM->rem.s.Env.exception_is_int == 0 ? TRPM_TRAP
+ : pVM->rem.s.Env.exception_is_int == EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ ? TRPM_HARDWARE_INT
+ : TRPM_SOFTWARE_INT;
+ rc = TRPMAssertTrap(pVCpu, pVM->rem.s.Env.exception_index, enmType);
+ AssertRC(rc);
+ if (enmType == TRPM_TRAP)
+ {
+ switch (pVM->rem.s.Env.exception_index)
+ {
+ case X86_XCPT_PF:
+ TRPMSetFaultAddress(pVCpu, pCtx->cr2);
+ /* fallthru */
+ case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP:
+ case X86_XCPT_AC: case X86_XCPT_DF: /* 0 */
+ TRPMSetErrorCode(pVCpu, pVM->rem.s.Env.error_code);
+ break;
+ }
+ }
+ }
+
+ /*
+     * We're no longer in REM mode.
+ */
+ CPUMR3RemLeave(pVCpu,
+ !VM_IS_RAW_MODE_ENABLED(pVM)
+ || ( pVM->rem.s.Env.segs[R_SS].newselector
+ | pVM->rem.s.Env.segs[R_GS].newselector
+ | pVM->rem.s.Env.segs[R_FS].newselector
+ | pVM->rem.s.Env.segs[R_ES].newselector
+ | pVM->rem.s.Env.segs[R_DS].newselector
+ | pVM->rem.s.Env.segs[R_CS].newselector) == 0
+ );
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_REM);
+ pVM->rem.s.fInREM = false;
+ pVM->rem.s.pCtx = NULL;
+ pVM->rem.s.Env.pVCpu = NULL;
+ STAM_PROFILE_STOP(&pVM->rem.s.StatsStateBack, a);
+ Log2(("REMR3StateBack: returns VINF_SUCCESS\n"));
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * This is called by the disassembler when it wants to update the cpu state
+ * before, for instance, doing a register dump.
+ */
+static void remR3StateUpdate(PVM pVM, PVMCPU pVCpu)
+{
+ register PCPUMCTX pCtx = pVM->rem.s.pCtx;
+ unsigned i;
+
+ Assert(pVM->rem.s.fInREM);
+
+ /*
+ * Copy back the registers.
+ * This is done in the order they are declared in the CPUMCTX structure.
+ */
+
+ PX86FXSTATE pFpuCtx = &pCtx->pXStateR3->x87;
+ /** @todo FOP */
+ /** @todo FPUIP */
+ /** @todo CS */
+ /** @todo FPUDP */
+ /** @todo DS */
+ /** @todo Fix MXCSR support in QEMU so we don't overwrite MXCSR with 0 when we shouldn't! */
+ pFpuCtx->MXCSR = 0;
+ pFpuCtx->MXCSR_MASK = 0;
+
+ /** @todo check if FPU/XMM was actually used in the recompiler */
+ restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)pFpuCtx);
+//// dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW));
+
+#ifdef TARGET_X86_64
+ pCtx->rdi = pVM->rem.s.Env.regs[R_EDI];
+ pCtx->rsi = pVM->rem.s.Env.regs[R_ESI];
+ pCtx->rbp = pVM->rem.s.Env.regs[R_EBP];
+ pCtx->rax = pVM->rem.s.Env.regs[R_EAX];
+ pCtx->rbx = pVM->rem.s.Env.regs[R_EBX];
+ pCtx->rdx = pVM->rem.s.Env.regs[R_EDX];
+ pCtx->rcx = pVM->rem.s.Env.regs[R_ECX];
+ pCtx->r8 = pVM->rem.s.Env.regs[8];
+ pCtx->r9 = pVM->rem.s.Env.regs[9];
+ pCtx->r10 = pVM->rem.s.Env.regs[10];
+ pCtx->r11 = pVM->rem.s.Env.regs[11];
+ pCtx->r12 = pVM->rem.s.Env.regs[12];
+ pCtx->r13 = pVM->rem.s.Env.regs[13];
+ pCtx->r14 = pVM->rem.s.Env.regs[14];
+ pCtx->r15 = pVM->rem.s.Env.regs[15];
+
+ pCtx->rsp = pVM->rem.s.Env.regs[R_ESP];
+#else
+ pCtx->edi = pVM->rem.s.Env.regs[R_EDI];
+ pCtx->esi = pVM->rem.s.Env.regs[R_ESI];
+ pCtx->ebp = pVM->rem.s.Env.regs[R_EBP];
+ pCtx->eax = pVM->rem.s.Env.regs[R_EAX];
+ pCtx->ebx = pVM->rem.s.Env.regs[R_EBX];
+ pCtx->edx = pVM->rem.s.Env.regs[R_EDX];
+ pCtx->ecx = pVM->rem.s.Env.regs[R_ECX];
+
+ pCtx->esp = pVM->rem.s.Env.regs[R_ESP];
+#endif
+
+ SYNC_BACK_SREG(es, ES);
+ SYNC_BACK_SREG(cs, CS);
+ SYNC_BACK_SREG(ss, SS);
+ SYNC_BACK_SREG(ds, DS);
+ SYNC_BACK_SREG(fs, FS);
+ SYNC_BACK_SREG(gs, GS);
+
+#ifdef TARGET_X86_64
+ pCtx->rip = pVM->rem.s.Env.eip;
+ pCtx->rflags.u64 = pVM->rem.s.Env.eflags;
+#else
+ pCtx->eip = pVM->rem.s.Env.eip;
+ pCtx->eflags.u32 = pVM->rem.s.Env.eflags;
+#endif
+
+ pCtx->cr0 = pVM->rem.s.Env.cr[0];
+ pCtx->cr2 = pVM->rem.s.Env.cr[2];
+ pCtx->cr3 = pVM->rem.s.Env.cr[3];
+#ifdef VBOX_WITH_RAW_MODE
+ if (((pVM->rem.s.Env.cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ pCtx->cr4 = pVM->rem.s.Env.cr[4];
+
+ for (i = 0; i < 8; i++)
+ pCtx->dr[i] = pVM->rem.s.Env.dr[i];
+
+ pCtx->gdtr.cbGdt = pVM->rem.s.Env.gdt.limit;
+ if (pCtx->gdtr.pGdt != (RTGCPTR)pVM->rem.s.Env.gdt.base)
+ {
+ pCtx->gdtr.pGdt = (RTGCPTR)pVM->rem.s.Env.gdt.base;
+ STAM_COUNTER_INC(&gStatREMGDTChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT);
+#endif
+ }
+
+ pCtx->idtr.cbIdt = pVM->rem.s.Env.idt.limit;
+ if (pCtx->idtr.pIdt != (RTGCPTR)pVM->rem.s.Env.idt.base)
+ {
+ pCtx->idtr.pIdt = (RTGCPTR)pVM->rem.s.Env.idt.base;
+ STAM_COUNTER_INC(&gStatREMIDTChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT);
+#endif
+ }
+
+ if ( pCtx->ldtr.Sel != pVM->rem.s.Env.ldt.selector
+ || pCtx->ldtr.ValidSel != pVM->rem.s.Env.ldt.selector
+ || pCtx->ldtr.u64Base != pVM->rem.s.Env.ldt.base
+ || pCtx->ldtr.u32Limit != pVM->rem.s.Env.ldt.limit
+ || pCtx->ldtr.Attr.u != ((pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK)
+ || !(pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID)
+ )
+ {
+ pCtx->ldtr.Sel = pVM->rem.s.Env.ldt.selector;
+ pCtx->ldtr.ValidSel = pVM->rem.s.Env.ldt.selector;
+ pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->ldtr.u64Base = pVM->rem.s.Env.ldt.base;
+ pCtx->ldtr.u32Limit = pVM->rem.s.Env.ldt.limit;
+ pCtx->ldtr.Attr.u = (pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+ STAM_COUNTER_INC(&gStatREMLDTRChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT);
+#endif
+ }
+
+ if ( pCtx->tr.Sel != pVM->rem.s.Env.tr.selector
+ || pCtx->tr.ValidSel != pVM->rem.s.Env.tr.selector
+ || pCtx->tr.u64Base != pVM->rem.s.Env.tr.base
+ || pCtx->tr.u32Limit != pVM->rem.s.Env.tr.limit
+ || pCtx->tr.Attr.u != ((pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK)
+ || !(pCtx->tr.fFlags & CPUMSELREG_FLAGS_VALID)
+ )
+ {
+ Log(("REM: TR changed! %#x{%#llx,%#x,%#x} -> %#x{%llx,%#x,%#x}\n",
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pVM->rem.s.Env.tr.selector, (uint64_t)pVM->rem.s.Env.tr.base, pVM->rem.s.Env.tr.limit,
+ pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT));
+ pCtx->tr.Sel = pVM->rem.s.Env.tr.selector;
+ pCtx->tr.ValidSel = pVM->rem.s.Env.tr.selector;
+ pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pCtx->tr.u64Base = pVM->rem.s.Env.tr.base;
+ pCtx->tr.u32Limit = pVM->rem.s.Env.tr.limit;
+ pCtx->tr.Attr.u = (pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK;
+ Assert(pCtx->tr.Attr.u & ~DESC_INTEL_UNUSABLE);
+ STAM_COUNTER_INC(&gStatREMTRChange);
+#ifdef VBOX_WITH_RAW_MODE
+ if (VM_IS_RAW_MODE_ENABLED(pVM))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+#endif
+ }
+
+ /* Sysenter MSR */
+ pCtx->SysEnter.cs = pVM->rem.s.Env.sysenter_cs;
+ pCtx->SysEnter.eip = pVM->rem.s.Env.sysenter_eip;
+ pCtx->SysEnter.esp = pVM->rem.s.Env.sysenter_esp;
+
+ /* System MSRs. */
+ pCtx->msrEFER = pVM->rem.s.Env.efer;
+ pCtx->msrSTAR = pVM->rem.s.Env.star;
+ pCtx->msrPAT = pVM->rem.s.Env.pat;
+#ifdef TARGET_X86_64
+ pCtx->msrLSTAR = pVM->rem.s.Env.lstar;
+ pCtx->msrCSTAR = pVM->rem.s.Env.cstar;
+ pCtx->msrSFMASK = pVM->rem.s.Env.fmask;
+ pCtx->msrKERNELGSBASE = pVM->rem.s.Env.kernelgsbase;
+#endif
+
+}
+
+
+/**
+ * Update the VMM state information if we're currently in REM.
+ *
+ * This method is used by DBGF and PDM devices when there is any uncertainty whether
+ * we're currently executing in REM and the VMM state is invalid. This method will of
+ * course check that we're executing in REM before syncing any data over to the VMM.
+ *
+ * @param pVM The VM handle.
+ * @param pVCpu The VMCPU handle.
+ */
+REMR3DECL(void) REMR3StateUpdate(PVM pVM, PVMCPU pVCpu)
+{
+ if (pVM->rem.s.fInREM)
+ remR3StateUpdate(pVM, pVCpu);
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/**
+ * Notify the recompiler about Address Gate 20 state change.
+ *
+ * This notification is required since A20 gate changes are
+ * initiated from a device driver and the VM might just as
+ * well be in REM mode as in RAW mode.
+ *
+ * @param pVM VM handle.
+ * @param pVCpu VMCPU handle.
+ * @param fEnable True if the gate should be enabled.
+ * False if the gate should be disabled.
+ */
+REMR3DECL(void) REMR3A20Set(PVM pVM, PVMCPU pVCpu, bool fEnable)
+{
+ LogFlow(("REMR3A20Set: fEnable=%d\n", fEnable));
+ VM_ASSERT_EMT(pVM);
+
+ /** @todo SMP and the A20 gate... */
+ if (pVM->rem.s.Env.pVCpu == pVCpu)
+ {
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+ cpu_x86_set_a20(&pVM->rem.s.Env, fEnable);
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+ }
+}
+
+
+/**
+ * Replays the handler notification changes.
+ * Called in response to VM_FF_REM_HANDLER_NOTIFY from the RAW execution loop.
+ *
+ * @param pVM VM handle.
+ */
+REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM)
+{
+ /*
+ * Replay the flushes.
+ */
+ LogFlow(("REMR3ReplayHandlerNotifications:\n"));
+ VM_ASSERT_EMT(pVM);
+
+ /** @todo this isn't ensuring correct replay order. */
+ if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_REM_HANDLER_NOTIFY))
+ {
+ uint32_t idxNext;
+ uint32_t idxRevHead;
+ uint32_t idxHead;
+#ifdef VBOX_STRICT
+ int32_t c = 0;
+#endif
+
+ /* Lockless purging of pending notifications. */
+ idxHead = ASMAtomicXchgU32(&pVM->rem.s.idxPendingList, UINT32_MAX);
+ if (idxHead == UINT32_MAX)
+ return;
+ Assert(idxHead < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications));
+
+ /*
+ * Reverse the list to process it in FIFO order.
+ */
+ idxRevHead = UINT32_MAX;
+ do
+ {
+ /* Save the index of the next rec. */
+ idxNext = pVM->rem.s.aHandlerNotifications[idxHead].idxNext;
+ Assert(idxNext < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications) || idxNext == UINT32_MAX);
+ /* Push the record onto the reversed list. */
+ pVM->rem.s.aHandlerNotifications[idxHead].idxNext = idxRevHead;
+ idxRevHead = idxHead;
+ Assert(++c <= RT_ELEMENTS(pVM->rem.s.aHandlerNotifications));
+ /* Advance. */
+ idxHead = idxNext;
+ } while (idxHead != UINT32_MAX);
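+        /* idxRevHead now refers to the oldest record, so the loop below processes the notifications in FIFO order. */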
+
+ /*
+         * Loop thru the list, reinserting the records into the free list as they are
+         * processed to avoid having other EMTs run out of entries while we're flushing.
+ */
+ idxHead = idxRevHead;
+ do
+ {
+ PREMHANDLERNOTIFICATION pCur = &pVM->rem.s.aHandlerNotifications[idxHead];
+ uint32_t idxCur;
+ Assert(--c >= 0);
+
+ switch (pCur->enmKind)
+ {
+ case REMHANDLERNOTIFICATIONKIND_PHYSICAL_REGISTER:
+ remR3NotifyHandlerPhysicalRegister(pVM,
+ pCur->u.PhysicalRegister.enmKind,
+ pCur->u.PhysicalRegister.GCPhys,
+ pCur->u.PhysicalRegister.cb,
+ pCur->u.PhysicalRegister.fHasHCHandler);
+ break;
+
+ case REMHANDLERNOTIFICATIONKIND_PHYSICAL_DEREGISTER:
+ remR3NotifyHandlerPhysicalDeregister(pVM,
+ pCur->u.PhysicalDeregister.enmKind,
+ pCur->u.PhysicalDeregister.GCPhys,
+ pCur->u.PhysicalDeregister.cb,
+ pCur->u.PhysicalDeregister.fHasHCHandler,
+ pCur->u.PhysicalDeregister.fRestoreAsRAM);
+ break;
+
+ case REMHANDLERNOTIFICATIONKIND_PHYSICAL_MODIFY:
+ remR3NotifyHandlerPhysicalModify(pVM,
+ pCur->u.PhysicalModify.enmKind,
+ pCur->u.PhysicalModify.GCPhysOld,
+ pCur->u.PhysicalModify.GCPhysNew,
+ pCur->u.PhysicalModify.cb,
+ pCur->u.PhysicalModify.fHasHCHandler,
+ pCur->u.PhysicalModify.fRestoreAsRAM);
+ break;
+
+ default:
+ AssertReleaseMsgFailed(("enmKind=%d\n", pCur->enmKind));
+ break;
+ }
+
+ /*
+ * Advance idxHead.
+ */
+ idxCur = idxHead;
+ idxHead = pCur->idxNext;
+ Assert(idxHead < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications) || (idxHead == UINT32_MAX && c == 0));
+
+ /*
+ * Put the record back into the free list.
+ */
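+            /* Lockless LIFO push: point the record at the current free list head and retry the CmpXchg until the head is unchanged. */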
+ do
+ {
+ idxNext = ASMAtomicUoReadU32(&pVM->rem.s.idxFreeList);
+ ASMAtomicWriteU32(&pCur->idxNext, idxNext);
+ ASMCompilerBarrier();
+ } while (!ASMAtomicCmpXchgU32(&pVM->rem.s.idxFreeList, idxCur, idxNext));
+ } while (idxHead != UINT32_MAX);
+
+#ifdef VBOX_STRICT
+ if (pVM->cCpus == 1)
+ {
+ unsigned c;
+ /* Check that all records are now on the free list. */
+ for (c = 0, idxNext = pVM->rem.s.idxFreeList; idxNext != UINT32_MAX;
+ idxNext = pVM->rem.s.aHandlerNotifications[idxNext].idxNext)
+ c++;
+ AssertReleaseMsg(c == RT_ELEMENTS(pVM->rem.s.aHandlerNotifications), ("%#x != %#x, idxFreeList=%#x\n", c, RT_ELEMENTS(pVM->rem.s.aHandlerNotifications), pVM->rem.s.idxFreeList));
+ }
+#endif
+ }
+}
+
+
+/**
+ * Notify REM about changed code page.
+ *
+ * @returns VBox status code.
+ * @param pVM VM handle.
+ * @param pVCpu VMCPU handle.
+ * @param pvCodePage Code page address
+ */
+REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, PVMCPU pVCpu, RTGCPTR pvCodePage)
+{
+#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC
+ int rc;
+ RTGCPHYS PhysGC;
+ uint64_t flags;
+
+ VM_ASSERT_EMT(pVM);
+
+ /*
+ * Get the physical page address.
+ */
+ rc = PGMGstGetPage(pVM, pvCodePage, &flags, &PhysGC);
+ if (rc == VINF_SUCCESS)
+ {
+ /*
+ * Sync the required registers and flush the whole page.
+         * (It is easier to do the whole page than to notify it about each physical
+         * byte that was changed.)
+ */
+ pVM->rem.s.Env.cr[0] = pVM->rem.s.pCtx->cr0;
+ pVM->rem.s.Env.cr[2] = pVM->rem.s.pCtx->cr2;
+ pVM->rem.s.Env.cr[3] = pVM->rem.s.pCtx->cr3;
+ pVM->rem.s.Env.cr[4] = pVM->rem.s.pCtx->cr4;
+
+ tb_invalidate_phys_page_range(PhysGC, PhysGC + PAGE_SIZE - 1, 0);
+ }
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Notification about a successful MMR3PhysRegister() call.
+ *
+ * @param pVM VM handle.
+ * @param   GCPhys          The physical address of the RAM.
+ * @param cb Size of the memory.
+ * @param fFlags Flags of the REM_NOTIFY_PHYS_RAM_FLAGS_* defines.
+ */
+REMR3DECL(void) REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, unsigned fFlags)
+{
+ Log(("REMR3NotifyPhysRamRegister: GCPhys=%RGp cb=%RGp fFlags=%#x\n", GCPhys, cb, fFlags));
+ VM_ASSERT_EMT(pVM);
+
+ /*
+ * Validate input - we trust the caller.
+ */
+ Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+ Assert(cb);
+ Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+ AssertMsg(fFlags == REM_NOTIFY_PHYS_RAM_FLAGS_RAM || fFlags == REM_NOTIFY_PHYS_RAM_FLAGS_MMIO2, ("%#x\n", fFlags));
+
+ /*
+ * Base ram? Update GCPhysLastRam.
+ */
+ if (fFlags & REM_NOTIFY_PHYS_RAM_FLAGS_RAM)
+ {
+ if (GCPhys + (cb - 1) > pVM->rem.s.GCPhysLastRam)
+ {
+ AssertReleaseMsg(!pVM->rem.s.fGCPhysLastRamFixed, ("GCPhys=%RGp cb=%RGp\n", GCPhys, cb));
+ pVM->rem.s.GCPhysLastRam = GCPhys + (cb - 1);
+ }
+ }
+
+ /*
+ * Register the ram.
+ */
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
+ cpu_register_physical_memory_offset(GCPhys, cb, GCPhys, GCPhys);
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+}
+
+
+/**
+ * Notification about a successful MMR3PhysRomRegister() call.
+ *
+ * @param pVM VM handle.
+ * @param GCPhys The physical address of the ROM.
+ * @param cb The size of the ROM.
+ * @param pvCopy Pointer to the ROM copy.
+ * @param   fShadow         Whether it's currently writable shadow ROM or normal readonly ROM.
+ *                          This function will be called whenever the protection of the
+ *                          shadow ROM changes (at reset and end of POST).
+ */
+REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow)
+{
+ Log(("REMR3NotifyPhysRomRegister: GCPhys=%RGp cb=%d fShadow=%RTbool\n", GCPhys, cb, fShadow));
+ VM_ASSERT_EMT(pVM);
+
+ /*
+ * Validate input - we trust the caller.
+ */
+ Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+ Assert(cb);
+ Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+
+ /*
+ * Register the rom.
+ */
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
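+    /* A shadow ROM that is currently writable is registered as ordinary RAM; otherwise the range gets the read-only IO_MEM_ROM type. */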
+ cpu_register_physical_memory_offset(GCPhys, cb, GCPhys | (fShadow ? 0 : IO_MEM_ROM), GCPhys);
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+}
+
+
+/**
+ * Notification about a successful memory deregistration or reservation.
+ *
+ * @param pVM VM Handle.
+ * @param GCPhys Start physical address.
+ * @param cb The size of the range.
+ */
+REMR3DECL(void) REMR3NotifyPhysRamDeregister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb)
+{
+ Log(("REMR3NotifyPhysRamDeregister: GCPhys=%RGp cb=%d\n", GCPhys, cb));
+ VM_ASSERT_EMT(pVM);
+
+ /*
+ * Validate input - we trust the caller.
+ */
+ Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+ Assert(cb);
+ Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb);
+
+ /*
+ * Unassigning the memory.
+ */
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
+ cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys);
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+}
+
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalRegister() call.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhys Handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ *
+ * @remark MMR3PhysRomRegister assumes that this function will not apply the
+ * Handler memory type to memory which has no HC handler.
+ */
+static void remR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb,
+ bool fHasHCHandler)
+{
+ Log(("REMR3NotifyHandlerPhysicalRegister: enmKind=%d GCPhys=%RGp cb=%RGp fHasHCHandler=%d\n",
+ enmKind, GCPhys, cb, fHasHCHandler));
+
+ VM_ASSERT_EMT(pVM);
+ Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+ Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+
+
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
+ if (enmKind == PGMPHYSHANDLERKIND_MMIO)
+ cpu_register_physical_memory_offset(GCPhys, cb, pVM->rem.s.iMMIOMemType, GCPhys);
+ else if (fHasHCHandler)
+ cpu_register_physical_memory_offset(GCPhys, cb, pVM->rem.s.iHandlerMemType, GCPhys);
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+}
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalRegister() call.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhys Handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ *
+ * @remark MMR3PhysRomRegister assumes that this function will not apply the
+ * Handler memory type to memory which has no HC handler.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb,
+ bool fHasHCHandler)
+{
+ REMR3ReplayHandlerNotifications(pVM);
+
+ remR3NotifyHandlerPhysicalRegister(pVM, enmKind, GCPhys, cb, fHasHCHandler);
+}
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalDeregister() operation.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhys Handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+static void remR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb,
+ bool fHasHCHandler, bool fRestoreAsRAM)
+{
+ Log(("REMR3NotifyHandlerPhysicalDeregister: enmKind=%d GCPhys=%RGp cb=%RGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool RAM=%08x\n",
+ enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM, MMR3PhysGetRamSize(pVM)));
+ VM_ASSERT_EMT(pVM);
+
+
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
+ /** @todo this isn't right, MMIO can (in theory) be restored as RAM. */
+ if (enmKind == PGMPHYSHANDLERKIND_MMIO)
+ cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys);
+ else if (fHasHCHandler)
+ {
+ if (!fRestoreAsRAM)
+ {
+ Assert(GCPhys > MMR3PhysGetRamSize(pVM));
+ cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys);
+ }
+ else
+ {
+ Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys);
+ Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+ cpu_register_physical_memory_offset(GCPhys, cb, GCPhys, GCPhys);
+ }
+ }
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+}
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalDeregister() operation.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhys Handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+ REMR3ReplayHandlerNotifications(pVM);
+ remR3NotifyHandlerPhysicalDeregister(pVM, enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM);
+}
+
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalModify() call.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhysOld Old handler range address.
+ * @param GCPhysNew New handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+static void remR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+ Log(("REMR3NotifyHandlerPhysicalModify: enmKind=%d GCPhysOld=%RGp GCPhysNew=%RGp cb=%RGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool\n",
+ enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM));
+ VM_ASSERT_EMT(pVM);
+ AssertReleaseMsg(enmKind != PGMPHYSHANDLERKIND_MMIO, ("enmKind=%d\n", enmKind));
+
+ if (fHasHCHandler)
+ {
+ ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll);
+
+ /*
+ * Reset the old page.
+ */
+ PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY);
+ if (!fRestoreAsRAM)
+ cpu_register_physical_memory_offset(GCPhysOld, cb, IO_MEM_UNASSIGNED, GCPhysOld);
+ else
+ {
+ /* This is not perfect, but it'll do for PD monitoring... */
+ Assert(cb == PAGE_SIZE);
+ Assert(RT_ALIGN_T(GCPhysOld, PAGE_SIZE, RTGCPHYS) == GCPhysOld);
+ cpu_register_physical_memory_offset(GCPhysOld, cb, GCPhysOld, GCPhysOld);
+ }
+
+ /*
+ * Update the new page.
+ */
+ Assert(RT_ALIGN_T(GCPhysNew, PAGE_SIZE, RTGCPHYS) == GCPhysNew);
+ Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb);
+ cpu_register_physical_memory_offset(GCPhysNew, cb, pVM->rem.s.iHandlerMemType, GCPhysNew);
+ PDMCritSectLeave(&pVM->rem.s.CritSectRegister);
+
+ ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll);
+ }
+}
+
+/**
+ * Notification about a successful PGMR3HandlerPhysicalModify() call.
+ *
+ * @param pVM VM Handle.
+ * @param enmKind Kind of access handler.
+ * @param GCPhysOld Old handler range address.
+ * @param GCPhysNew New handler range address.
+ * @param cb Size of the handler range.
+ * @param fHasHCHandler Set if the handler has a HC callback function.
+ * @param   fRestoreAsRAM   Whether to restore it as normal RAM or as unassigned memory.
+ */
+REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM)
+{
+ REMR3ReplayHandlerNotifications(pVM);
+
+ remR3NotifyHandlerPhysicalModify(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM);
+}
+
+/**
+ * Checks if we're handling access to this page or not.
+ *
+ * @returns true if we're trapping access.
+ * @returns false if we aren't.
+ * @param pVM The VM handle.
+ * @param GCPhys The physical address.
+ *
+ * @remark This function will only work correctly in VBOX_STRICT builds!
+ */
+REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys)
+{
+#ifdef VBOX_STRICT
+ ram_addr_t off;
+ REMR3ReplayHandlerNotifications(pVM);
+
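+    /* The low PAGE_OFFSET_MASK bits of the page offset hold the memory type; handler, MMIO and ROM pages are all trapped. */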
+ off = get_phys_page_offset(GCPhys);
+ return (off & PAGE_OFFSET_MASK) == pVM->rem.s.iHandlerMemType
+ || (off & PAGE_OFFSET_MASK) == pVM->rem.s.iMMIOMemType
+ || (off & PAGE_OFFSET_MASK) == IO_MEM_ROM;
+#else
+ return false;
+#endif
+}
+
+
+/**
+ * Deals with a rare case in get_phys_addr_code where the code
+ * is being monitored.
+ *
+ * It could also be an MMIO page, in which case we will raise a fatal error.
+ *
+ * @returns The physical address corresponding to addr.
+ * @param env The cpu environment.
+ * @param addr The virtual address.
+ * @param pTLBEntry The TLB entry.
+ * @param IoTlbEntry The I/O TLB entry address.
+ */
+target_ulong remR3PhysGetPhysicalAddressCode(CPUX86State *env,
+ target_ulong addr,
+ CPUTLBEntry *pTLBEntry,
+ target_phys_addr_t IoTlbEntry)
+{
+ PVM pVM = env->pVM;
+
+ if ((IoTlbEntry & ~TARGET_PAGE_MASK) == pVM->rem.s.iHandlerMemType)
+ {
+        /* If code memory is being monitored, the corresponding IOTLB entry will have
+           the handler IO type, and the addend will provide the real physical address,
+           no matter whether we store the VA in the TLB, as handlers are always passed the PA. */
+ target_ulong ret = (IoTlbEntry & TARGET_PAGE_MASK) + addr;
+ return ret;
+ }
+ LogRel(("\nTrying to execute code with memory type addr_code=%RGv addend=%RGp at %RGv! (iHandlerMemType=%#x iMMIOMemType=%#x IOTLB=%RGp)\n"
+ "*** handlers\n",
+ (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType, (RTGCPHYS)IoTlbEntry));
+ DBGFR3Info(pVM->pUVM, "handlers", NULL, DBGFR3InfoLogRelHlp());
+ LogRel(("*** mmio\n"));
+ DBGFR3Info(pVM->pUVM, "mmio", NULL, DBGFR3InfoLogRelHlp());
+ LogRel(("*** phys\n"));
+ DBGFR3Info(pVM->pUVM, "phys", NULL, DBGFR3InfoLogRelHlp());
+ cpu_abort(env, "Trying to execute code with memory type addr_code=%RGv addend=%RGp at %RGv. (iHandlerMemType=%#x iMMIOMemType=%#x)\n",
+ (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType);
+ AssertFatalFailed();
+}
+
+/**
+ * Read guest RAM and ROM.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ * @param pvDst The destination address.
+ * @param   cb              Number of bytes to read.
+ */
+void remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb)
+{
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ VBOXSTRICTRC rcStrict = PGMPhysRead(cpu_single_env->pVM, SrcGCPhys, pvDst, cb, PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("read(%d): %08x\n", cb, (uint32_t)SrcGCPhys));
+#endif
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 8-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCUINTREG remR3PhysReadU8(RTGCPHYS SrcGCPhys)
+{
+ uint8_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("readu8: %x <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 8-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCINTREG remR3PhysReadS8(RTGCPHYS SrcGCPhys)
+{
+ int8_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("reads8: %x <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 16-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCUINTREG remR3PhysReadU16(RTGCPHYS SrcGCPhys)
+{
+ uint16_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("readu16: %x <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 16-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCINTREG remR3PhysReadS16(RTGCPHYS SrcGCPhys)
+{
+ int16_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("reads16: %x <- %08x\n", (uint16_t)val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 32-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCUINTREG remR3PhysReadU32(RTGCPHYS SrcGCPhys)
+{
+ uint32_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("readu32: %x <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 32-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+RTCCINTREG remR3PhysReadS32(RTGCPHYS SrcGCPhys)
+{
+ int32_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("reads32: %x <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, unsigned 64-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys)
+{
+ uint64_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU64(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("readu64: %llx <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Read guest RAM and ROM, signed 64-bit.
+ *
+ * @param SrcGCPhys The source address (guest physical).
+ */
+int64_t remR3PhysReadS64(RTGCPHYS SrcGCPhys)
+{
+ int64_t val;
+ STAM_PROFILE_ADV_START(&gStatMemRead, a);
+ VBOX_CHECK_ADDR(SrcGCPhys);
+ val = PGMR3PhysReadU64(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemRead, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("reads64: %llx <- %08x\n", val, (uint32_t)SrcGCPhys));
+#endif
+ return val;
+}
+
+
+/**
+ * Write guest RAM.
+ *
+ * @param DstGCPhys The destination address (guest physical).
+ * @param pvSrc The source address.
+ * @param cb Number of bytes to write
+ */
+void remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb)
+{
+ STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+ VBOX_CHECK_ADDR(DstGCPhys);
+ VBOXSTRICTRC rcStrict = PGMPhysWrite(cpu_single_env->pVM, DstGCPhys, pvSrc, cb, PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+ STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("write(%d): %08x\n", cb, (uint32_t)DstGCPhys));
+#endif
+}
+
+
+/**
+ * Write guest RAM, unsigned 8-bit.
+ *
+ * @param DstGCPhys The destination address (guest physical).
+ * @param val Value
+ */
+void remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val)
+{
+ STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+ VBOX_CHECK_ADDR(DstGCPhys);
+ PGMR3PhysWriteU8(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("writeu8: %x -> %08x\n", val, (uint32_t)DstGCPhys));
+#endif
+}
+
+
+/**
+ * Write guest RAM, unsigned 16-bit.
+ *
+ * @param DstGCPhys The destination address (guest physical).
+ * @param val Value
+ */
+void remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val)
+{
+ STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+ VBOX_CHECK_ADDR(DstGCPhys);
+ PGMR3PhysWriteU16(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("writeu16: %x -> %08x\n", val, (uint32_t)DstGCPhys));
+#endif
+}
+
+
+/**
+ * Write guest RAM, unsigned 32-bit.
+ *
+ * @param DstGCPhys The destination address (guest physical).
+ * @param val Value
+ */
+void remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val)
+{
+ STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+ VBOX_CHECK_ADDR(DstGCPhys);
+ PGMR3PhysWriteU32(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("writeu32: %x -> %08x\n", val, (uint32_t)DstGCPhys));
+#endif
+}
+
+
+/**
+ * Write guest RAM, unsigned 64-bit.
+ *
+ * @param DstGCPhys The destination address (guest physical).
+ * @param val Value
+ */
+void remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val)
+{
+ STAM_PROFILE_ADV_START(&gStatMemWrite, a);
+ VBOX_CHECK_ADDR(DstGCPhys);
+ PGMR3PhysWriteU64(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM);
+ STAM_PROFILE_ADV_STOP(&gStatMemWrite, a);
+#ifdef VBOX_DEBUG_PHYS
+ LogRel(("writeu64: %llx -> %08x\n", val, (uint32_t)DstGCPhys));
+#endif
+}
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_MMIO
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU8(void *pvEnv, target_phys_addr_t GCPhys)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ uint32_t u32 = 0;
+ int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 1);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+ Log2(("remR3MMIOReadU8: GCPhys=%RGp -> %02x\n", (RTGCPHYS)GCPhys, u32));
+ return u32;
+}
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU16(void *pvEnv, target_phys_addr_t GCPhys)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ uint32_t u32 = 0;
+ int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 2);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+ Log2(("remR3MMIOReadU16: GCPhys=%RGp -> %04x\n", (RTGCPHYS)GCPhys, u32));
+ return u32;
+}
+
+/** Read MMIO memory. */
+static uint32_t remR3MMIOReadU32(void *pvEnv, target_phys_addr_t GCPhys)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ uint32_t u32 = 0;
+ int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 4);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+ Log2(("remR3MMIOReadU32: GCPhys=%RGp -> %08x\n", (RTGCPHYS)GCPhys, u32));
+ return u32;
+}
+
+/** Write to MMIO memory. */
+static void remR3MMIOWriteU8(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ int rc;
+ Log2(("remR3MMIOWriteU8: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 1);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+}
+
+/** Write to MMIO memory. */
+static void remR3MMIOWriteU16(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ int rc;
+ Log2(("remR3MMIOWriteU16: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 2);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+}
+
+/** Write to MMIO memory. */
+static void remR3MMIOWriteU32(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ CPUX86State *env = (CPUX86State *)pvEnv;
+ int rc;
+ Log2(("remR3MMIOWriteU32: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 4);
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc);
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_HANDLER
+
+/* !!!WARNING!!! This is extremely hackish right now, we assume it's only for LFB access! !!!WARNING!!! */
+
+static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys)
+{
+ uint8_t u8;
+ Log2(("remR3HandlerReadU8: GCPhys=%RGp\n", (RTGCPHYS)GCPhys));
+ VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u8, sizeof(u8), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+ return u8;
+}
+
+static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys)
+{
+ uint16_t u16;
+ Log2(("remR3HandlerReadU16: GCPhys=%RGp\n", (RTGCPHYS)GCPhys));
+ VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u16, sizeof(u16), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+ return u16;
+}
+
+static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys)
+{
+ uint32_t u32;
+ Log2(("remR3HandlerReadU32: GCPhys=%RGp\n", (RTGCPHYS)GCPhys));
+ VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u32, sizeof(u32), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+ return u32;
+}
+
+static void remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ Log2(("remR3HandlerWriteU8: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint8_t), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+}
+
+static void remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ Log2(("remR3HandlerWriteU16: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint16_t), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+}
+
+static void remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32)
+{
+ Log2(("remR3HandlerWriteU32: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32));
+ VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint32_t), PGMACCESSORIGIN_REM);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict);
+}
+
+/* -+- disassembly -+- */
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_DISAS
+
+
+/**
+ * Enables or disables single stepped disassembly.
+ *
+ * @returns VBox status code.
+ * @param pVM VM handle.
+ * @param fEnable To enable set this flag, to disable clear it.
+ */
+static DECLCALLBACK(int) remR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+ LogFlow(("remR3DisasEnableStepping: fEnable=%d\n", fEnable));
+ VM_ASSERT_EMT(pVM);
+
+ if (fEnable)
+ pVM->rem.s.Env.state |= CPU_EMULATE_SINGLE_STEP;
+ else
+ pVM->rem.s.Env.state &= ~CPU_EMULATE_SINGLE_STEP;
+#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
+ cpu_single_step(&pVM->rem.s.Env, fEnable);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Enables or disables single stepped disassembly.
+ *
+ * @returns VBox status code.
+ * @param pVM VM handle.
+ * @param fEnable To enable set this flag, to disable clear it.
+ */
+REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable)
+{
+ int rc;
+
+ LogFlow(("REMR3DisasEnableStepping: fEnable=%d\n", fEnable));
+ if (VM_IS_EMT(pVM))
+ return remR3DisasEnableStepping(pVM, fEnable);
+
+ rc = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)remR3DisasEnableStepping, 2, pVM, fEnable);
+ AssertRC(rc);
+ return rc;
+}
+
+
+#ifdef VBOX_WITH_DEBUGGER
+/**
+ * External Debugger Command: .remstep [on|off|1|0]
+ */
+static DECLCALLBACK(int) remR3CmdDisasEnableStepping(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM,
+ PCDBGCVAR paArgs, unsigned cArgs)
+{
+ int rc;
+ PVM pVM = pUVM->pVM;
+
+ if (cArgs == 0)
+ /*
+ * Print the current status.
+ */
+ rc = DBGCCmdHlpPrintf(pCmdHlp, "DisasStepping is %s\n",
+ pVM->rem.s.Env.state & CPU_EMULATE_SINGLE_STEP ? "enabled" : "disabled");
+ else
+ {
+ /*
+ * Convert the argument and change the mode.
+ */
+ bool fEnable;
+ rc = DBGCCmdHlpVarToBool(pCmdHlp, &paArgs[0], &fEnable);
+ if (RT_SUCCESS(rc))
+ {
+ rc = REMR3DisasEnableStepping(pVM, fEnable);
+ if (RT_SUCCESS(rc))
+ rc = DBGCCmdHlpPrintf(pCmdHlp, "DisasStepping was %s\n", fEnable ? "enabled" : "disabled");
+ else
+ rc = DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "REMR3DisasEnableStepping");
+ }
+ else
+ rc = DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "DBGCCmdHlpVarToBool");
+ }
+ return rc;
+}
+#endif /* VBOX_WITH_DEBUGGER */
+
+
+/**
+ * Disassembles one instruction and prints it to the log.
+ *
+ * @returns Success indicator.
+ * @param env Pointer to the recompiler CPU structure.
+ * @param f32BitCode Indicates whether the code should be
+ * disassembled as 16 or 32 bit code. If -1, the
+ * CS selector will be inspected.
+ * @param pszPrefix Optional string to prefix the log output with (can be NULL).
+ */
+bool remR3DisasInstr(CPUX86State *env, int f32BitCode, char *pszPrefix)
+{
+ PVM pVM = env->pVM;
+ const bool fLog = LogIsEnabled();
+ const bool fLog2 = LogIs2Enabled();
+ int rc = VINF_SUCCESS;
+
+ /*
+ * Don't bother if there ain't any log output to do.
+ */
+ if (!fLog && !fLog2)
+ return true;
+
+ /*
+ * Update the state so DBGF reads the correct register values.
+ */
+ remR3StateUpdate(pVM, env->pVCpu);
+
+ /*
+ * Log registers if requested.
+ */
+ if (fLog2)
+ DBGFR3_INFO_LOG(pVM, env->pVCpu, "cpumguest", pszPrefix);
+
+ /*
+ * Disassemble to log.
+ */
+ if (fLog)
+ {
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+ char szBuf[256];
+ szBuf[0] = '\0';
+ int rc = DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM,
+ pVCpu->idCpu,
+ 0, /* Sel */ 0, /* GCPtr */
+ DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
+ szBuf,
+ sizeof(szBuf),
+ NULL);
+ if (RT_FAILURE(rc))
+ RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrEx failed with rc=%Rrc\n", rc);
+ if (pszPrefix && *pszPrefix)
+ RTLogPrintf("%s-CPU%d: %s\n", pszPrefix, pVCpu->idCpu, szBuf);
+ else
+ RTLogPrintf("CPU%d: %s\n", pVCpu->idCpu, szBuf);
+ }
+
+ return RT_SUCCESS(rc);
+}
+
+
+/**
+ * Disassemble recompiled code.
+ *
+ * @param phFileIgnored Ignored, logfile usually.
+ * @param pvCode Pointer to the code block.
+ * @param cb Size of the code block.
+ */
+void disas(FILE *phFileIgnored, void *pvCode, unsigned long cb)
+{
+ if (LogIs2Enabled())
+ {
+ unsigned off = 0;
+ char szOutput[256];
+ DISCPUSTATE Cpu;
+#ifdef RT_ARCH_X86
+ DISCPUMODE enmCpuMode = DISCPUMODE_32BIT;
+#else
+ DISCPUMODE enmCpuMode = DISCPUMODE_64BIT;
+#endif
+
+ RTLogPrintf("Recompiled Code: %p %#lx (%ld) bytes\n", pvCode, cb, cb);
+ while (off < cb)
+ {
+ uint32_t cbInstr;
+ int rc = DISInstrToStr((uint8_t const *)pvCode + off, enmCpuMode,
+ &Cpu, &cbInstr, szOutput, sizeof(szOutput));
+ if (RT_SUCCESS(rc))
+ RTLogPrintf("%s", szOutput);
+ else
+ {
+ RTLogPrintf("disas error %Rrc\n", rc);
+ cbInstr = 1;
+ }
+ off += cbInstr;
+ }
+ }
+}
+
+
+/**
+ * Disassemble guest code.
+ *
+ * @param phFileIgnored Ignored, logfile usually.
+ * @param uCode The guest address of the code to disassemble. (flat?)
+ * @param cb Number of bytes to disassemble.
+ * @param fFlags Flags, probably something which tells if this is 16, 32 or 64 bit code.
+ */
+void target_disas(FILE *phFileIgnored, target_ulong uCode, target_ulong cb, int fFlags)
+{
+ if (LogIs2Enabled())
+ {
+ PVM pVM = cpu_single_env->pVM;
+ PVMCPU pVCpu = cpu_single_env->pVCpu;
+ RTSEL cs;
+ RTGCUINTPTR eip;
+
+ Assert(pVCpu);
+
+ /*
+ * Update the state so DBGF reads the correct register values (flags).
+ */
+ remR3StateUpdate(pVM, pVCpu);
+
+ /*
+ * Do the disassembling.
+ */
+ RTLogPrintf("Guest Code: PC=%llx %llx bytes fFlags=%d\n", (uint64_t)uCode, (uint64_t)cb, fFlags);
+ cs = cpu_single_env->segs[R_CS].selector;
+ eip = uCode - cpu_single_env->segs[R_CS].base;
+ for (;;)
+ {
+ char szBuf[256];
+ uint32_t cbInstr;
+ int rc = DBGFR3DisasInstrEx(pVM->pUVM,
+ pVCpu->idCpu,
+ cs,
+ eip,
+ DBGF_DISAS_FLAGS_DEFAULT_MODE,
+ szBuf, sizeof(szBuf),
+ &cbInstr);
+ if (RT_SUCCESS(rc))
+ RTLogPrintf("%llx %s\n", (uint64_t)uCode, szBuf);
+ else
+ {
+ RTLogPrintf("%llx %04x:%llx: %s\n", (uint64_t)uCode, cs, (uint64_t)eip, szBuf);
+ cbInstr = 1;
+ }
+
+ /* next */
+ if (cb <= cbInstr)
+ break;
+ cb -= cbInstr;
+ uCode += cbInstr;
+ eip += cbInstr;
+ }
+ }
+}
+
+
+/**
+ * Looks up a guest symbol.
+ *
+ * @returns Pointer to symbol name. This is a static buffer.
+ * @param orig_addr The address in question.
+ */
+const char *lookup_symbol(target_ulong orig_addr)
+{
+ PVM pVM = cpu_single_env->pVM;
+ RTGCINTPTR off = 0;
+ RTDBGSYMBOL Sym;
+ DBGFADDRESS Addr;
+
+ int rc = DBGFR3AsSymbolByAddr(pVM->pUVM, DBGF_AS_GLOBAL, DBGFR3AddrFromFlat(pVM->pUVM, &Addr, orig_addr),
+ RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED,
+ &off, &Sym, NULL /*phMod*/);
+ if (RT_SUCCESS(rc))
+ {
+ static char szSym[sizeof(Sym.szName) + 48];
+ if (!off)
+ RTStrPrintf(szSym, sizeof(szSym), "%s\n", Sym.szName);
+ else if (off > 0)
+ RTStrPrintf(szSym, sizeof(szSym), "%s+%x\n", Sym.szName, off);
+ else
+ RTStrPrintf(szSym, sizeof(szSym), "%s-%x\n", Sym.szName, -off);
+ return szSym;
+ }
+ return "<N/A>";
+}
+
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/* -+- FF notifications -+- */
+
+/**
+ * Notification about the interrupt FF being set.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ * @thread The emulation thread.
+ */
+REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM, PVMCPU pVCpu)
+{
+ LogFlow(("REMR3NotifyInterruptSet: fInRem=%d interrupts %s\n", pVM->rem.s.fInREM,
+ (pVM->rem.s.Env.eflags & IF_MASK) && !(pVM->rem.s.Env.hflags & HF_INHIBIT_IRQ_MASK) ? "enabled" : "disabled"));
+ if (pVM->rem.s.fInREM)
+ ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_HARD);
+}
+
+
+/**
+ * Notification about the interrupt FF being cleared.
+ *
+ * @param pVM VM Handle.
+ * @param pVCpu VMCPU Handle.
+ * @thread Any.
+ */
+REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM, PVMCPU pVCpu)
+{
+ LogFlow(("REMR3NotifyInterruptClear:\n"));
+ if (pVM->rem.s.fInREM)
+ cpu_reset_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+}
+
+
+/**
+ * Notification about pending timer(s).
+ *
+ * @param pVM VM Handle.
+ * @param pVCpuDst The target cpu for this notification.
+ * TM will not broadcast pending timer events, but use
+ * a dedicated EMT for them. So, only interrupt REM
+ * execution if the given CPU is executing in REM.
+ * @thread Any.
+ */
+REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM, PVMCPU pVCpuDst)
+{
+#ifndef DEBUG_bird
+ LogFlow(("REMR3NotifyTimerPending: fInRem=%d\n", pVM->rem.s.fInREM));
+#endif
+ if (pVM->rem.s.fInREM)
+ {
+ if (pVM->rem.s.Env.pVCpu == pVCpuDst)
+ {
+ LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: setting\n"));
+ ASMAtomicOrS32((int32_t volatile *)&pVM->rem.s.Env.interrupt_request,
+ CPU_INTERRUPT_EXTERNAL_TIMER);
+ }
+ else
+ LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: pVCpu:%p != pVCpuDst:%p\n", pVM->rem.s.Env.pVCpu, pVCpuDst));
+ }
+ else
+ LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: !fInREM; cpu state=%d\n", VMCPU_GET_STATE(pVCpuDst)));
+}
+
+
+/**
+ * Notification about pending DMA transfers.
+ *
+ * @param pVM VM Handle.
+ * @thread Any.
+ */
+REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM)
+{
+ LogFlow(("REMR3NotifyDmaPending: fInRem=%d\n", pVM->rem.s.fInREM));
+ if (pVM->rem.s.fInREM)
+ ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_DMA);
+}
+
+
+/**
+ * Notification about pending queue items.
+ *
+ * @param pVM VM Handle.
+ * @thread Any.
+ */
+REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM)
+{
+ LogFlow(("REMR3NotifyQueuePending: fInRem=%d\n", pVM->rem.s.fInREM));
+ if (pVM->rem.s.fInREM)
+ ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT);
+}
+
+
+/**
+ * Notification about pending FF set by an external thread.
+ *
+ * @param pVM VM handle.
+ * @thread Any.
+ */
+REMR3DECL(void) REMR3NotifyFF(PVM pVM)
+{
+ LogFlow(("REMR3NotifyFF: fInRem=%d\n", pVM->rem.s.fInREM));
+ if (pVM->rem.s.fInREM)
+ ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT);
+}
+
+
+#ifdef VBOX_WITH_STATISTICS
+void remR3ProfileStart(int statcode)
+{
+ STAMPROFILEADV *pStat;
+ switch(statcode)
+ {
+ case STATS_EMULATE_SINGLE_INSTR:
+ pStat = &gStatExecuteSingleInstr;
+ break;
+ case STATS_QEMU_COMPILATION:
+ pStat = &gStatCompilationQEmu;
+ break;
+ case STATS_QEMU_RUN_EMULATED_CODE:
+ pStat = &gStatRunCodeQEmu;
+ break;
+ case STATS_QEMU_TOTAL:
+ pStat = &gStatTotalTimeQEmu;
+ break;
+ case STATS_QEMU_RUN_TIMERS:
+ pStat = &gStatTimers;
+ break;
+ case STATS_TLB_LOOKUP:
+ pStat = &gStatTBLookup;
+ break;
+ case STATS_IRQ_HANDLING:
+ pStat = &gStatIRQ;
+ break;
+ case STATS_RAW_CHECK:
+ pStat = &gStatRawCheck;
+ break;
+
+ default:
+ AssertMsgFailed(("unknown stat %d\n", statcode));
+ return;
+ }
+ STAM_PROFILE_ADV_START(pStat, a);
+}
+
+
+void remR3ProfileStop(int statcode)
+{
+ STAMPROFILEADV *pStat;
+ switch(statcode)
+ {
+ case STATS_EMULATE_SINGLE_INSTR:
+ pStat = &gStatExecuteSingleInstr;
+ break;
+ case STATS_QEMU_COMPILATION:
+ pStat = &gStatCompilationQEmu;
+ break;
+ case STATS_QEMU_RUN_EMULATED_CODE:
+ pStat = &gStatRunCodeQEmu;
+ break;
+ case STATS_QEMU_TOTAL:
+ pStat = &gStatTotalTimeQEmu;
+ break;
+ case STATS_QEMU_RUN_TIMERS:
+ pStat = &gStatTimers;
+ break;
+ case STATS_TLB_LOOKUP:
+ pStat = &gStatTBLookup;
+ break;
+ case STATS_IRQ_HANDLING:
+ pStat = &gStatIRQ;
+ break;
+ case STATS_RAW_CHECK:
+ pStat = &gStatRawCheck;
+ break;
+ default:
+ AssertMsgFailed(("unknown stat %d\n", statcode));
+ return;
+ }
+ STAM_PROFILE_ADV_STOP(pStat, a);
+}
+#endif
+
+/**
+ * Raise an RC, force rem exit.
+ *
+ * @param pVM VM handle.
+ * @param rc The rc.
+ */
+void remR3RaiseRC(PVM pVM, int rc)
+{
+ Log(("remR3RaiseRC: rc=%Rrc\n", rc));
+ Assert(pVM->rem.s.fInREM);
+ VM_ASSERT_EMT(pVM);
+ pVM->rem.s.rc = rc;
+ cpu_interrupt(&pVM->rem.s.Env, CPU_INTERRUPT_RC);
+}
+
+
+/* -+- timers -+- */
+
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+ STAM_COUNTER_INC(&gStatCpuGetTSC);
+ return TMCpuTickGet(env->pVCpu);
+}
+
+
+/* -+- interrupts -+- */
+
+void cpu_set_ferr(CPUX86State *env)
+{
+ int rc = PDMIsaSetIrq(env->pVM, 13, 1, 0 /*uTagSrc*/);
+ LogFlow(("cpu_set_ferr: rc=%d\n", rc)); NOREF(rc);
+}
+
+int cpu_get_pic_interrupt(CPUX86State *env)
+{
+ uint8_t u8Interrupt;
+ int rc;
+
+ if (VMCPU_FF_TEST_AND_CLEAR(env->pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(env->pVCpu);
+
+ /* When we fail to forward interrupts directly in raw mode, we fall back to the recompiler.
+ * In that case we can't call PDMGetInterrupt anymore, because it has already cleared the interrupt
+ * with the (a)pic.
+ */
+ /* Note! We assume we will go directly to the recompiler to handle the pending interrupt! */
+ rc = PDMGetInterrupt(env->pVCpu, &u8Interrupt);
+ LogFlow(("cpu_get_pic_interrupt: u8Interrupt=%d rc=%Rrc pc=%04x:%08llx ~flags=%08llx\n",
+ u8Interrupt, rc, env->segs[R_CS].selector, (uint64_t)env->eip, (uint64_t)env->eflags));
+ if (RT_SUCCESS(rc))
+ {
+ if (VMCPU_FF_IS_ANY_SET(env->pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
+ env->interrupt_request |= CPU_INTERRUPT_HARD;
+ return u8Interrupt;
+ }
+ return -1;
+}
+
+
+/* -+- local apic -+- */
+
+#if 0 /* CPUMSetGuestMsr does this now. */
+void cpu_set_apic_base(CPUX86State *env, uint64_t val)
+{
+ int rc = PDMApicSetBase(env->pVM, val);
+ LogFlow(("cpu_set_apic_base: val=%#llx rc=%Rrc\n", val, rc)); NOREF(rc);
+}
+#endif
+
+uint64_t cpu_get_apic_base(CPUX86State *env)
+{
+ uint64_t u64;
+ VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(env->pVCpu, MSR_IA32_APICBASE, &u64);
+ if (RT_SUCCESS(rcStrict))
+ {
+ LogFlow(("cpu_get_apic_base: returns %#llx \n", u64));
+ return u64;
+ }
+ LogFlow(("cpu_get_apic_base: returns 0 (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return 0;
+}
+
+void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
+{
+ int rc = APICSetTpr(env->pVCpu, val << 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
+ LogFlow(("cpu_set_apic_tpr: val=%#x rc=%Rrc\n", val, rc)); NOREF(rc);
+}
+
+uint8_t cpu_get_apic_tpr(CPUX86State *env)
+{
+ uint8_t u8;
+ int rc = APICGetTpr(env->pVCpu, &u8, NULL, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ LogFlow(("cpu_get_apic_tpr: returns %#x\n", u8));
+ return u8 >> 4; /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
+ }
+ LogFlow(("cpu_get_apic_tpr: returns 0 (rc=%Rrc)\n", rc));
+ return 0;
+}
+
+/**
+ * Read an MSR.
+ *
+ * @retval 0 success.
+ * @retval -1 failure, raise \#GP(0).
+ * @param env The cpu state.
+ * @param idMsr The MSR to read.
+ * @param puValue Where to return the value.
+ */
+int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue)
+{
+ Assert(env->pVCpu);
+ return CPUMQueryGuestMsr(env->pVCpu, idMsr, puValue) == VINF_SUCCESS ? 0 : -1;
+}
+
+/**
+ * Write to an MSR.
+ *
+ * @retval 0 success.
+ * @retval -1 failure, raise \#GP(0).
+ * @param env The cpu state.
+ * @param idMsr The MSR to write to.
+ * @param uValue The value to write.
+ */
+int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue)
+{
+ Assert(env->pVCpu);
+ return CPUMSetGuestMsr(env->pVCpu, idMsr, uValue) == VINF_SUCCESS ? 0 : -1;
+}
+
+/* -+- I/O Ports -+- */
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM_IOPORT
+
+void cpu_outb(CPUX86State *env, pio_addr_t addr, uint8_t val)
+{
+ int rc;
+
+ if (addr != 0x80 && addr != 0x70 && addr != 0x61)
+ Log2(("cpu_outb: addr=%#06x val=%#x\n", addr, val));
+
+ rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 1);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ return;
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_outb: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return;
+ }
+ remAbort(rc, __FUNCTION__);
+}
+
+void cpu_outw(CPUX86State *env, pio_addr_t addr, uint16_t val)
+{
+ //Log2(("cpu_outw: addr=%#06x val=%#x\n", addr, val));
+ int rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 2);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ return;
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_outw: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return;
+ }
+ remAbort(rc, __FUNCTION__);
+}
+
+void cpu_outl(CPUX86State *env, pio_addr_t addr, uint32_t val)
+{
+ int rc;
+ Log2(("cpu_outl: addr=%#06x val=%#x\n", addr, val));
+ rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 4);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ return;
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_outl: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return;
+ }
+ remAbort(rc, __FUNCTION__);
+}
+
+uint8_t cpu_inb(CPUX86State *env, pio_addr_t addr)
+{
+ uint32_t u32 = 0;
+ int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 1);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ if (/*addr != 0x61 && */addr != 0x71)
+ Log2(("cpu_inb: addr=%#06x -> %#x\n", addr, u32));
+ return (uint8_t)u32;
+ }
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_inb: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return (uint8_t)u32;
+ }
+ remAbort(rc, __FUNCTION__);
+ return UINT8_C(0xff);
+}
+
+uint16_t cpu_inw(CPUX86State *env, pio_addr_t addr)
+{
+ uint32_t u32 = 0;
+ int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 2);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ Log2(("cpu_inw: addr=%#06x -> %#x\n", addr, u32));
+ return (uint16_t)u32;
+ }
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_inw: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return (uint16_t)u32;
+ }
+ remAbort(rc, __FUNCTION__);
+ return UINT16_C(0xffff);
+}
+
+uint32_t cpu_inl(CPUX86State *env, pio_addr_t addr)
+{
+ uint32_t u32 = 0;
+ int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 4);
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ Log2(("cpu_inl: addr=%#06x -> %#x\n", addr, u32));
+ return u32;
+ }
+ if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST)
+ {
+ Log(("cpu_inl: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc));
+ remR3RaiseRC(env->pVM, rc);
+ return u32;
+ }
+ remAbort(rc, __FUNCTION__);
+ return UINT32_C(0xffffffff);
+}
+
+#undef LOG_GROUP
+#define LOG_GROUP LOG_GROUP_REM
+
+
+/* -+- helpers and misc other interfaces -+- */
+
+/**
+ * Perform the CPUID instruction.
+ *
+ * @param env Pointer to the recompiler CPU structure.
+ * @param idx The CPUID leaf (eax).
+ * @param idxSub The CPUID sub-leaf (ecx) where applicable.
+ * @param pEAX Where to store eax.
+ * @param pEBX Where to store ebx.
+ * @param pECX Where to store ecx.
+ * @param pEDX Where to store edx.
+ */
+void cpu_x86_cpuid(CPUX86State *env, uint32_t idx, uint32_t idxSub,
+ uint32_t *pEAX, uint32_t *pEBX, uint32_t *pECX, uint32_t *pEDX)
+{
+ NOREF(idxSub);
+ CPUMGetGuestCpuId(env->pVCpu, idx, idxSub, pEAX, pEBX, pECX, pEDX);
+}
+
+
+#if 0 /* not used */
+/**
+ * Interface for qemu hardware to report back fatal errors.
+ */
+void hw_error(const char *pszFormat, ...)
+{
+ /*
+ * Bitch about it.
+ */
+ /** @todo Add support for nested arg lists in the LogPrintfV routine! I've code for
+ * this in my Odin32 tree at home! */
+ va_list args;
+ va_start(args, pszFormat);
+ RTLogPrintf("fatal error in virtual hardware:");
+ RTLogPrintfV(pszFormat, args);
+ va_end(args);
+ AssertReleaseMsgFailed(("fatal error in virtual hardware: %s\n", pszFormat));
+
+ /*
+ * If we're in REM context we'll sync back the state before 'jumping' to
+ * the EMs failure handling.
+ */
+ PVM pVM = cpu_single_env->pVM;
+ if (pVM->rem.s.fInREM)
+ REMR3StateBack(pVM);
+ EMR3FatalError(pVM, VERR_REM_VIRTUAL_HARDWARE_ERROR);
+ AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+#endif
+
+/**
+ * Interface for the qemu cpu to report unhandled situation
+ * raising a fatal VM error.
+ */
+void cpu_abort(CPUX86State *env, const char *pszFormat, ...)
+{
+ va_list va;
+ PVM pVM;
+ PVMCPU pVCpu;
+ char szMsg[256];
+
+ /*
+ * Bitch about it.
+ */
+ RTLogFlags(NULL, "nodisabled nobuffered");
+ RTLogFlush(NULL);
+
+ va_start(va, pszFormat);
+#if defined(RT_OS_WINDOWS) && ARCH_BITS == 64
+ /* It's a bit complicated when mixing MSC and GCC on AMD64. This is a bit ugly, but it works. */
+ unsigned cArgs = 0;
+ uintptr_t auArgs[6] = {0,0,0,0,0,0};
+ const char *psz = strchr(pszFormat, '%');
+ while (psz && cArgs < 6)
+ {
+ auArgs[cArgs++] = va_arg(va, uintptr_t);
+ psz = strchr(psz + 1, '%');
+ }
+ switch (cArgs)
+ {
+ case 1: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0]); break;
+ case 2: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1]); break;
+ case 3: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2]); break;
+ case 4: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3]); break;
+ case 5: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3], auArgs[4]); break;
+ case 6: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3], auArgs[4], auArgs[5]); break;
+ default:
+ case 0: RTStrPrintf(szMsg, sizeof(szMsg), "%s", pszFormat); break;
+ }
+#else
+ RTStrPrintfV(szMsg, sizeof(szMsg), pszFormat, va);
+#endif
+ va_end(va);
+
+ RTLogPrintf("fatal error in recompiler cpu: %s\n", szMsg);
+ RTLogRelPrintf("fatal error in recompiler cpu: %s\n", szMsg);
+
+ /*
+ * If we're in REM context we'll sync back the state before 'jumping' to
+ * the EMs failure handling.
+ */
+ pVM = cpu_single_env->pVM;
+ pVCpu = cpu_single_env->pVCpu;
+ Assert(pVCpu);
+
+ if (pVM->rem.s.fInREM)
+ REMR3StateBack(pVM, pVCpu);
+ EMR3FatalError(pVCpu, VERR_REM_VIRTUAL_CPU_ERROR);
+ AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+
+
+/**
+ * Aborts the VM.
+ *
+ * @param rc VBox error code.
+ * @param pszTip Hint about why/when this happened.
+ */
+void remAbort(int rc, const char *pszTip)
+{
+ PVM pVM;
+ PVMCPU pVCpu;
+
+ /*
+ * Bitch about it.
+ */
+ RTLogPrintf("internal REM fatal error: rc=%Rrc %s\n", rc, pszTip);
+ AssertReleaseMsgFailed(("internal REM fatal error: rc=%Rrc %s\n", rc, pszTip));
+
+ /*
+ * Jump back to where we entered the recompiler.
+ */
+ pVM = cpu_single_env->pVM;
+ pVCpu = cpu_single_env->pVCpu;
+ Assert(pVCpu);
+
+ if (pVM->rem.s.fInREM)
+ REMR3StateBack(pVM, pVCpu);
+
+ EMR3FatalError(pVCpu, rc);
+ AssertMsgFailed(("EMR3FatalError returned!\n"));
+}
+
+
+/**
+ * Dumps a linux system call.
+ * @param pVCpu VMCPU handle.
+ */
+void remR3DumpLnxSyscall(PVMCPU pVCpu)
+{
+ static const char *apsz[] =
+ {
+ "sys_restart_syscall", /* 0 - old "setup()" system call, used for restarting */
+ "sys_exit",
+ "sys_fork",
+ "sys_read",
+ "sys_write",
+ "sys_open", /* 5 */
+ "sys_close",
+ "sys_waitpid",
+ "sys_creat",
+ "sys_link",
+ "sys_unlink", /* 10 */
+ "sys_execve",
+ "sys_chdir",
+ "sys_time",
+ "sys_mknod",
+ "sys_chmod", /* 15 */
+ "sys_lchown16",
+ "sys_ni_syscall", /* old break syscall holder */
+ "sys_stat",
+ "sys_lseek",
+ "sys_getpid", /* 20 */
+ "sys_mount",
+ "sys_oldumount",
+ "sys_setuid16",
+ "sys_getuid16",
+ "sys_stime", /* 25 */
+ "sys_ptrace",
+ "sys_alarm",
+ "sys_fstat",
+ "sys_pause",
+ "sys_utime", /* 30 */
+ "sys_ni_syscall", /* old stty syscall holder */
+ "sys_ni_syscall", /* old gtty syscall holder */
+ "sys_access",
+ "sys_nice",
+ "sys_ni_syscall", /* 35 - old ftime syscall holder */
+ "sys_sync",
+ "sys_kill",
+ "sys_rename",
+ "sys_mkdir",
+ "sys_rmdir", /* 40 */
+ "sys_dup",
+ "sys_pipe",
+ "sys_times",
+ "sys_ni_syscall", /* old prof syscall holder */
+ "sys_brk", /* 45 */
+ "sys_setgid16",
+ "sys_getgid16",
+ "sys_signal",
+ "sys_geteuid16",
+ "sys_getegid16", /* 50 */
+ "sys_acct",
+ "sys_umount", /* recycled never used phys() */
+ "sys_ni_syscall", /* old lock syscall holder */
+ "sys_ioctl",
+ "sys_fcntl", /* 55 */
+ "sys_ni_syscall", /* old mpx syscall holder */
+ "sys_setpgid",
+ "sys_ni_syscall", /* old ulimit syscall holder */
+ "sys_olduname",
+ "sys_umask", /* 60 */
+ "sys_chroot",
+ "sys_ustat",
+ "sys_dup2",
+ "sys_getppid",
+ "sys_getpgrp", /* 65 */
+ "sys_setsid",
+ "sys_sigaction",
+ "sys_sgetmask",
+ "sys_ssetmask",
+ "sys_setreuid16", /* 70 */
+ "sys_setregid16",
+ "sys_sigsuspend",
+ "sys_sigpending",
+ "sys_sethostname",
+ "sys_setrlimit", /* 75 */
+ "sys_old_getrlimit",
+ "sys_getrusage",
+ "sys_gettimeofday",
+ "sys_settimeofday",
+ "sys_getgroups16", /* 80 */
+ "sys_setgroups16",
+ "old_select",
+ "sys_symlink",
+ "sys_lstat",
+ "sys_readlink", /* 85 */
+ "sys_uselib",
+ "sys_swapon",
+ "sys_reboot",
+ "old_readdir",
+ "old_mmap", /* 90 */
+ "sys_munmap",
+ "sys_truncate",
+ "sys_ftruncate",
+ "sys_fchmod",
+ "sys_fchown16", /* 95 */
+ "sys_getpriority",
+ "sys_setpriority",
+ "sys_ni_syscall", /* old profil syscall holder */
+ "sys_statfs",
+ "sys_fstatfs", /* 100 */
+ "sys_ioperm",
+ "sys_socketcall",
+ "sys_syslog",
+ "sys_setitimer",
+ "sys_getitimer", /* 105 */
+ "sys_newstat",
+ "sys_newlstat",
+ "sys_newfstat",
+ "sys_uname",
+ "sys_iopl", /* 110 */
+ "sys_vhangup",
+ "sys_ni_syscall", /* old "idle" system call */
+ "sys_vm86old",
+ "sys_wait4",
+ "sys_swapoff", /* 115 */
+ "sys_sysinfo",
+ "sys_ipc",
+ "sys_fsync",
+ "sys_sigreturn",
+ "sys_clone", /* 120 */
+ "sys_setdomainname",
+ "sys_newuname",
+ "sys_modify_ldt",
+ "sys_adjtimex",
+ "sys_mprotect", /* 125 */
+ "sys_sigprocmask",
+ "sys_ni_syscall", /* old "create_module" */
+ "sys_init_module",
+ "sys_delete_module",
+ "sys_ni_syscall", /* 130: old "get_kernel_syms" */
+ "sys_quotactl",
+ "sys_getpgid",
+ "sys_fchdir",
+ "sys_bdflush",
+ "sys_sysfs", /* 135 */
+ "sys_personality",
+ "sys_ni_syscall", /* reserved for afs_syscall */
+ "sys_setfsuid16",
+ "sys_setfsgid16",
+ "sys_llseek", /* 140 */
+ "sys_getdents",
+ "sys_select",
+ "sys_flock",
+ "sys_msync",
+ "sys_readv", /* 145 */
+ "sys_writev",
+ "sys_getsid",
+ "sys_fdatasync",
+ "sys_sysctl",
+ "sys_mlock", /* 150 */
+ "sys_munlock",
+ "sys_mlockall",
+ "sys_munlockall",
+ "sys_sched_setparam",
+ "sys_sched_getparam", /* 155 */
+ "sys_sched_setscheduler",
+ "sys_sched_getscheduler",
+ "sys_sched_yield",
+ "sys_sched_get_priority_max",
+ "sys_sched_get_priority_min", /* 160 */
+ "sys_sched_rr_get_interval",
+ "sys_nanosleep",
+ "sys_mremap",
+ "sys_setresuid16",
+ "sys_getresuid16", /* 165 */
+ "sys_vm86",
+ "sys_ni_syscall", /* Old sys_query_module */
+ "sys_poll",
+ "sys_nfsservctl",
+ "sys_setresgid16", /* 170 */
+ "sys_getresgid16",
+ "sys_prctl",
+ "sys_rt_sigreturn",
+ "sys_rt_sigaction",
+ "sys_rt_sigprocmask", /* 175 */
+ "sys_rt_sigpending",
+ "sys_rt_sigtimedwait",
+ "sys_rt_sigqueueinfo",
+ "sys_rt_sigsuspend",
+ "sys_pread64", /* 180 */
+ "sys_pwrite64",
+ "sys_chown16",
+ "sys_getcwd",
+ "sys_capget",
+ "sys_capset", /* 185 */
+ "sys_sigaltstack",
+ "sys_sendfile",
+ "sys_ni_syscall", /* reserved for streams1 */
+ "sys_ni_syscall", /* reserved for streams2 */
+ "sys_vfork", /* 190 */
+ "sys_getrlimit",
+ "sys_mmap2",
+ "sys_truncate64",
+ "sys_ftruncate64",
+ "sys_stat64", /* 195 */
+ "sys_lstat64",
+ "sys_fstat64",
+ "sys_lchown",
+ "sys_getuid",
+ "sys_getgid", /* 200 */
+ "sys_geteuid",
+ "sys_getegid",
+ "sys_setreuid",
+ "sys_setregid",
+ "sys_getgroups", /* 205 */
+ "sys_setgroups",
+ "sys_fchown",
+ "sys_setresuid",
+ "sys_getresuid",
+ "sys_setresgid", /* 210 */
+ "sys_getresgid",
+ "sys_chown",
+ "sys_setuid",
+ "sys_setgid",
+ "sys_setfsuid", /* 215 */
+ "sys_setfsgid",
+ "sys_pivot_root",
+ "sys_mincore",
+ "sys_madvise",
+ "sys_getdents64", /* 220 */
+ "sys_fcntl64",
+ "sys_ni_syscall", /* reserved for TUX */
+ "sys_ni_syscall",
+ "sys_gettid",
+ "sys_readahead", /* 225 */
+ "sys_setxattr",
+ "sys_lsetxattr",
+ "sys_fsetxattr",
+ "sys_getxattr",
+ "sys_lgetxattr", /* 230 */
+ "sys_fgetxattr",
+ "sys_listxattr",
+ "sys_llistxattr",
+ "sys_flistxattr",
+ "sys_removexattr", /* 235 */
+ "sys_lremovexattr",
+ "sys_fremovexattr",
+ "sys_tkill",
+ "sys_sendfile64",
+ "sys_futex", /* 240 */
+ "sys_sched_setaffinity",
+ "sys_sched_getaffinity",
+ "sys_set_thread_area",
+ "sys_get_thread_area",
+ "sys_io_setup", /* 245 */
+ "sys_io_destroy",
+ "sys_io_getevents",
+ "sys_io_submit",
+ "sys_io_cancel",
+ "sys_fadvise64", /* 250 */
+ "sys_ni_syscall",
+ "sys_exit_group",
+ "sys_lookup_dcookie",
+ "sys_epoll_create",
+ "sys_epoll_ctl", /* 255 */
+ "sys_epoll_wait",
+ "sys_remap_file_pages",
+ "sys_set_tid_address",
+ "sys_timer_create",
+ "sys_timer_settime", /* 260 */
+ "sys_timer_gettime",
+ "sys_timer_getoverrun",
+ "sys_timer_delete",
+ "sys_clock_settime",
+ "sys_clock_gettime", /* 265 */
+ "sys_clock_getres",
+ "sys_clock_nanosleep",
+ "sys_statfs64",
+ "sys_fstatfs64",
+ "sys_tgkill", /* 270 */
+ "sys_utimes",
+ "sys_fadvise64_64",
+ "sys_ni_syscall" /* sys_vserver */
+ };
+
+ uint32_t uEAX = CPUMGetGuestEAX(pVCpu);
+ switch (uEAX)
+ {
+ default:
+ if (uEAX < RT_ELEMENTS(apsz))
+ Log(("REM: linux syscall %3d: %s (eip=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x ebp=%08x)\n",
+ uEAX, apsz[uEAX], CPUMGetGuestEIP(pVCpu), CPUMGetGuestEBX(pVCpu), CPUMGetGuestECX(pVCpu),
+ CPUMGetGuestEDX(pVCpu), CPUMGetGuestESI(pVCpu), CPUMGetGuestEDI(pVCpu), CPUMGetGuestEBP(pVCpu)));
+ else
+ Log(("eip=%08x: linux syscall %d (#%x) unknown\n", CPUMGetGuestEIP(pVCpu), uEAX, uEAX));
+ break;
+
+ }
+}
+
+
+/**
+ * Dumps an OpenBSD system call.
+ * @param pVCpu VMCPU handle.
+ */
+void remR3DumpOBsdSyscall(PVMCPU pVCpu)
+{
+ static const char *apsz[] =
+ {
+ "SYS_syscall", //0
+ "SYS_exit", //1
+ "SYS_fork", //2
+ "SYS_read", //3
+ "SYS_write", //4
+ "SYS_open", //5
+ "SYS_close", //6
+ "SYS_wait4", //7
+ "SYS_8",
+ "SYS_link", //9
+ "SYS_unlink", //10
+ "SYS_11",
+ "SYS_chdir", //12
+ "SYS_fchdir", //13
+ "SYS_mknod", //14
+ "SYS_chmod", //15
+ "SYS_chown", //16
+ "SYS_break", //17
+ "SYS_18",
+ "SYS_19",
+ "SYS_getpid", //20
+ "SYS_mount", //21
+ "SYS_unmount", //22
+ "SYS_setuid", //23
+ "SYS_getuid", //24
+ "SYS_geteuid", //25
+ "SYS_ptrace", //26
+ "SYS_recvmsg", //27
+ "SYS_sendmsg", //28
+ "SYS_recvfrom", //29
+ "SYS_accept", //30
+ "SYS_getpeername", //31
+ "SYS_getsockname", //32
+ "SYS_access", //33
+ "SYS_chflags", //34
+ "SYS_fchflags", //35
+ "SYS_sync", //36
+ "SYS_kill", //37
+ "SYS_38",
+ "SYS_getppid", //39
+ "SYS_40",
+ "SYS_dup", //41
+ "SYS_opipe", //42
+ "SYS_getegid", //43
+ "SYS_profil", //44
+ "SYS_ktrace", //45
+ "SYS_sigaction", //46
+ "SYS_getgid", //47
+ "SYS_sigprocmask", //48
+ "SYS_getlogin", //49
+ "SYS_setlogin", //50
+ "SYS_acct", //51
+ "SYS_sigpending", //52
+ "SYS_osigaltstack", //53
+ "SYS_ioctl", //54
+ "SYS_reboot", //55
+ "SYS_revoke", //56
+ "SYS_symlink", //57
+ "SYS_readlink", //58
+ "SYS_execve", //59
+ "SYS_umask", //60
+ "SYS_chroot", //61
+ "SYS_62",
+ "SYS_63",
+ "SYS_64",
+ "SYS_65",
+ "SYS_vfork", //66
+ "SYS_67",
+ "SYS_68",
+ "SYS_sbrk", //69
+ "SYS_sstk", //70
+ "SYS_61",
+ "SYS_vadvise", //72
+ "SYS_munmap", //73
+ "SYS_mprotect", //74
+ "SYS_madvise", //75
+ "SYS_76",
+ "SYS_77",
+ "SYS_mincore", //78
+ "SYS_getgroups", //79
+ "SYS_setgroups", //80
+ "SYS_getpgrp", //81
+ "SYS_setpgid", //82
+ "SYS_setitimer", //83
+ "SYS_84",
+ "SYS_85",
+ "SYS_getitimer", //86
+ "SYS_87",
+ "SYS_88",
+ "SYS_89",
+ "SYS_dup2", //90
+ "SYS_91",
+ "SYS_fcntl", //92
+ "SYS_select", //93
+ "SYS_94",
+ "SYS_fsync", //95
+ "SYS_setpriority", //96
+ "SYS_socket", //97
+ "SYS_connect", //98
+ "SYS_99",
+ "SYS_getpriority", //100
+ "SYS_101",
+ "SYS_102",
+ "SYS_sigreturn", //103
+ "SYS_bind", //104
+ "SYS_setsockopt", //105
+ "SYS_listen", //106
+ "SYS_107",
+ "SYS_108",
+ "SYS_109",
+ "SYS_110",
+ "SYS_sigsuspend", //111
+ "SYS_112",
+ "SYS_113",
+ "SYS_114",
+ "SYS_115",
+ "SYS_gettimeofday", //116
+ "SYS_getrusage", //117
+ "SYS_getsockopt", //118
+ "SYS_119",
+ "SYS_readv", //120
+ "SYS_writev", //121
+ "SYS_settimeofday", //122
+ "SYS_fchown", //123
+ "SYS_fchmod", //124
+ "SYS_125",
+ "SYS_setreuid", //126
+ "SYS_setregid", //127
+ "SYS_rename", //128
+ "SYS_129",
+ "SYS_130",
+ "SYS_flock", //131
+ "SYS_mkfifo", //132
+ "SYS_sendto", //133
+ "SYS_shutdown", //134
+ "SYS_socketpair", //135
+ "SYS_mkdir", //136
+ "SYS_rmdir", //137
+ "SYS_utimes", //138
+ "SYS_139",
+ "SYS_adjtime", //140
+ "SYS_141",
+ "SYS_142",
+ "SYS_143",
+ "SYS_144",
+ "SYS_145",
+ "SYS_146",
+ "SYS_setsid", //147
+ "SYS_quotactl", //148
+ "SYS_149",
+ "SYS_150",
+ "SYS_151",
+ "SYS_152",
+ "SYS_153",
+ "SYS_154",
+ "SYS_nfssvc", //155
+ "SYS_156",
+ "SYS_157",
+ "SYS_158",
+ "SYS_159",
+ "SYS_160",
+ "SYS_getfh", //161
+ "SYS_162",
+ "SYS_163",
+ "SYS_164",
+ "SYS_sysarch", //165
+ "SYS_166",
+ "SYS_167",
+ "SYS_168",
+ "SYS_169",
+ "SYS_170",
+ "SYS_171",
+ "SYS_172",
+ "SYS_pread", //173
+ "SYS_pwrite", //174
+ "SYS_175",
+ "SYS_176",
+ "SYS_177",
+ "SYS_178",
+ "SYS_179",
+ "SYS_180",
+ "SYS_setgid", //181
+ "SYS_setegid", //182
+ "SYS_seteuid", //183
+ "SYS_lfs_bmapv", //184
+ "SYS_lfs_markv", //185
+ "SYS_lfs_segclean", //186
+ "SYS_lfs_segwait", //187
+ "SYS_188",
+ "SYS_189",
+ "SYS_190",
+ "SYS_pathconf", //191
+ "SYS_fpathconf", //192
+ "SYS_swapctl", //193
+ "SYS_getrlimit", //194
+ "SYS_setrlimit", //195
+ "SYS_getdirentries", //196
+ "SYS_mmap", //197
+ "SYS___syscall", //198
+ "SYS_lseek", //199
+ "SYS_truncate", //200
+ "SYS_ftruncate", //201
+ "SYS___sysctl", //202
+ "SYS_mlock", //203
+ "SYS_munlock", //204
+ "SYS_205",
+ "SYS_futimes", //206
+ "SYS_getpgid", //207
+ "SYS_xfspioctl", //208
+ "SYS_209",
+ "SYS_210",
+ "SYS_211",
+ "SYS_212",
+ "SYS_213",
+ "SYS_214",
+ "SYS_215",
+ "SYS_216",
+ "SYS_217",
+ "SYS_218",
+ "SYS_219",
+ "SYS_220",
+ "SYS_semget", //221
+ "SYS_222",
+ "SYS_223",
+ "SYS_224",
+ "SYS_msgget", //225
+ "SYS_msgsnd", //226
+ "SYS_msgrcv", //227
+ "SYS_shmat", //228
+ "SYS_229",
+ "SYS_shmdt", //230
+ "SYS_231",
+ "SYS_clock_gettime", //232
+ "SYS_clock_settime", //233
+ "SYS_clock_getres", //234
+ "SYS_235",
+ "SYS_236",
+ "SYS_237",
+ "SYS_238",
+ "SYS_239",
+ "SYS_nanosleep", //240
+ "SYS_241",
+ "SYS_242",
+ "SYS_243",
+ "SYS_244",
+ "SYS_245",
+ "SYS_246",
+ "SYS_247",
+ "SYS_248",
+ "SYS_249",
+ "SYS_minherit", //250
+ "SYS_rfork", //251
+ "SYS_poll", //252
+ "SYS_issetugid", //253
+ "SYS_lchown", //254
+ "SYS_getsid", //255
+ "SYS_msync", //256
+ "SYS_257",
+ "SYS_258",
+ "SYS_259",
+ "SYS_getfsstat", //260
+ "SYS_statfs", //261
+ "SYS_fstatfs", //262
+ "SYS_pipe", //263
+ "SYS_fhopen", //264
+ "SYS_265",
+ "SYS_fhstatfs", //266
+ "SYS_preadv", //267
+ "SYS_pwritev", //268
+ "SYS_kqueue", //269
+ "SYS_kevent", //270
+ "SYS_mlockall", //271
+ "SYS_munlockall", //272
+ "SYS_getpeereid", //273
+ "SYS_274",
+ "SYS_275",
+ "SYS_276",
+ "SYS_277",
+ "SYS_278",
+ "SYS_279",
+ "SYS_280",
+ "SYS_getresuid", //281
+ "SYS_setresuid", //282
+ "SYS_getresgid", //283
+ "SYS_setresgid", //284
+ "SYS_285",
+ "SYS_mquery", //286
+ "SYS_closefrom", //287
+ "SYS_sigaltstack", //288
+ "SYS_shmget", //289
+ "SYS_semop", //290
+ "SYS_stat", //291
+ "SYS_fstat", //292
+ "SYS_lstat", //293
+ "SYS_fhstat", //294
+ "SYS___semctl", //295
+ "SYS_shmctl", //296
+ "SYS_msgctl", //297
+ "SYS_MAXSYSCALL", //298
+ //299
+ //300
+ };
+ uint32_t uEAX;
+ if (!LogIsEnabled())
+ return;
+ uEAX = CPUMGetGuestEAX(pVCpu);
+ switch (uEAX)
+ {
+ default:
+ if (uEAX < RT_ELEMENTS(apsz))
+ {
+ uint32_t au32Args[8] = {0};
+ PGMPhysSimpleReadGCPtr(pVCpu, au32Args, CPUMGetGuestESP(pVCpu), sizeof(au32Args));
+ RTLogPrintf("REM: OpenBSD syscall %3d: %s (eip=%08x %08x %08x %08x %08x %08x %08x %08x %08x)\n",
+ uEAX, apsz[uEAX], CPUMGetGuestEIP(pVCpu), au32Args[0], au32Args[1], au32Args[2], au32Args[3],
+ au32Args[4], au32Args[5], au32Args[6], au32Args[7]);
+ }
+ else
+ RTLogPrintf("eip=%08x: OpenBSD syscall %d (#%x) unknown!!\n", CPUMGetGuestEIP(pVCpu), uEAX, uEAX);
+ break;
+ }
+}
+
+
+#if defined(IPRT_NO_CRT) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_X86)
+/**
+ * The Dll main entry point (stub).
+ */
+bool __stdcall _DllMainCRTStartup(void *hModule, uint32_t dwReason, void *pvReserved)
+{
+ return true;
+}
+
+void *memcpy(void *dst, const void *src, size_t size)
+{
+ uint8_t *pbDst = (uint8_t *)dst;
+ const uint8_t *pbSrc = (const uint8_t *)src;
+ while (size-- > 0)
+ *pbDst++ = *pbSrc++;
+ return dst;
+}
+
+#endif
+
+void cpu_smm_update(CPUX86State *env)
+{
+}
diff --git a/src/recompiler/bswap.h b/src/recompiler/bswap.h
new file mode 100644
index 00000000..4377c7b0
--- /dev/null
+++ b/src/recompiler/bswap.h
@@ -0,0 +1,225 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+#include "config-host.h"
+
+#include <inttypes.h>
+
+#ifdef CONFIG_MACHINE_BSWAP_H
+#include <sys/endian.h>
+#include <sys/types.h>
+#include <machine/bswap.h>
+#else
+
+#ifdef CONFIG_BYTESWAP_H
+#include <byteswap.h>
+#else
+
+#define bswap_16(x) __extension__ /* <- VBOX */ \
+({ \
+ uint16_t __x = (x); \
+ ((uint16_t)( \
+ (((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \
+ (((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \
+})
+
+#define bswap_32(x) __extension__ /* <- VBOX */ \
+({ \
+ uint32_t __x = (x); \
+ ((uint32_t)( \
+ (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
+ (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \
+ (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \
+ (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \
+})
+
+#define bswap_64(x) __extension__ /* <- VBOX */ \
+({ \
+ uint64_t __x = (x); \
+ ((uint64_t)( \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) << 8) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
+ (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
+})
+
+#endif /* !CONFIG_BYTESWAP_H */
+
+static inline uint16_t bswap16(uint16_t x)
+{
+ return bswap_16(x);
+}
+
+static inline uint32_t bswap32(uint32_t x)
+{
+ return bswap_32(x);
+}
+
+static inline uint64_t bswap64(uint64_t x)
+{
+ return bswap_64(x);
+}
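+
+/* Worked example (added annotation, not in the upstream header):
+ bswap16(UINT16_C(0x1234)) returns 0x3412 and
+ bswap32(UINT32_C(0x12345678)) returns 0x78563412. */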
+
+#endif /* ! CONFIG_MACHINE_BSWAP_H */
+
+static inline void bswap16s(uint16_t *s)
+{
+ *s = bswap16(*s);
+}
+
+static inline void bswap32s(uint32_t *s)
+{
+ *s = bswap32(*s);
+}
+
+static inline void bswap64s(uint64_t *s)
+{
+ *s = bswap64(*s);
+}
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define be_bswap(v, size) (v)
+#define le_bswap(v, size) bswap ## size(v)
+#define be_bswaps(v, size)
+#define le_bswaps(p, size) *p = bswap ## size(*p);
+#else
+#define le_bswap(v, size) (v)
+#define be_bswap(v, size) bswap ## size(v)
+#define le_bswaps(v, size)
+#define be_bswaps(p, size) *p = bswap ## size(*p);
+#endif
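+
+/* Added annotation (not in the upstream header): on a little endian host
+ le_bswap(v, size) is the identity and be_bswap(v, size) expands to
+ bswap##size(v); the *_bswaps variants swap a value in place through the
+ given pointer and compile to nothing when no swap is needed. */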
+
+#define CPU_CONVERT(endian, size, type)\
+static inline type endian ## size ## _to_cpu(type v)\
+{\
+ return endian ## _bswap(v, size);\
+}\
+\
+static inline type cpu_to_ ## endian ## size(type v)\
+{\
+ return endian ## _bswap(v, size);\
+}\
+\
+static inline void endian ## size ## _to_cpus(type *p)\
+{\
+ endian ## _bswaps(p, size)\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## s(type *p)\
+{\
+ endian ## _bswaps(p, size)\
+}\
+\
+static inline type endian ## size ## _to_cpup(const type *p)\
+{\
+ return endian ## size ## _to_cpu(*p);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
+{\
+ *p = cpu_to_ ## endian ## size(v);\
+}
+
+CPU_CONVERT(be, 16, uint16_t)
+CPU_CONVERT(be, 32, uint32_t)
+CPU_CONVERT(be, 64, uint64_t)
+
+CPU_CONVERT(le, 16, uint16_t)
+CPU_CONVERT(le, 32, uint32_t)
+CPU_CONVERT(le, 64, uint64_t)
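+
+/* Added annotation (not in the upstream header): each CPU_CONVERT()
+ instantiation above expands into a family of static inline helpers; for
+ example CPU_CONVERT(le, 32, uint32_t) defines le32_to_cpu(), cpu_to_le32(),
+ le32_to_cpus(), cpu_to_le32s(), le32_to_cpup() and cpu_to_le32w(). */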
+
+/* unaligned versions (optimized for frequent unaligned accesses)*/
+
+#if defined(__i386__) || defined(_ARCH_PPC)
+
+#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
+#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
+#define le16_to_cpupu(p) le16_to_cpup(p)
+#define le32_to_cpupu(p) le32_to_cpup(p)
+#define be32_to_cpupu(p) be32_to_cpup(p)
+
+#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
+#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+
+#else
+
+static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
+{
+ uint8_t *p1 = (uint8_t *)p;
+
+ p1[0] = v & 0xff;
+ p1[1] = v >> 8;
+}
+
+static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
+{
+ uint8_t *p1 = (uint8_t *)p;
+
+ p1[0] = v & 0xff;
+ p1[1] = v >> 8;
+ p1[2] = v >> 16;
+ p1[3] = v >> 24;
+}
+
+static inline uint16_t le16_to_cpupu(const uint16_t *p)
+{
+ const uint8_t *p1 = (const uint8_t *)p;
+ return p1[0] | (p1[1] << 8);
+}
+
+static inline uint32_t le32_to_cpupu(const uint32_t *p)
+{
+ const uint8_t *p1 = (const uint8_t *)p;
+ return p1[0] | (p1[1] << 8) | (p1[2] << 16) | (p1[3] << 24);
+}
+
+static inline uint32_t be32_to_cpupu(const uint32_t *p)
+{
+ const uint8_t *p1 = (const uint8_t *)p;
+ return p1[3] | (p1[2] << 8) | (p1[1] << 16) | (p1[0] << 24);
+}
+
+static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
+{
+ uint8_t *p1 = (uint8_t *)p;
+
+ p1[0] = v >> 8;
+ p1[1] = v & 0xff;
+}
+
+static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
+{
+ uint8_t *p1 = (uint8_t *)p;
+
+ p1[0] = v >> 24;
+ p1[1] = v >> 16;
+ p1[2] = v >> 8;
+ p1[3] = v & 0xff;
+}
+
+#endif
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define cpu_to_32wu cpu_to_be32wu
+#define leul_to_cpu(v) glue(glue(le,HOST_LONG_BITS),_to_cpu)(v)
+#else
+#define cpu_to_32wu cpu_to_le32wu
+#define leul_to_cpu(v) (v)
+#endif
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+/* len must be one of 1, 2, 4 */
+static inline uint32_t qemu_bswap_len(uint32_t value, int len)
+{
+ return bswap32(value) >> (32 - 8 * len);
+}
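+
+/* Worked example (added annotation, not in the upstream header): for len == 2,
+ qemu_bswap_len(0x0000abcd, 2) yields 0x0000cdab, i.e. only the low
+ 'len' bytes of the value are byte swapped. */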
+
+#endif /* BSWAP_H */
diff --git a/src/recompiler/cache-utils.h b/src/recompiler/cache-utils.h
new file mode 100644
index 00000000..5d5d44f4
--- /dev/null
+++ b/src/recompiler/cache-utils.h
@@ -0,0 +1,41 @@
+#ifndef QEMU_CACHE_UTILS_H
+#define QEMU_CACHE_UTILS_H
+
+#if defined(_ARCH_PPC)
+struct qemu_cache_conf {
+ unsigned long dcache_bsize;
+ unsigned long icache_bsize;
+};
+
+extern struct qemu_cache_conf qemu_cache_conf;
+
+extern void qemu_cache_utils_init(char **envp);
+
+/* mildly adjusted code from tcg-dyngen.c */
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ unsigned long p, start1, stop1;
+ unsigned long dsize = qemu_cache_conf.dcache_bsize;
+ unsigned long isize = qemu_cache_conf.icache_bsize;
+
+ start1 = start & ~(dsize - 1);
+ stop1 = (stop + dsize - 1) & ~(dsize - 1);
+ for (p = start1; p < stop1; p += dsize) {
+ asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+
+ start1 = start & ~(isize - 1);
+ stop1 = (stop + isize - 1) & ~(isize - 1);
+ for (p = start1; p < stop1; p += isize) {
+ asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+ asm volatile ("isync" : : : "memory");
+}
+
+#else
+#define qemu_cache_utils_init(envp) do { (void) (envp); } while (0)
+#endif
+
+#endif /* QEMU_CACHE_UTILS_H */
diff --git a/src/recompiler/cpu-all.h b/src/recompiler/cpu-all.h
new file mode 100644
index 00000000..128c672e
--- /dev/null
+++ b/src/recompiler/cpu-all.h
@@ -0,0 +1,1184 @@
+/*
+ * defines common to all virtual CPUs
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#ifndef CPU_ALL_H
+#define CPU_ALL_H
+
+#ifdef VBOX
+# ifndef LOG_GROUP
+# define LOG_GROUP LOG_GROUP_REM
+# endif
+# include <VBox/log.h>
+# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+#endif /* VBOX */
+#include "qemu-common.h"
+#include "cpu-common.h"
+
+/* some important defines:
+ *
+ * WORDS_ALIGNED : if defined, the host cpu can only make word aligned
+ * memory accesses.
+ *
+ * HOST_WORDS_BIGENDIAN : if defined, the host cpu is big endian and
+ * otherwise little endian.
+ *
+ * (TARGET_WORDS_ALIGNED : same for target cpu (not supported yet))
+ *
+ * TARGET_WORDS_BIGENDIAN : same for target cpu
+ */
+
+#include "softfloat.h"
+
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+#define BSWAP_NEEDED
+#endif
+
+#ifdef BSWAP_NEEDED
+
+static inline uint16_t tswap16(uint16_t s)
+{
+ return bswap16(s);
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+ return bswap32(s);
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+ return bswap64(s);
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+ *s = bswap16(*s);
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+ *s = bswap32(*s);
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+ *s = bswap64(*s);
+}
+
+#else
+
+static inline uint16_t tswap16(uint16_t s)
+{
+ return s;
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+ return s;
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+ return s;
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+}
+
+#endif
+
+#if TARGET_LONG_SIZE == 4
+#define tswapl(s) tswap32(s)
+#define tswapls(s) tswap32s((uint32_t *)(s))
+#define bswaptls(s) bswap32s(s)
+#else
+#define tswapl(s) tswap64(s)
+#define tswapls(s) tswap64s((uint64_t *)(s))
+#define bswaptls(s) bswap64s(s)
+#endif
+
+typedef union {
+ float32 f;
+ uint32_t l;
+} CPU_FloatU;
+
+/* NOTE: arm FPA is horrible as double 32 bit words are stored in big
+ endian ! */
+typedef union {
+ float64 d;
+#if defined(HOST_WORDS_BIGENDIAN) \
+ || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT))
+ struct {
+ uint32_t upper;
+ uint32_t lower;
+ } l;
+#else
+ struct {
+ uint32_t lower;
+ uint32_t upper;
+ } l;
+#endif
+ uint64_t ll;
+} CPU_DoubleU;
+
+#ifdef TARGET_SPARC
+typedef union {
+ float128 q;
+#if defined(HOST_WORDS_BIGENDIAN) \
+ || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT))
+ struct {
+ uint32_t upmost;
+ uint32_t upper;
+ uint32_t lower;
+ uint32_t lowest;
+ } l;
+ struct {
+ uint64_t upper;
+ uint64_t lower;
+ } ll;
+#else
+ struct {
+ uint32_t lowest;
+ uint32_t lower;
+ uint32_t upper;
+ uint32_t upmost;
+ } l;
+ struct {
+ uint64_t lower;
+ uint64_t upper;
+ } ll;
+#endif
+} CPU_QuadU;
+#endif
+
+/* CPU memory access without any memory or io remapping */
+
+/*
+ * the generic syntax for the memory accesses is:
+ *
+ * load: ld{type}{sign}{size}{endian}_{access_type}(ptr)
+ *
+ * store: st{type}{size}{endian}_{access_type}(ptr, val)
+ *
+ * type is:
+ * (empty): integer access
+ * f : float access
+ *
+ * sign is:
+ * (empty): for floats or 32 bit size
+ * u : unsigned
+ * s : signed
+ *
+ * size is:
+ * b: 8 bits
+ * w: 16 bits
+ * l: 32 bits
+ * q: 64 bits
+ *
+ * endian is:
+ * (empty): target cpu endianness or 8 bit access
+ * r : reversed target cpu endianness (not implemented yet)
+ * be : big endian (not implemented yet)
+ * le : little endian (not implemented yet)
+ *
+ * access_type is:
+ * raw : host memory access
+ * user : user mode access using soft MMU
+ * kernel : kernel mode access using soft MMU
+ */
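+
+/* Illustrative examples of the naming scheme above (added annotation, not in
+ the upstream header): ldub_p(p) is a raw load of an unsigned byte,
+ ldsw_le_p(p) a raw load of a signed 16-bit little endian value, and
+ stl_be_p(p, v) a raw store of a 32-bit big endian value; all of these are
+ defined further down in this file. */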
+
+#ifdef VBOX
+void remAbort(int rc, const char *pszTip) __attribute__((__noreturn__));
+
+void remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb);
+RTCCUINTREG remR3PhysReadU8(RTGCPHYS SrcGCPhys);
+RTCCINTREG remR3PhysReadS8(RTGCPHYS SrcGCPhys);
+RTCCUINTREG remR3PhysReadU16(RTGCPHYS SrcGCPhys);
+RTCCINTREG remR3PhysReadS16(RTGCPHYS SrcGCPhys);
+RTCCUINTREG remR3PhysReadU32(RTGCPHYS SrcGCPhys);
+RTCCINTREG remR3PhysReadS32(RTGCPHYS SrcGCPhys);
+uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys);
+int64_t remR3PhysReadS64(RTGCPHYS SrcGCPhys);
+void remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb);
+void remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val);
+void remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val);
+void remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val);
+void remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val);
+
+# ifndef REM_PHYS_ADDR_IN_TLB
+void *remR3TlbGCPhys2Ptr(CPUState *env1, target_ulong physAddr, int fWritable);
+# endif
+
+#endif /* VBOX */
+
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+
+DECLINLINE(uint8_t) ldub_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadU8((uintptr_t)ptr);
+}
+
+DECLINLINE(int8_t) ldsb_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadS8((uintptr_t)ptr);
+}
+
+DECLINLINE(void) stb_p(void *ptr, int v)
+{
+ VBOX_CHECK_ADDR(ptr);
+ remR3PhysWriteU8((uintptr_t)ptr, v);
+}
+
+DECLINLINE(uint32_t) lduw_le_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadU16((uintptr_t)ptr);
+}
+
+DECLINLINE(int32_t) ldsw_le_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadS16((uintptr_t)ptr);
+}
+
+DECLINLINE(void) stw_le_p(void *ptr, int v)
+{
+ VBOX_CHECK_ADDR(ptr);
+ remR3PhysWriteU16((uintptr_t)ptr, v);
+}
+
+DECLINLINE(uint32_t) ldl_le_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadU32((uintptr_t)ptr);
+}
+
+DECLINLINE(void) stl_le_p(void *ptr, int v)
+{
+ VBOX_CHECK_ADDR(ptr);
+ remR3PhysWriteU32((uintptr_t)ptr, v);
+}
+
+DECLINLINE(void) stq_le_p(void *ptr, uint64_t v)
+{
+ VBOX_CHECK_ADDR(ptr);
+ remR3PhysWriteU64((uintptr_t)ptr, v);
+}
+
+DECLINLINE(uint64_t) ldq_le_p(const void *ptr)
+{
+ VBOX_CHECK_ADDR(ptr);
+ return remR3PhysReadU64((uintptr_t)ptr);
+}
+
+# undef VBOX_CHECK_ADDR
+
+/* float access */
+
+DECLINLINE(float32) ldfl_le_p(const void *ptr)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.i = ldl_le_p(ptr);
+ return u.f;
+}
+
+DECLINLINE(void) stfl_le_p(void *ptr, float32 v)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.f = v;
+ stl_le_p(ptr, u.i);
+}
+
+DECLINLINE(float64) ldfq_le_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.l.lower = ldl_le_p(ptr);
+ u.l.upper = ldl_le_p((uint8_t*)ptr + 4);
+ return u.d;
+}
+
+DECLINLINE(void) stfq_le_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stl_le_p(ptr, u.l.lower);
+ stl_le_p((uint8_t*)ptr + 4, u.l.upper);
+}
+
+#else /* !VBOX || !REM_PHYS_ADDR_IN_TLB */
+
+static inline int ldub_p(const void *ptr)
+{
+ return *(uint8_t *)ptr;
+}
+
+static inline int ldsb_p(const void *ptr)
+{
+ return *(int8_t *)ptr;
+}
+
+static inline void stb_p(void *ptr, int v)
+{
+ *(uint8_t *)ptr = v;
+}
+
+/* NOTE: on arm, putting 2 in /proc/sys/debug/alignment so that the
+ kernel handles unaligned load/stores may give better results, but
+ it is a system wide setting : bad */
+#if defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
+
+/* conservative code for little endian unaligned accesses */
+static inline int lduw_le_p(const void *ptr)
+{
+#ifdef _ARCH_PPC
+ int val;
+ __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+ return val;
+#else
+ const uint8_t *p = ptr;
+ return p[0] | (p[1] << 8);
+#endif
+}
+
+static inline int ldsw_le_p(const void *ptr)
+{
+#ifdef _ARCH_PPC
+ int val;
+ __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+ return (int16_t)val;
+#else
+ const uint8_t *p = ptr;
+ return (int16_t)(p[0] | (p[1] << 8));
+#endif
+}
+
+static inline int ldl_le_p(const void *ptr)
+{
+#ifdef _ARCH_PPC
+ int val;
+ __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+ return val;
+#else
+ const uint8_t *p = ptr;
+ return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline uint64_t ldq_le_p(const void *ptr)
+{
+ const uint8_t *p = ptr;
+ uint32_t v1, v2;
+ v1 = ldl_le_p(p);
+ v2 = ldl_le_p(p + 4);
+ return v1 | ((uint64_t)v2 << 32);
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+#ifdef _ARCH_PPC
+ __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
+#else
+ uint8_t *p = ptr;
+ p[0] = v;
+ p[1] = v >> 8;
+#endif
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+#ifdef _ARCH_PPC
+ __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
+#else
+ uint8_t *p = ptr;
+ p[0] = v;
+ p[1] = v >> 8;
+ p[2] = v >> 16;
+ p[3] = v >> 24;
+#endif
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+ uint8_t *p = ptr;
+ stl_le_p(p, (uint32_t)v);
+ stl_le_p(p + 4, v >> 32);
+}
+
+/* float access */
+
+static inline float32 ldfl_le_p(const void *ptr)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.i = ldl_le_p(ptr);
+ return u.f;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.f = v;
+ stl_le_p(ptr, u.i);
+}
+
+static inline float64 ldfq_le_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.l.lower = ldl_le_p(ptr);
+ u.l.upper = ldl_le_p(ptr + 4);
+ return u.d;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stl_le_p(ptr, u.l.lower);
+ stl_le_p(ptr + 4, u.l.upper);
+}
+
+#else
+
+static inline int lduw_le_p(const void *ptr)
+{
+ return *(uint16_t *)ptr;
+}
+
+static inline int ldsw_le_p(const void *ptr)
+{
+ return *(int16_t *)ptr;
+}
+
+static inline int ldl_le_p(const void *ptr)
+{
+ return *(uint32_t *)ptr;
+}
+
+static inline uint64_t ldq_le_p(const void *ptr)
+{
+ return *(uint64_t *)ptr;
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+ *(uint16_t *)ptr = v;
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+ *(uint32_t *)ptr = v;
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+ *(uint64_t *)ptr = v;
+}
+
+/* float access */
+
+static inline float32 ldfl_le_p(const void *ptr)
+{
+ return *(float32 *)ptr;
+}
+
+static inline float64 ldfq_le_p(const void *ptr)
+{
+ return *(float64 *)ptr;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+ *(float32 *)ptr = v;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+ *(float64 *)ptr = v;
+}
+#endif
+
+#endif /* !VBOX || !REM_PHYS_ADDR_IN_TLB */
+
+#if !defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
+
+static inline int lduw_be_p(const void *ptr)
+{
+#if defined(__i386__)
+ int val;
+ asm volatile ("movzwl %1, %0\n"
+ "xchgb %b0, %h0\n"
+ : "=q" (val)
+ : "m" (*(uint16_t *)ptr));
+ return val;
+#else
+ const uint8_t *b = ptr;
+ return ((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldsw_be_p(const void *ptr)
+{
+#if defined(__i386__)
+ int val;
+ asm volatile ("movzwl %1, %0\n"
+ "xchgb %b0, %h0\n"
+ : "=q" (val)
+ : "m" (*(uint16_t *)ptr));
+ return (int16_t)val;
+#else
+ const uint8_t *b = ptr;
+ return (int16_t)((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldl_be_p(const void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+ int val;
+ asm volatile ("movl %1, %0\n"
+ "bswap %0\n"
+ : "=r" (val)
+ : "m" (*(uint32_t *)ptr));
+ return val;
+#else
+ const uint8_t *b = ptr;
+ return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline uint64_t ldq_be_p(const void *ptr)
+{
+ uint32_t a,b;
+ a = ldl_be_p(ptr);
+ b = ldl_be_p((uint8_t *)ptr + 4);
+ return (((uint64_t)a<<32)|b);
+}
+
+static inline void stw_be_p(void *ptr, int v)
+{
+#if defined(__i386__)
+ asm volatile ("xchgb %b0, %h0\n"
+ "movw %w0, %1\n"
+ : "=q" (v)
+ : "m" (*(uint16_t *)ptr), "0" (v));
+#else
+ uint8_t *d = (uint8_t *) ptr;
+ d[0] = v >> 8;
+ d[1] = v;
+#endif
+}
+
+static inline void stl_be_p(void *ptr, int v)
+{
+#if defined(__i386__) || defined(__x86_64__)
+ asm volatile ("bswap %0\n"
+ "movl %0, %1\n"
+ : "=r" (v)
+ : "m" (*(uint32_t *)ptr), "0" (v));
+#else
+ uint8_t *d = (uint8_t *) ptr;
+ d[0] = v >> 24;
+ d[1] = v >> 16;
+ d[2] = v >> 8;
+ d[3] = v;
+#endif
+}
+
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+ stl_be_p(ptr, v >> 32);
+ stl_be_p((uint8_t *)ptr + 4, v);
+}
+
+/* float access */
+
+static inline float32 ldfl_be_p(const void *ptr)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.i = ldl_be_p(ptr);
+ return u.f;
+}
+
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.f = v;
+ stl_be_p(ptr, u.i);
+}
+
+static inline float64 ldfq_be_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.l.upper = ldl_be_p(ptr);
+ u.l.lower = ldl_be_p((uint8_t *)ptr + 4);
+ return u.d;
+}
+
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stl_be_p(ptr, u.l.upper);
+ stl_be_p((uint8_t *)ptr + 4, u.l.lower);
+}
+
+#else
+
+static inline int lduw_be_p(const void *ptr)
+{
+ return *(uint16_t *)ptr;
+}
+
+static inline int ldsw_be_p(const void *ptr)
+{
+ return *(int16_t *)ptr;
+}
+
+static inline int ldl_be_p(const void *ptr)
+{
+ return *(uint32_t *)ptr;
+}
+
+static inline uint64_t ldq_be_p(const void *ptr)
+{
+ return *(uint64_t *)ptr;
+}
+
+static inline void stw_be_p(void *ptr, int v)
+{
+ *(uint16_t *)ptr = v;
+}
+
+static inline void stl_be_p(void *ptr, int v)
+{
+ *(uint32_t *)ptr = v;
+}
+
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+ *(uint64_t *)ptr = v;
+}
+
+/* float access */
+
+static inline float32 ldfl_be_p(const void *ptr)
+{
+ return *(float32 *)ptr;
+}
+
+static inline float64 ldfq_be_p(const void *ptr)
+{
+ return *(float64 *)ptr;
+}
+
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+ *(float32 *)ptr = v;
+}
+
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+ *(float64 *)ptr = v;
+}
+
+#endif
+
+/* target CPU memory access functions */
+#if defined(TARGET_WORDS_BIGENDIAN)
+#define lduw_p(p) lduw_be_p(p)
+#define ldsw_p(p) ldsw_be_p(p)
+#define ldl_p(p) ldl_be_p(p)
+#define ldq_p(p) ldq_be_p(p)
+#define ldfl_p(p) ldfl_be_p(p)
+#define ldfq_p(p) ldfq_be_p(p)
+#define stw_p(p, v) stw_be_p(p, v)
+#define stl_p(p, v) stl_be_p(p, v)
+#define stq_p(p, v) stq_be_p(p, v)
+#define stfl_p(p, v) stfl_be_p(p, v)
+#define stfq_p(p, v) stfq_be_p(p, v)
+#else
+#define lduw_p(p) lduw_le_p(p)
+#define ldsw_p(p) ldsw_le_p(p)
+#define ldl_p(p) ldl_le_p(p)
+#define ldq_p(p) ldq_le_p(p)
+#define ldfl_p(p) ldfl_le_p(p)
+#define ldfq_p(p) ldfq_le_p(p)
+#define stw_p(p, v) stw_le_p(p, v)
+#define stl_p(p, v) stl_le_p(p, v)
+#define stq_p(p, v) stq_le_p(p, v)
+#define stfl_p(p, v) stfl_le_p(p, v)
+#define stfq_p(p, v) stfq_le_p(p, v)
+#endif
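+
+/* Usage sketch (illustrative only, not compiled): the *_p accessors above
+   read and write memory in the guest byte order, so a value stored with
+   stl_p() is read back unchanged by ldl_p() on any host. The helper name
+   below is hypothetical. */
+#if 0 /* example */
+static inline uint32_t example_roundtrip_u32(void)
+{
+    uint8_t buf[4];
+    stl_p(buf, 0x12345678);    /* bytes laid out in target byte order */
+    return ldl_p(buf);         /* always yields 0x12345678 */
+}
+#endif /* example */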
+
+/* MMU memory access macros */
+
+#if defined(CONFIG_USER_ONLY)
+#include <assert.h>
+#include "qemu-types.h"
+
+/* On some host systems the guest address space is reserved on the host.
+ * This allows the guest address space to be offset to a convenient location.
+ */
+#if defined(CONFIG_USE_GUEST_BASE)
+extern uintptr_t guest_base;
+extern int have_guest_base;
+extern uintptr_t reserved_va;
+#define GUEST_BASE guest_base
+#define RESERVED_VA reserved_va
+#else
+#define GUEST_BASE 0ul
+#define RESERVED_VA 0ul
+#endif
+
+/* All direct uses of g2h and h2g need to go away for usermode softmmu. */
+#define g2h(x) ((void *)((uintptr_t)(x) + GUEST_BASE))
+
+#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
+#define h2g_valid(x) 1
+#else
+#define h2g_valid(x) ({ \
+ uintptr_t __guest = (uintptr_t)(x) - GUEST_BASE; \
+ __guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS); \
+})
+#endif
+
+#define h2g(x) ({ \
+ uintptr_t __ret = (uintptr_t)(x) - GUEST_BASE; \
+ /* Check if given address fits target address space */ \
+ assert(h2g_valid(x)); \
+ (abi_ulong)__ret; \
+})
+
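+/* Worked example (hypothetical numbers): with GUEST_BASE == 0x10000, the
+   guest address 0x400000 maps to the host pointer g2h(0x400000) ==
+   (void *)0x410000, and h2g((void *)0x410000) returns 0x400000 once the
+   h2g_valid() assertion passes. */
+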
+#define saddr(x) g2h(x)
+#define laddr(x) g2h(x)
+
+#else /* !CONFIG_USER_ONLY */
+/* NOTE: we use double casts if pointers and target_ulong have
+ different sizes */
+#define saddr(x) (uint8_t *)(intptr_t)(x)
+#define laddr(x) (uint8_t *)(intptr_t)(x)
+#endif
+
+#define ldub_raw(p) ldub_p(laddr((p)))
+#define ldsb_raw(p) ldsb_p(laddr((p)))
+#define lduw_raw(p) lduw_p(laddr((p)))
+#define ldsw_raw(p) ldsw_p(laddr((p)))
+#define ldl_raw(p) ldl_p(laddr((p)))
+#define ldq_raw(p) ldq_p(laddr((p)))
+#define ldfl_raw(p) ldfl_p(laddr((p)))
+#define ldfq_raw(p) ldfq_p(laddr((p)))
+#define stb_raw(p, v) stb_p(saddr((p)), v)
+#define stw_raw(p, v) stw_p(saddr((p)), v)
+#define stl_raw(p, v) stl_p(saddr((p)), v)
+#define stq_raw(p, v) stq_p(saddr((p)), v)
+#define stfl_raw(p, v) stfl_p(saddr((p)), v)
+#define stfq_raw(p, v) stfq_p(saddr((p)), v)
+
+
+#if defined(CONFIG_USER_ONLY)
+
+/* if user mode, no other memory access functions */
+#define ldub(p) ldub_raw(p)
+#define ldsb(p) ldsb_raw(p)
+#define lduw(p) lduw_raw(p)
+#define ldsw(p) ldsw_raw(p)
+#define ldl(p) ldl_raw(p)
+#define ldq(p) ldq_raw(p)
+#define ldfl(p) ldfl_raw(p)
+#define ldfq(p) ldfq_raw(p)
+#define stb(p, v) stb_raw(p, v)
+#define stw(p, v) stw_raw(p, v)
+#define stl(p, v) stl_raw(p, v)
+#define stq(p, v) stq_raw(p, v)
+#define stfl(p, v) stfl_raw(p, v)
+#define stfq(p, v) stfq_raw(p, v)
+
+#define ldub_code(p) ldub_raw(p)
+#define ldsb_code(p) ldsb_raw(p)
+#define lduw_code(p) lduw_raw(p)
+#define ldsw_code(p) ldsw_raw(p)
+#define ldl_code(p) ldl_raw(p)
+#define ldq_code(p) ldq_raw(p)
+
+#define ldub_kernel(p) ldub_raw(p)
+#define ldsb_kernel(p) ldsb_raw(p)
+#define lduw_kernel(p) lduw_raw(p)
+#define ldsw_kernel(p) ldsw_raw(p)
+#define ldl_kernel(p) ldl_raw(p)
+#define ldq_kernel(p) ldq_raw(p)
+#define ldfl_kernel(p) ldfl_raw(p)
+#define ldfq_kernel(p) ldfq_raw(p)
+#define stb_kernel(p, v) stb_raw(p, v)
+#define stw_kernel(p, v) stw_raw(p, v)
+#define stl_kernel(p, v) stl_raw(p, v)
+#define stq_kernel(p, v) stq_raw(p, v)
+#define stfl_kernel(p, v) stfl_raw(p, v)
+#define stfq_kernel(p, v) stfq_raw(p, v)
+
+#endif /* defined(CONFIG_USER_ONLY) */
+
+/* page related stuff */
+
+#define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
+#define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1)
+#define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK)
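+
+/* Example (assuming TARGET_PAGE_BITS == 12, i.e. 4 KiB pages):
+   TARGET_PAGE_SIZE == 0x1000, TARGET_PAGE_MASK == ~0xfff,
+   TARGET_PAGE_ALIGN(0x1234) == 0x2000 and TARGET_PAGE_ALIGN(0x1000) == 0x1000. */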
+
+/* ??? These should be the larger of uintptr_t and target_ulong. */
+extern size_t qemu_real_host_page_size;
+extern size_t qemu_host_page_bits;
+extern size_t qemu_host_page_size;
+extern uintptr_t qemu_host_page_mask;
+
+#define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask)
+
+/* same as PROT_xxx */
+#define PAGE_READ 0x0001
+#define PAGE_WRITE 0x0002
+#define PAGE_EXEC 0x0004
+#define PAGE_BITS (PAGE_READ | PAGE_WRITE | PAGE_EXEC)
+#define PAGE_VALID 0x0008
+/* original state of the write flag (used when tracking self-modifying
+ code) */
+#define PAGE_WRITE_ORG 0x0010
+#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
+/* FIXME: Code that sets/uses this is broken and needs to go away. */
+#define PAGE_RESERVED 0x0020
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+void page_dump(FILE *f);
+
+typedef int (*walk_memory_regions_fn)(void *, abi_ulong,
+ abi_ulong, uintptr_t);
+int walk_memory_regions(void *, walk_memory_regions_fn);
+
+int page_get_flags(target_ulong address);
+void page_set_flags(target_ulong start, target_ulong end, int flags);
+int page_check_range(target_ulong start, target_ulong len, int flags);
+#endif
+
+CPUState *cpu_copy(CPUState *env);
+CPUState *qemu_get_cpu(int cpu);
+
+void cpu_dump_state(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ int flags);
+void cpu_dump_statistics (CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ int flags);
+
+void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...)
+#ifndef VBOX
+ __attribute__ ((__format__ (__printf__, 2, 3)));
+#else /* VBOX */
+ ;
+#endif /* VBOX */
+extern CPUState *first_cpu;
+extern CPUState *cpu_single_env;
+
+#define CPU_INTERRUPT_HARD 0x02 /* hardware interrupt pending */
+#define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */
+#define CPU_INTERRUPT_TIMER 0x08 /* internal timer exception pending */
+#define CPU_INTERRUPT_FIQ 0x10 /* Fast interrupt pending. */
+#define CPU_INTERRUPT_HALT 0x20 /* CPU halt wanted */
+#define CPU_INTERRUPT_SMI 0x40 /* (x86 only) SMI interrupt pending */
+#define CPU_INTERRUPT_DEBUG 0x80 /* Debug event occurred. */
+#define CPU_INTERRUPT_VIRQ 0x100 /* virtual interrupt pending. */
+#define CPU_INTERRUPT_NMI 0x200 /* NMI pending. */
+#define CPU_INTERRUPT_INIT 0x400 /* INIT pending. */
+#define CPU_INTERRUPT_SIPI 0x800 /* SIPI pending. */
+#define CPU_INTERRUPT_MCE 0x1000 /* (x86 only) MCE pending. */
+
+#ifdef VBOX
+/** Executes a single instruction. cpu_exec() will normally return EXCP_SINGLE_INSTR. */
+# define CPU_INTERRUPT_SINGLE_INSTR 0x01000000
+/** Executing a CPU_INTERRUPT_SINGLE_INSTR request; quit the cpu_loop (for exceptions and the like). */
+# define CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT 0x02000000
+/** VM execution was interrupted by VMR3Reset, VMR3Suspend or VMR3PowerOff. */
+# define CPU_INTERRUPT_RC 0x04000000
+/** Exit current TB to process an external request. */
+# define CPU_INTERRUPT_EXTERNAL_FLUSH_TLB 0x08000000
+/** Exit current TB to process an external request. */
+# define CPU_INTERRUPT_EXTERNAL_EXIT 0x10000000
+/** Exit current TB to process an external interrupt request. */
+# define CPU_INTERRUPT_EXTERNAL_HARD 0x20000000
+/** Exit current TB to process an external timer request. */
+# define CPU_INTERRUPT_EXTERNAL_TIMER 0x40000000
+/** Exit current TB to process an external DMA request. */
+# define CPU_INTERRUPT_EXTERNAL_DMA 0x80000000
+#endif /* VBOX */
+void cpu_interrupt(CPUState *s, int mask);
+void cpu_reset_interrupt(CPUState *env, int mask);
+
+void cpu_exit(CPUState *s);
+
+int qemu_cpu_has_work(CPUState *env);
+
+/* Breakpoint/watchpoint flags */
+#define BP_MEM_READ 0x01
+#define BP_MEM_WRITE 0x02
+#define BP_MEM_ACCESS (BP_MEM_READ | BP_MEM_WRITE)
+#define BP_STOP_BEFORE_ACCESS 0x04
+#define BP_WATCHPOINT_HIT 0x08
+#define BP_GDB 0x10
+#define BP_CPU 0x20
+
+int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
+ CPUBreakpoint **breakpoint);
+int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags);
+void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint);
+void cpu_breakpoint_remove_all(CPUState *env, int mask);
+int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
+ int flags, CPUWatchpoint **watchpoint);
+int cpu_watchpoint_remove(CPUState *env, target_ulong addr,
+ target_ulong len, int flags);
+void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint);
+void cpu_watchpoint_remove_all(CPUState *env, int mask);
+
+#define SSTEP_ENABLE 0x1 /* Enable simulated HW single stepping */
+#define SSTEP_NOIRQ 0x2 /* Do not use IRQ while single stepping */
+#define SSTEP_NOTIMER 0x4 /* Do not use timers while single stepping */
+
+void cpu_single_step(CPUState *env, int enabled);
+void cpu_reset(CPUState *s);
+int cpu_is_stopped(CPUState *env);
+void run_on_cpu(CPUState *env, void (*func)(void *data), void *data);
+
+#define CPU_LOG_TB_OUT_ASM (1 << 0)
+#define CPU_LOG_TB_IN_ASM (1 << 1)
+#define CPU_LOG_TB_OP (1 << 2)
+#define CPU_LOG_TB_OP_OPT (1 << 3)
+#define CPU_LOG_INT (1 << 4)
+#define CPU_LOG_EXEC (1 << 5)
+#define CPU_LOG_PCALL (1 << 6)
+#define CPU_LOG_IOPORT (1 << 7)
+#define CPU_LOG_TB_CPU (1 << 8)
+#define CPU_LOG_RESET (1 << 9)
+
+/* define log items */
+typedef struct CPULogItem {
+ int mask;
+ const char *name;
+ const char *help;
+} CPULogItem;
+
+extern const CPULogItem cpu_log_items[];
+
+void cpu_set_log(int log_flags);
+void cpu_set_log_filename(const char *filename);
+int cpu_str_to_log_mask(const char *str);
+
+#if !defined(CONFIG_USER_ONLY)
+
+/* Return the physical page corresponding to a virtual one. Use it
+ only for debugging because no protection checks are done. Return -1
+ if no page found. */
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr);
+
+/* memory API */
+
+#ifndef VBOX
+extern int phys_ram_fd;
+extern ram_addr_t ram_size;
+#endif /* !VBOX */
+
+typedef struct RAMBlock {
+ uint8_t *host;
+ ram_addr_t offset;
+ ram_addr_t length;
+ char idstr[256];
+ QLIST_ENTRY(RAMBlock) next;
+#if defined(__linux__) && !defined(TARGET_S390X)
+ int fd;
+#endif
+} RAMBlock;
+
+typedef struct RAMList {
+ uint8_t *phys_dirty;
+#ifdef VBOX
+ /** This is required for bounds checking the phys_dirty accesses.
+ * We have memory ranges (the high PC-BIOS mapping) which cause some pages
+ * to fall outside the dirty map. */
+ RTGCPHYS phys_dirty_size;
+#if 1
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) \
+ do { \
+ if (RT_UNLIKELY( ((addr) >> TARGET_PAGE_BITS) >= ram_list.phys_dirty_size)) { \
+ Log(("%s: %RGp\n", __FUNCTION__, (RTGCPHYS)addr)); \
+ return (rv); \
+ } \
+ } while (0)
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) \
+ do { \
+ if (RT_UNLIKELY( ((addr) >> TARGET_PAGE_BITS) >= ram_list.phys_dirty_size)) { \
+ Log(("%s: %RGp\n", __FUNCTION__, (RTGCPHYS)addr)); \
+ return; \
+ } \
+ } while (0)
+#else
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) \
+ AssertMsgReturn(((addr) >> TARGET_PAGE_BITS) < ram_list.phys_dirty_size, ("%#RGp\n", (RTGCPHYS)(addr)), (rv));
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) \
+ AssertMsgReturnVoid(((addr) >> TARGET_PAGE_BITS) < ram_list.phys_dirty_size, ("%#RGp\n", (RTGCPHYS)(addr)));
+# endif
+#else
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) do {} while (0)
+# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) do {} while (0)
+#endif /* VBOX */
+ QLIST_HEAD(ram, RAMBlock) blocks;
+} RAMList;
+extern RAMList ram_list;
+
+extern const char *mem_path;
+extern int mem_prealloc;
+
+/* physical memory access */
+
+/* MMIO pages are identified by a combination of an IO device index and
+ 3 flags. The ROMD code stores the page ram offset in the iotlb entry,
+ so only a limited number of ids are available. */
+
+#define IO_MEM_NB_ENTRIES (1 << (TARGET_PAGE_BITS - IO_MEM_SHIFT))
+
+/* Flags stored in the low bits of the TLB virtual address. These are
+ defined so that fast path ram access is all zeros. */
+/* Zero if TLB entry is valid. */
+#define TLB_INVALID_MASK (1 << 3)
+/* Set if TLB entry references a clean RAM page. The iotlb entry will
+ contain the page physical address. */
+#define TLB_NOTDIRTY (1 << 4)
+/* Set if TLB entry is an IO callback. */
+#define TLB_MMIO (1 << 5)
+
+#define VGA_DIRTY_FLAG 0x01
+#define CODE_DIRTY_FLAG 0x02
+#define MIGRATION_DIRTY_FLAG 0x08
+
+/* read dirty bit (return 0 or 1) */
+static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
+{
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0);
+ return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+}
+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff);
+ return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
+}
+
+static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
+ int dirty_flags)
+{
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff & dirty_flags);
+ return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+}
+
+static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
+{
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr);
+ ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+}
+
+static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
+ int dirty_flags)
+{
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff);
+ return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+}
+
+static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
+ int length,
+ int dirty_flags)
+{
+ int i, mask, len;
+ uint8_t *p;
+
+ VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(start);
+ len = length >> TARGET_PAGE_BITS;
+ mask = ~dirty_flags;
+ p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
+ for (i = 0; i < len; i++) {
+ p[i] &= mask;
+ }
+}
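+
+/* Usage sketch (illustrative only, not compiled): walk a RAM range page by
+   page, hand pages carrying the VGA dirty flag to a display update routine
+   and then clear that flag for the whole range. 'display_update_page' is a
+   hypothetical callback, not part of this file. */
+#if 0 /* example */
+static void example_sync_vga_dirty(ram_addr_t start, int length,
+                                   void (*display_update_page)(ram_addr_t))
+{
+    ram_addr_t addr;
+    for (addr = start; addr < start + length; addr += TARGET_PAGE_SIZE) {
+        if (cpu_physical_memory_get_dirty(addr, VGA_DIRTY_FLAG)) {
+            display_update_page(addr);
+        }
+    }
+    cpu_physical_memory_mask_dirty_range(start, length, VGA_DIRTY_FLAG);
+}
+#endif /* example */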
+
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
+ int dirty_flags);
+void cpu_tlb_update_dirty(CPUState *env);
+
+int cpu_physical_memory_set_dirty_tracking(int enable);
+
+int cpu_physical_memory_get_dirty_tracking(void);
+
+int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr);
+
+void dump_exec_info(FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+#endif /* !CONFIG_USER_ONLY */
+
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+ uint8_t *buf, int len, int is_write);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc);
+
+#ifdef VBOX
+void tb_invalidate_virt(CPUState *env, uint32_t eip);
+#endif /* VBOX */
+
+#endif /* CPU_ALL_H */
diff --git a/src/recompiler/cpu-common.h b/src/recompiler/cpu-common.h
new file mode 100644
index 00000000..b523d527
--- /dev/null
+++ b/src/recompiler/cpu-common.h
@@ -0,0 +1,135 @@
+#ifndef CPU_COMMON_H
+#define CPU_COMMON_H 1
+
+/* CPU interfaces that are target independent. */
+
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__)
+#define WORDS_ALIGNED
+#endif
+
+#ifdef TARGET_PHYS_ADDR_BITS
+#include "targphys.h"
+#endif
+
+#ifndef NEED_CPU_H
+#include "poison.h"
+#endif
+
+#include "bswap.h"
+#include "qemu-queue.h"
+
+#if !defined(CONFIG_USER_ONLY)
+
+/* address in the RAM (different from a physical address) */
+typedef uintptr_t ram_addr_t;
+
+/* memory API */
+
+typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
+
+void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset,
+ ram_addr_t region_offset);
+static inline void cpu_register_physical_memory(target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset)
+{
+ cpu_register_physical_memory_offset(start_addr, size, phys_offset, 0);
+}
+
+ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr);
+#ifndef VBOX
+ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
+ ram_addr_t size, void *host);
+ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size);
+void qemu_ram_free(ram_addr_t addr);
+/* This should only be used for ram local to a device. */
+void *qemu_get_ram_ptr(ram_addr_t addr);
+/* This should not be used by devices. */
+ram_addr_t qemu_ram_addr_from_host(void *ptr);
+#endif /* !VBOX */
+
+int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
+ CPUWriteMemoryFunc * const *mem_write,
+ void *opaque);
+void cpu_unregister_io_memory(int table_address);
+
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+ int len, int is_write);
+static inline void cpu_physical_memory_read(target_phys_addr_t addr,
+ uint8_t *buf, int len)
+{
+ cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(target_phys_addr_t addr,
+ const uint8_t *buf, int len)
+{
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1);
+}
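+
+/* Usage sketch (illustrative only, not compiled): read a 32-bit little-endian
+   value from guest-physical memory through the byte-copy wrapper above and
+   assemble it by hand. The helper name is hypothetical. */
+#if 0 /* example */
+static uint32_t example_read_le32(target_phys_addr_t addr)
+{
+    uint8_t buf[4];
+    cpu_physical_memory_read(addr, buf, sizeof(buf));
+    return (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) |
+           ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
+}
+#endif /* example */
+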
+void *cpu_physical_memory_map(target_phys_addr_t addr,
+ target_phys_addr_t *plen,
+ int is_write);
+void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
+ int is_write, target_phys_addr_t access_len);
+void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
+void cpu_unregister_map_client(void *cookie);
+
+struct CPUPhysMemoryClient;
+typedef struct CPUPhysMemoryClient CPUPhysMemoryClient;
+struct CPUPhysMemoryClient {
+ void (*set_memory)(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset);
+ int (*sync_dirty_bitmap)(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr);
+ int (*migration_log)(struct CPUPhysMemoryClient *client,
+ int enable);
+ QLIST_ENTRY(CPUPhysMemoryClient) list;
+};
+
+void cpu_register_phys_memory_client(CPUPhysMemoryClient *);
+void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *);
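+
+/* Usage sketch (illustrative only, not compiled): a minimal client that is
+   notified whenever a physical memory range is (re)registered. The callback
+   body and the names below are hypothetical placeholders. */
+#if 0 /* example */
+static void example_set_memory(CPUPhysMemoryClient *client,
+                               target_phys_addr_t start_addr,
+                               ram_addr_t size, ram_addr_t phys_offset)
+{
+    /* react to the mapping change, e.g. update a shadow lookup table */
+}
+
+static CPUPhysMemoryClient example_client = {
+    .set_memory = example_set_memory,
+};
+
+/* during setup: cpu_register_phys_memory_client(&example_client); */
+#endif /* example */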
+
+/* Coalesced MMIO regions are areas where write operations can be reordered.
+ * This usually implies that write operations are side-effect free. This allows
+ * batching which can have a major impact on performance when using
+ * virtualization.
+ */
+void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
+
+void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
+
+void qemu_flush_coalesced_mmio_buffer(void);
+
+uint32_t ldub_phys(target_phys_addr_t addr);
+uint32_t lduw_phys(target_phys_addr_t addr);
+uint32_t ldl_phys(target_phys_addr_t addr);
+uint64_t ldq_phys(target_phys_addr_t addr);
+void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val);
+void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val);
+void stb_phys(target_phys_addr_t addr, uint32_t val);
+void stw_phys(target_phys_addr_t addr, uint32_t val);
+void stl_phys(target_phys_addr_t addr, uint32_t val);
+void stq_phys(target_phys_addr_t addr, uint64_t val);
+
+void cpu_physical_memory_write_rom(target_phys_addr_t addr,
+ const uint8_t *buf, int len);
+
+#define IO_MEM_SHIFT 3
+
+#define IO_MEM_RAM (0 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_ROM (1 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_UNASSIGNED (2 << IO_MEM_SHIFT)
+#define IO_MEM_NOTDIRTY (3 << IO_MEM_SHIFT)
+
+/* Acts like a ROM when read and like a device when written. */
+#define IO_MEM_ROMD (1)
+#define IO_MEM_SUBPAGE (2)
+
+#endif
+
+#endif /* !CPU_COMMON_H */
diff --git a/src/recompiler/cpu-defs.h b/src/recompiler/cpu-defs.h
new file mode 100644
index 00000000..65d57063
--- /dev/null
+++ b/src/recompiler/cpu-defs.h
@@ -0,0 +1,235 @@
+/*
+ * common defines for all CPUs
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#ifndef CPU_DEFS_H
+#define CPU_DEFS_H
+
+#ifndef NEED_CPU_H
+#error cpu.h included from common code
+#endif
+
+#include "config.h"
+#include <setjmp.h>
+#include <inttypes.h>
+#ifndef VBOX
+#include <signal.h>
+#else /* VBOX */
+# define sig_atomic_t int32_t
+#endif /* VBOX */
+#include "osdep.h"
+#include "qemu-queue.h"
+#include "targphys.h"
+
+#ifndef TARGET_LONG_BITS
+#error TARGET_LONG_BITS must be defined before including this header
+#endif
+
+#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
+
+/* target_ulong is the type of a virtual address */
+#if TARGET_LONG_SIZE == 4
+typedef int32_t target_long;
+typedef uint32_t target_ulong;
+#define TARGET_FMT_lx "%08x"
+#define TARGET_FMT_ld "%d"
+#define TARGET_FMT_lu "%u"
+#elif TARGET_LONG_SIZE == 8
+typedef int64_t target_long;
+typedef uint64_t target_ulong;
+#define TARGET_FMT_lx "%016" PRIx64
+#define TARGET_FMT_ld "%" PRId64
+#define TARGET_FMT_lu "%" PRIu64
+#else
+#error TARGET_LONG_SIZE undefined
+#endif
+
+#define HOST_LONG_SIZE (HOST_LONG_BITS / 8)
+
+#define EXCP_INTERRUPT 0x10000 /* async interruption */
+#define EXCP_HLT 0x10001 /* hlt instruction reached */
+#define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */
+#define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */
+#ifdef VBOX
+# define EXCP_EXECUTE_RAW 0x11024 /**< execute raw mode. */
+# define EXCP_EXECUTE_HM 0x11025 /**< execute hardware accelerated raw mode. */
+# define EXCP_SINGLE_INSTR 0x11026 /**< executed single instruction. */
+# define EXCP_RC 0x11027 /**< an EM rc was raised (VMR3Reset/Suspend/PowerOff). */
+#endif /* VBOX */
+
+#define TB_JMP_CACHE_BITS 12
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
+
+/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
+ addresses on the same page. The top bits are the same. This allows
+ TLB invalidation to quickly clear a subset of the hash table. */
+#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2)
+#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS)
+#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
+#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
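+
+/* Example with the values above: TB_JMP_CACHE_BITS == 12 gives a 4096-entry
+   cache and TB_JMP_PAGE_BITS == 6, so 64 consecutive hash values belong to
+   the same page group and a TLB invalidation only has to clear those 64
+   entries instead of the whole cache. */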
+
+#if !defined(CONFIG_USER_ONLY)
+#define CPU_TLB_BITS 8
+#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define CPU_TLB_ENTRY_BITS 4
+#else
+#define CPU_TLB_ENTRY_BITS 5
+#endif
+
+typedef struct CPUTLBEntry {
+ /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
+ bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not
+ go directly to ram.
+ bit 3 : indicates that the entry is invalid
+ bit 2..0 : zero
+ */
+ target_ulong addr_read;
+ target_ulong addr_write;
+ target_ulong addr_code;
+ /* Addend to virtual address to get host address. IO accesses
+ use the corresponding iotlb value. */
+ uintptr_t addend;
+ /* padding to get a power of two size */
+ uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) -
+ (sizeof(target_ulong) * 3 +
+ ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) +
+ sizeof(uintptr_t))];
+} CPUTLBEntry;
+
+extern int CPUTLBEntry_wrong_size[sizeof(CPUTLBEntry) == (1 << CPU_TLB_ENTRY_BITS) ? 1 : -1];
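+
+/* Worked example (64-bit host, 32-bit target, CPU_TLB_ENTRY_BITS == 5):
+   the three target_ulong fields take 12 bytes, 4 bytes of alignment padding
+   bring the addend to an 8-byte boundary, the addend takes 8 bytes, and
+   dummy[] adds the remaining 8 bytes so the entry is exactly the 32 bytes
+   checked by the declaration above. */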
+
+#define CPU_COMMON_TLB \
+ /* The meaning of the MMU modes is defined in the target code. */ \
+ CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \
+ target_phys_addr_t iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \
+ target_ulong tlb_flush_addr; \
+ target_ulong tlb_flush_mask;
+
+#else
+
+#define CPU_COMMON_TLB
+
+#endif
+
+
+#ifdef HOST_WORDS_BIGENDIAN
+typedef struct icount_decr_u16 {
+ uint16_t high;
+ uint16_t low;
+} icount_decr_u16;
+#else
+typedef struct icount_decr_u16 {
+ uint16_t low;
+ uint16_t high;
+} icount_decr_u16;
+#endif
+
+struct kvm_run;
+struct KVMState;
+struct qemu_work_item;
+
+typedef struct CPUBreakpoint {
+ target_ulong pc;
+ int flags; /* BP_* */
+ QTAILQ_ENTRY(CPUBreakpoint) entry;
+} CPUBreakpoint;
+
+typedef struct CPUWatchpoint {
+ target_ulong vaddr;
+ target_ulong len_mask;
+ int flags; /* BP_* */
+ QTAILQ_ENTRY(CPUWatchpoint) entry;
+} CPUWatchpoint;
+
+#define CPU_TEMP_BUF_NLONGS 128
+#define CPU_COMMON \
+ struct TranslationBlock *current_tb; /* currently executing TB */ \
+ /* soft mmu support */ \
+ /* in order to avoid passing too many arguments to the MMIO \
+ helpers, we store some rarely used information in the CPU \
+ context */ \
+ uintptr_t mem_io_pc; /* host pc at which the memory was \
+ accessed */ \
+ target_ulong mem_io_vaddr; /* target virtual addr at which the \
+ memory was accessed */ \
+ uint32_t halted; /* Nonzero if the CPU is in suspend state */ \
+ uint32_t interrupt_request; \
+ volatile sig_atomic_t exit_request; \
+ CPU_COMMON_TLB \
+ struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; \
+ /* buffer for temporaries in the code generator */ \
+ long temp_buf[CPU_TEMP_BUF_NLONGS]; \
+ \
+ int64_t icount_extra; /* Instructions until next timer event. */ \
+ /* Number of cycles left, with interrupt flag in high bit. \
+ This allows a single read-compare-cbranch-write sequence to test \
+ for both decrementer underflow and exceptions. */ \
+ union { \
+ uint32_t u32; \
+ icount_decr_u16 u16; \
+ } icount_decr; \
+ uint32_t can_do_io; /* nonzero if memory mapped IO is safe. */ \
+ \
+ /* from this point: preserved by CPU reset */ \
+ /* ice debug support */ \
+ QTAILQ_HEAD(breakpoints_head, CPUBreakpoint) breakpoints; \
+ int singlestep_enabled; \
+ \
+ QTAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints; \
+ CPUWatchpoint *watchpoint_hit; \
+ \
+ struct GDBRegisterState *gdb_regs; \
+ \
+ /* Core interrupt code */ \
+ jmp_buf jmp_env; \
+ int exception_index; \
+ \
+ CPUState *next_cpu; /* next CPU sharing TB cache */ \
+ int cpu_index; /* CPU index (informative) */ \
+ uint32_t host_tid; /* host thread ID */ \
+ int numa_node; /* NUMA node this cpu belongs to */ \
+ int nr_cores; /* number of cores within this CPU package */ \
+ int nr_threads;/* number of threads within this CPU */ \
+ int running; /* Nonzero if cpu is currently running (usermode). */ \
+ /* user data */ \
+ void *opaque; \
+ \
+ uint32_t created; \
+ uint32_t stop; /* Stop request */ \
+ uint32_t stopped; /* Artificially stopped */ \
+ struct QemuThread *thread; \
+ struct QemuCond *halt_cond; \
+ struct qemu_work_item *queued_work_first, *queued_work_last; \
+ const char *cpu_model_str; \
+ struct KVMState *kvm_state; \
+ struct kvm_run *kvm_run; \
+ int kvm_fd; \
+ int kvm_vcpu_dirty;
+
+#endif
diff --git a/src/recompiler/cpu-exec.c b/src/recompiler/cpu-exec.c
new file mode 100644
index 00000000..4151363c
--- /dev/null
+++ b/src/recompiler/cpu-exec.c
@@ -0,0 +1,1455 @@
+/*
+ * i386 emulator main execution loop
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "config.h"
+#include "exec.h"
+#include "disas.h"
+#include "tcg.h"
+#include "kvm.h"
+#include "qemu-barrier.h"
+
+#if !defined(CONFIG_SOFTMMU)
+#undef EAX
+#undef ECX
+#undef EDX
+#undef EBX
+#undef ESP
+#undef EBP
+#undef ESI
+#undef EDI
+#undef EIP
+#include <signal.h>
+#ifdef __linux__
+#include <sys/ucontext.h>
+#endif
+#endif
+
+#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
+// Work around ugly bugs in glibc that mangle global register contents
+#undef env
+#define env cpu_single_env
+#endif
+
+int tb_invalidated_flag;
+
+//#define CONFIG_DEBUG_EXEC
+//#define DEBUG_SIGNAL
+
+int qemu_cpu_has_work(CPUState *env)
+{
+ return cpu_has_work(env);
+}
+
+void cpu_loop_exit(void)
+{
+ env->current_tb = NULL;
+ longjmp(env->jmp_env, 1);
+}
+
+/* exit the current TB from a signal handler. The host registers are
+ restored in a state compatible with the CPU emulator
+ */
+void cpu_resume_from_signal(CPUState *env1, void *puc)
+{
+#if !defined(CONFIG_SOFTMMU)
+#ifdef __linux__
+ struct ucontext *uc = puc;
+#elif defined(__OpenBSD__)
+ struct sigcontext *uc = puc;
+#endif
+#endif
+
+ env = env1;
+
+ /* XXX: restore cpu registers saved in host registers */
+
+#if !defined(CONFIG_SOFTMMU)
+ if (puc) {
+ /* XXX: use siglongjmp ? */
+#ifdef __linux__
+#ifdef __ia64
+ sigprocmask(SIG_SETMASK, (sigset_t *)&uc->uc_sigmask, NULL);
+#else
+ sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
+#endif
+#elif defined(__OpenBSD__)
+ sigprocmask(SIG_SETMASK, &uc->sc_mask, NULL);
+#endif
+ }
+#endif
+ env->exception_index = -1;
+ longjmp(env->jmp_env, 1);
+}
+
+/* Execute the code without caching the generated code. An interpreter
+ could be used if available. */
+static void cpu_exec_nocache(int max_cycles, TranslationBlock *orig_tb)
+{
+ uintptr_t next_tb;
+ TranslationBlock *tb;
+
+ /* Should never happen.
+ We only end up here when an existing TB is too long. */
+ if (max_cycles > CF_COUNT_MASK)
+ max_cycles = CF_COUNT_MASK;
+
+ tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
+ max_cycles);
+ env->current_tb = tb;
+ /* execute the generated code */
+#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM)
+ tcg_qemu_tb_exec(tb->tc_ptr, next_tb);
+#else
+ next_tb = tcg_qemu_tb_exec(tb->tc_ptr);
+#endif
+ env->current_tb = NULL;
+
+ if ((next_tb & 3) == 2) {
+ /* Restore PC. This may happen if async event occurs before
+ the TB starts executing. */
+ cpu_pc_from_tb(env, tb);
+ }
+ tb_phys_invalidate(tb, -1);
+ tb_free(tb);
+}
+
+static TranslationBlock *tb_find_slow(target_ulong pc,
+ target_ulong cs_base,
+ uint64_t flags)
+{
+ TranslationBlock *tb, **ptb1;
+ unsigned int h;
+ tb_page_addr_t phys_pc, phys_page1, phys_page2;
+ target_ulong virt_page2;
+
+ tb_invalidated_flag = 0;
+
+ /* find translated block using physical mappings */
+ phys_pc = get_page_addr_code(env, pc);
+ phys_page1 = phys_pc & TARGET_PAGE_MASK;
+ phys_page2 = -1;
+ h = tb_phys_hash_func(phys_pc);
+ ptb1 = &tb_phys_hash[h];
+ for(;;) {
+ tb = *ptb1;
+ if (!tb)
+ goto not_found;
+ if (tb->pc == pc &&
+ tb->page_addr[0] == phys_page1 &&
+ tb->cs_base == cs_base &&
+ tb->flags == flags) {
+ /* check next page if needed */
+ if (tb->page_addr[1] != -1) {
+ virt_page2 = (pc & TARGET_PAGE_MASK) +
+ TARGET_PAGE_SIZE;
+ phys_page2 = get_page_addr_code(env, virt_page2);
+ if (tb->page_addr[1] == phys_page2)
+ goto found;
+ } else {
+ goto found;
+ }
+ }
+ ptb1 = &tb->phys_hash_next;
+ }
+ not_found:
+ /* if no translated code available, then translate it now */
+ tb = tb_gen_code(env, pc, cs_base, flags, 0);
+
+ found:
+ /* we add the TB in the virtual pc hash table */
+ env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+ return tb;
+}
+
+static inline TranslationBlock *tb_find_fast(void)
+{
+ TranslationBlock *tb;
+ target_ulong cs_base, pc;
+ int flags;
+
+ /* we record a subset of the CPU state. It will
+ always be the same before a given translated block
+ is executed. */
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+ if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
+ tb->flags != flags)) {
+ tb = tb_find_slow(pc, cs_base, flags);
+ }
+ return tb;
+}
+
+static CPUDebugExcpHandler *debug_excp_handler;
+
+CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
+{
+ CPUDebugExcpHandler *old_handler = debug_excp_handler;
+
+ debug_excp_handler = handler;
+ return old_handler;
+}
+
+static void cpu_handle_debug_exception(CPUState *env)
+{
+ CPUWatchpoint *wp;
+
+ if (!env->watchpoint_hit)
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry)
+ wp->flags &= ~BP_WATCHPOINT_HIT;
+
+ if (debug_excp_handler)
+ debug_excp_handler(env);
+}
+
+/* main execution loop */
+
+volatile sig_atomic_t exit_request;
+
+int cpu_exec(CPUState *env1)
+{
+ volatile host_reg_t saved_env_reg;
+ int ret VBOX_ONLY(= 0), interrupt_request;
+ TranslationBlock *tb;
+ uint8_t *tc_ptr;
+ uintptr_t next_tb;
+
+# ifndef VBOX
+ if (cpu_halted(env1) == EXCP_HALTED)
+ return EXCP_HALTED;
+# endif /* !VBOX */
+
+ cpu_single_env = env1;
+
+ /* the access to env below is actually saving the global register's
+ value, so that files not including target-xyz/exec.h are free to
+ use it. */
+ QEMU_BUILD_BUG_ON (sizeof (saved_env_reg) != sizeof (env));
+ saved_env_reg = (host_reg_t) env;
+ barrier();
+ env = env1;
+
+ if (unlikely(exit_request)) {
+ env->exit_request = 1;
+ }
+
+#if defined(TARGET_I386)
+ if (!kvm_enabled()) {
+ /* put eflags in CPU temporary format */
+ CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+ DF = 1 - (2 * ((env->eflags >> 10) & 1));
+ CC_OP = CC_OP_EFLAGS;
+ env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+ }
+#elif defined(TARGET_SPARC)
+#elif defined(TARGET_M68K)
+ env->cc_op = CC_OP_FLAGS;
+ env->cc_dest = env->sr & 0xf;
+ env->cc_x = (env->sr >> 4) & 1;
+#elif defined(TARGET_ALPHA)
+#elif defined(TARGET_ARM)
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_MICROBLAZE)
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_SH4)
+#elif defined(TARGET_CRIS)
+#elif defined(TARGET_S390X)
+ /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+#ifndef VBOX /* VBOX: We need to raise traps and suchlike from the outside. */
+ env->exception_index = -1;
+#endif /* !VBOX */
+
+ /* prepare setjmp context for exception handling */
+ for(;;) {
+ if (setjmp(env->jmp_env) == 0) {
+#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
+#undef env
+ env = cpu_single_env;
+#define env cpu_single_env
+#endif
+#ifdef VBOX
+ env->current_tb = NULL; /* probably not needed, but whatever... */
+
+ /*
+ * Check for fatal errors first
+ */
+ if (env->interrupt_request & CPU_INTERRUPT_RC) {
+ env->exception_index = EXCP_RC;
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC);
+ ret = env->exception_index;
+ cpu_loop_exit();
+ }
+#endif
+
+ /* if an exception is pending, we execute it here */
+ if (env->exception_index >= 0) {
+ if (env->exception_index >= EXCP_INTERRUPT) {
+ /* exit request from the cpu execution loop */
+ ret = env->exception_index;
+#ifdef VBOX /* because of the above stuff */
+ env->exception_index = -1;
+#endif
+ if (ret == EXCP_DEBUG)
+ cpu_handle_debug_exception(env);
+ break;
+ } else {
+#if defined(CONFIG_USER_ONLY)
+ /* if user mode only, we simulate a fake exception
+ which will be handled outside the cpu execution
+ loop */
+#if defined(TARGET_I386)
+ do_interrupt_user(env->exception_index,
+ env->exception_is_int,
+ env->error_code,
+ env->exception_next_eip);
+ /* successfully delivered */
+ env->old_exception = -1;
+#endif
+ ret = env->exception_index;
+ break;
+#else
+#if defined(TARGET_I386)
+ /* simulate a real cpu exception. On i386, it can
+ trigger new exceptions, but we do not handle
+ double or triple faults yet. */
+# ifdef VBOX
+ RAWEx_ProfileStart(env, STATS_IRQ_HANDLING);
+ Log(("do_interrupt: vec=%#x int=%d pc=%04x:%RGv\n", env->exception_index, env->exception_is_int,
+ env->segs[R_CS].selector, (RTGCPTR)env->exception_next_eip));
+# endif /* VBOX */
+ do_interrupt(env->exception_index,
+ env->exception_is_int && env->exception_is_int != EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ,
+ env->error_code,
+ env->exception_next_eip,
+ env->exception_is_int == EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ);
+ /* successfully delivered */
+ env->old_exception = -1;
+# ifdef VBOX
+ RAWEx_ProfileStop(env, STATS_IRQ_HANDLING);
+# endif /* VBOX */
+#elif defined(TARGET_PPC)
+ do_interrupt(env);
+#elif defined(TARGET_MICROBLAZE)
+ do_interrupt(env);
+#elif defined(TARGET_MIPS)
+ do_interrupt(env);
+#elif defined(TARGET_SPARC)
+ do_interrupt(env);
+#elif defined(TARGET_ARM)
+ do_interrupt(env);
+#elif defined(TARGET_SH4)
+ do_interrupt(env);
+#elif defined(TARGET_ALPHA)
+ do_interrupt(env);
+#elif defined(TARGET_CRIS)
+ do_interrupt(env);
+#elif defined(TARGET_M68K)
+ do_interrupt(0);
+#endif
+ env->exception_index = -1;
+#endif
+ }
+ }
+
+# ifndef VBOX
+ if (kvm_enabled()) {
+ kvm_cpu_exec(env);
+ longjmp(env->jmp_env, 1);
+ }
+# endif /* !VBOX */
+
+ next_tb = 0; /* force lookup of first TB */
+ for(;;) {
+ interrupt_request = env->interrupt_request;
+ if (unlikely(interrupt_request)) {
+ if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) {
+ /* Mask out external interrupts for this step. */
+ interrupt_request &= ~(CPU_INTERRUPT_HARD |
+ CPU_INTERRUPT_FIQ |
+ CPU_INTERRUPT_SMI |
+ CPU_INTERRUPT_NMI);
+ }
+ if (interrupt_request & CPU_INTERRUPT_DEBUG) {
+ env->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
+ env->exception_index = EXCP_DEBUG;
+ cpu_loop_exit();
+ }
+#if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
+ defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
+ defined(TARGET_MICROBLAZE)
+ if (interrupt_request & CPU_INTERRUPT_HALT) {
+ env->interrupt_request &= ~CPU_INTERRUPT_HALT;
+ env->halted = 1;
+ env->exception_index = EXCP_HLT;
+ cpu_loop_exit();
+ }
+#endif
+#if defined(TARGET_I386)
+# ifdef VBOX
+ /* Memory registration may post a tlb flush request, process it ASAP. */
+ if (interrupt_request & (CPU_INTERRUPT_EXTERNAL_FLUSH_TLB)) {
+ tlb_flush(env, true); /* (clears the flush flag) */
+ }
+
+ /* Single instruction exec request, we execute it and return (one way or the other).
+ The caller will always reschedule after doing this operation! */
+ if (interrupt_request & CPU_INTERRUPT_SINGLE_INSTR)
+ {
+ /* not in flight are we? (if we are, we trapped) */
+ if (!(env->interrupt_request & CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT))
+ {
+ ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT);
+ env->exception_index = EXCP_SINGLE_INSTR;
+ if (emulate_single_instr(env) == -1)
+ AssertMsgFailed(("REM: emulate_single_instr failed for EIP=%RGv!!\n", (RTGCPTR)env->eip));
+
+ /* When we receive an external interrupt during execution of this single
+ instruction, then we should stay here. We will leave when we're ready
+ for raw-mode or when interrupted by pending EMT requests. */
+ interrupt_request = env->interrupt_request; /* reload this! */
+ if ( !(interrupt_request & CPU_INTERRUPT_HARD)
+ || !(env->eflags & IF_MASK)
+ || (env->hflags & HF_INHIBIT_IRQ_MASK)
+ || (env->state & CPU_RAW_HM)
+ )
+ {
+ env->exception_index = ret = EXCP_SINGLE_INSTR;
+ cpu_loop_exit();
+ }
+ }
+ /* Clear CPU_INTERRUPT_SINGLE_INSTR and leave CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT set. */
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_SINGLE_INSTR);
+ }
+# endif /* VBOX */
+
+# ifndef VBOX /** @todo reconcile our code with the following... */
+ if (interrupt_request & CPU_INTERRUPT_INIT) {
+ svm_check_intercept(SVM_EXIT_INIT);
+ do_cpu_init(env);
+ env->exception_index = EXCP_HALTED;
+ cpu_loop_exit();
+ } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
+ do_cpu_sipi(env);
+ } else if (env->hflags2 & HF2_GIF_MASK) {
+ if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+ !(env->hflags & HF_SMM_MASK)) {
+ svm_check_intercept(SVM_EXIT_SMI);
+ env->interrupt_request &= ~CPU_INTERRUPT_SMI;
+ do_smm_enter();
+ next_tb = 0;
+ } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
+ !(env->hflags2 & HF2_NMI_MASK)) {
+ env->interrupt_request &= ~CPU_INTERRUPT_NMI;
+ env->hflags2 |= HF2_NMI_MASK;
+ do_interrupt(EXCP02_NMI, 0, 0, 0, 1);
+ next_tb = 0;
+ } else if (interrupt_request & CPU_INTERRUPT_MCE) {
+ env->interrupt_request &= ~CPU_INTERRUPT_MCE;
+ do_interrupt(EXCP12_MCHK, 0, 0, 0, 0);
+ next_tb = 0;
+ } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+ (((env->hflags2 & HF2_VINTR_MASK) &&
+ (env->hflags2 & HF2_HIF_MASK)) ||
+ (!(env->hflags2 & HF2_VINTR_MASK) &&
+ (env->eflags & IF_MASK &&
+ !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
+ int intno;
+ svm_check_intercept(SVM_EXIT_INTR);
+ env->interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ);
+ intno = cpu_get_pic_interrupt(env);
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing hardware INT=0x%02x\n", intno);
+#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
+#undef env
+ env = cpu_single_env;
+#define env cpu_single_env
+#endif
+ do_interrupt(intno, 0, 0, 0, 1);
+ /* ensure that no TB jump will be modified as
+ the program flow was changed */
+ next_tb = 0;
+#if !defined(CONFIG_USER_ONLY)
+ } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) &&
+ (env->eflags & IF_MASK) &&
+ !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
+ int intno;
+ /* FIXME: this should respect TPR */
+ svm_check_intercept(SVM_EXIT_VINTR);
+ intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector));
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno);
+ do_interrupt(intno, 0, 0, 0, 1);
+ env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+ next_tb = 0;
+#endif
+ }
+ }
+# else /* VBOX */
+ RAWEx_ProfileStart(env, STATS_IRQ_HANDLING);
+ if ((interrupt_request & CPU_INTERRUPT_SMI) &&
+ !(env->hflags & HF_SMM_MASK)) {
+ env->interrupt_request &= ~CPU_INTERRUPT_SMI;
+ do_smm_enter();
+ next_tb = 0;
+ }
+ else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+ (env->eflags & IF_MASK) &&
+ !(env->hflags & HF_INHIBIT_IRQ_MASK))
+ {
+ /* if hardware interrupt pending, we execute it */
+ int intno;
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_HARD);
+ intno = cpu_get_pic_interrupt(env);
+ if (intno >= 0)
+ {
+ Log(("do_interrupt %d\n", intno));
+ do_interrupt(intno, 0, 0, 0, 1);
+ }
+ /* ensure that no TB jump will be modified as
+ the program flow was changed */
+ next_tb = 0;
+ }
+# endif /* VBOX */
+#elif defined(TARGET_PPC)
+#if 0
+ if ((interrupt_request & CPU_INTERRUPT_RESET)) {
+ cpu_reset(env);
+ }
+#endif
+ if (interrupt_request & CPU_INTERRUPT_HARD) {
+ ppc_hw_interrupt(env);
+ if (env->pending_interrupts == 0)
+ env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+ next_tb = 0;
+ }
+#elif defined(TARGET_MICROBLAZE)
+ if ((interrupt_request & CPU_INTERRUPT_HARD)
+ && (env->sregs[SR_MSR] & MSR_IE)
+ && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP))
+ && !(env->iflags & (D_FLAG | IMM_FLAG))) {
+ env->exception_index = EXCP_IRQ;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_MIPS)
+ if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+ (env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask) &&
+ (env->CP0_Status & (1 << CP0St_IE)) &&
+ !(env->CP0_Status & (1 << CP0St_EXL)) &&
+ !(env->CP0_Status & (1 << CP0St_ERL)) &&
+ !(env->hflags & MIPS_HFLAG_DM)) {
+ /* Raise it */
+ env->exception_index = EXCP_EXT_INTERRUPT;
+ env->error_code = 0;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_SPARC)
+ if (interrupt_request & CPU_INTERRUPT_HARD) {
+ if (cpu_interrupts_enabled(env) &&
+ env->interrupt_index > 0) {
+ int pil = env->interrupt_index & 0xf;
+ int type = env->interrupt_index & 0xf0;
+
+ if (((type == TT_EXTINT) &&
+ cpu_pil_allowed(env, pil)) ||
+ type != TT_EXTINT) {
+ env->exception_index = env->interrupt_index;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+ }
+ } else if (interrupt_request & CPU_INTERRUPT_TIMER) {
+ //do_interrupt(0, 0, 0, 0, 0);
+ env->interrupt_request &= ~CPU_INTERRUPT_TIMER;
+ }
+#elif defined(TARGET_ARM)
+ if (interrupt_request & CPU_INTERRUPT_FIQ
+ && !(env->uncached_cpsr & CPSR_F)) {
+ env->exception_index = EXCP_FIQ;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+ /* ARMv7-M interrupt return works by loading a magic value
+ into the PC. On real hardware the load causes the
+ return to occur. The qemu implementation performs the
+ jump normally, then does the exception return when the
+ CPU tries to execute code at the magic address.
+ This will cause the magic PC value to be pushed to
+ the stack if an interrupt occurred at the wrong time.
+ We avoid this by disabling interrupts when
+ pc contains a magic address. */
+ if (interrupt_request & CPU_INTERRUPT_HARD
+ && ((IS_M(env) && env->regs[15] < 0xfffffff0)
+ || !(env->uncached_cpsr & CPSR_I))) {
+ env->exception_index = EXCP_IRQ;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_SH4)
+ if (interrupt_request & CPU_INTERRUPT_HARD) {
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_ALPHA)
+ if (interrupt_request & CPU_INTERRUPT_HARD) {
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_CRIS)
+ if (interrupt_request & CPU_INTERRUPT_HARD
+ && (env->pregs[PR_CCS] & I_FLAG)
+ && !env->locked_irq) {
+ env->exception_index = EXCP_IRQ;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+ if (interrupt_request & CPU_INTERRUPT_NMI
+ && (env->pregs[PR_CCS] & M_FLAG)) {
+ env->exception_index = EXCP_NMI;
+ do_interrupt(env);
+ next_tb = 0;
+ }
+#elif defined(TARGET_M68K)
+ if (interrupt_request & CPU_INTERRUPT_HARD
+ && ((env->sr & SR_I) >> SR_I_SHIFT)
+ < env->pending_level) {
+ /* Real hardware gets the interrupt vector via an
+ IACK cycle at this point. Current emulated
+ hardware doesn't rely on this, so we
+ provide/save the vector when the interrupt is
+ first signalled. */
+ env->exception_index = env->pending_vector;
+ do_interrupt(1);
+ next_tb = 0;
+ }
+#endif
+ /* Don't use the cached interrupt_request value,
+ do_interrupt may have updated the EXITTB flag. */
+ if (env->interrupt_request & CPU_INTERRUPT_EXITTB) {
+#ifndef VBOX
+ env->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
+#else /* VBOX */
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXITTB);
+#endif /* VBOX */
+ /* ensure that no TB jump will be modified as
+ the program flow was changed */
+ next_tb = 0;
+ }
+#ifdef VBOX
+ RAWEx_ProfileStop(env, STATS_IRQ_HANDLING);
+ if (interrupt_request & CPU_INTERRUPT_RC) {
+ env->exception_index = EXCP_RC;
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC);
+ ret = env->exception_index;
+ cpu_loop_exit();
+ }
+ if (interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT)) {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~(CPU_INTERRUPT_EXTERNAL_EXIT));
+ env->exit_request = 1;
+ }
+#endif
+ }
+ if (unlikely(env->exit_request)) {
+ env->exit_request = 0;
+ env->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit();
+ }
+
+#ifdef VBOX
+ /*
+ * Check if the CPU state allows us to execute the code in raw-mode.
+ */
+ RAWEx_ProfileStart(env, STATS_RAW_CHECK);
+ if (remR3CanExecuteRaw(env,
+ env->eip + env->segs[R_CS].base,
+ env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)),
+ &env->exception_index))
+ {
+ RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+ ret = env->exception_index;
+ cpu_loop_exit();
+ }
+ RAWEx_ProfileStop(env, STATS_RAW_CHECK);
+#endif /* VBOX */
+
+#if defined(DEBUG_DISAS) || defined(CONFIG_DEBUG_EXEC)
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
+ /* restore flags in standard format */
+#if defined(TARGET_I386)
+ env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
+ log_cpu_state(env, X86_DUMP_CCOP);
+ env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+#elif defined(TARGET_M68K)
+ cpu_m68k_flush_flags(env, env->cc_op);
+ env->cc_op = CC_OP_FLAGS;
+ env->sr = (env->sr & 0xffe0)
+ | env->cc_dest | (env->cc_x << 4);
+ log_cpu_state(env, 0);
+#else
+ log_cpu_state(env, 0);
+#endif
+ }
+#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
+#ifdef VBOX
+ RAWEx_ProfileStart(env, STATS_TLB_LOOKUP);
+#endif /*VBOX*/
+ spin_lock(&tb_lock);
+ tb = tb_find_fast();
+ /* Note: we do it here to avoid a gcc bug on Mac OS X when
+ doing it in tb_find_slow */
+ if (tb_invalidated_flag) {
+ /* as some TB could have been invalidated because
+ of memory exceptions while generating the code, we
+ must recompute the hash index here */
+ next_tb = 0;
+ tb_invalidated_flag = 0;
+ }
+#ifdef CONFIG_DEBUG_EXEC
+ qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
+ (void *)tb->tc_ptr, tb->pc,
+ lookup_symbol(tb->pc));
+#endif
+ /* see if we can patch the calling TB. When the TB
+ spans two pages, we cannot safely do a direct
+ jump. */
+#ifndef VBOX
+ if (next_tb != 0 && tb->page_addr[1] == -1) {
+#else /* VBOX */
+ if (next_tb != 0 && !(tb->cflags & CF_RAW_MODE) && tb->page_addr[1] == -1) {
+#endif /* VBOX */
+ tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
+ }
+ spin_unlock(&tb_lock);
+#ifdef VBOX
+ RAWEx_ProfileStop(env, STATS_TLB_LOOKUP);
+#endif
+
+ /* cpu_interrupt might be called while translating the
+ TB, but before it is linked into a potentially
+ infinite loop and becomes env->current_tb. Avoid
+ starting execution if there is a pending interrupt. */
+ env->current_tb = tb;
+ barrier();
+ if (likely(!env->exit_request)) {
+ tc_ptr = tb->tc_ptr;
+ /* execute the generated code */
+#ifdef VBOX
+ RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE);
+#endif
+#if defined(__sparc__) && !defined(CONFIG_SOLARIS)
+#undef env
+ env = cpu_single_env;
+#define env cpu_single_env
+#endif
+ Log5(("REM: tb=%p tc_ptr=%p %04x:%08RGv\n", tb, tc_ptr, env->segs[R_CS].selector, (RTGCPTR)env->eip));
+#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM)
+ tcg_qemu_tb_exec(tc_ptr, next_tb);
+#else
+ next_tb = tcg_qemu_tb_exec(tc_ptr);
+#endif
+ if (next_tb)
+ Log5(("REM: next_tb=%p %04x:%08RGv\n", next_tb, env->segs[R_CS].selector, (RTGCPTR)env->eip));
+#ifdef VBOX
+ RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE);
+#endif
+ if ((next_tb & 3) == 2) {
+ /* Instruction counter expired. */
+ int insns_left;
+ tb = (TranslationBlock *)(uintptr_t)(next_tb & ~3);
+ /* Restore PC. */
+ cpu_pc_from_tb(env, tb);
+ insns_left = env->icount_decr.u32;
+ if (env->icount_extra && insns_left >= 0) {
+ /* Refill decrementer and continue execution. */
+ env->icount_extra += insns_left;
+ if (env->icount_extra > 0xffff) {
+ insns_left = 0xffff;
+ } else {
+ insns_left = env->icount_extra;
+ }
+ env->icount_extra -= insns_left;
+ env->icount_decr.u16.low = insns_left;
+ } else {
+ if (insns_left > 0) {
+ /* Execute remaining instructions. */
+ cpu_exec_nocache(insns_left, tb);
+ }
+ env->exception_index = EXCP_INTERRUPT;
+ next_tb = 0;
+ cpu_loop_exit();
+ }
+ }
+ }
+ env->current_tb = NULL;
+ /* reset soft MMU for next block (it can currently
+ only be set by a memory fault) */
+ } /* for(;;) */
+ }
+#ifdef VBOX_HIGH_RES_TIMERS_HACK
+ /* NULL the current_tb here so cpu_interrupt() doesn't do anything
+ unnecessary (like crashing while emulating a single instruction).
+ Note! Don't use env1->pVM here, the code wouldn't run with
+ gcc-4.4/amd64 anymore, see #3883. */
+ env->current_tb = NULL;
+ if ( !(env->interrupt_request & ( CPU_INTERRUPT_DEBUG | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_RC
+ | CPU_INTERRUPT_SINGLE_INSTR | CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT))
+ && ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER)
+ || TMTimerPollBool(env->pVM, env->pVCpu)) ) {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_TIMER);
+ remR3ProfileStart(STATS_QEMU_RUN_TIMERS);
+ TMR3TimerQueuesDo(env->pVM);
+ remR3ProfileStop(STATS_QEMU_RUN_TIMERS);
+ }
+#endif
+ } /* for(;;) */
+
+
+#if defined(TARGET_I386)
+ /* restore flags in standard format */
+ env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
+#elif defined(TARGET_ARM)
+    /* XXX: Save/restore host fpu exception state? */
+#elif defined(TARGET_SPARC)
+#elif defined(TARGET_PPC)
+#elif defined(TARGET_M68K)
+ cpu_m68k_flush_flags(env, env->cc_op);
+ env->cc_op = CC_OP_FLAGS;
+ env->sr = (env->sr & 0xffe0)
+ | env->cc_dest | (env->cc_x << 4);
+#elif defined(TARGET_MICROBLAZE)
+#elif defined(TARGET_MIPS)
+#elif defined(TARGET_SH4)
+#elif defined(TARGET_ALPHA)
+#elif defined(TARGET_CRIS)
+#elif defined(TARGET_S390X)
+ /* XXXXX */
+#else
+#error unsupported target CPU
+#endif
+
+ /* restore global registers */
+ barrier();
+ env = (void *) saved_env_reg;
+
+# ifndef VBOX /* we might be using it elsewhere, we only have one. */
+ /* fail safe : never use cpu_single_env outside cpu_exec() */
+ cpu_single_env = NULL;
+# endif
+ return ret;
+}
+
+/* must only be called from the generated code as an exception can be
+ generated */
+void tb_invalidate_page_range(target_ulong start, target_ulong end)
+{
+ /* XXX: cannot enable it yet because it yields to MMU exception
+ where NIP != read address on PowerPC */
+#if 0
+ target_ulong phys_addr;
+ phys_addr = get_phys_addr_code(env, start);
+ tb_invalidate_phys_page_range(phys_addr, phys_addr + end - start, 0);
+#endif
+}
+
+#if defined(TARGET_I386) && defined(CONFIG_USER_ONLY)
+
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector)
+{
+ CPUX86State *saved_env;
+
+ saved_env = env;
+ env = s;
+ if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+ selector &= 0xffff;
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ (selector << 4), 0xffff, 0);
+ } else {
+ helper_load_seg(seg_reg, selector);
+ }
+ env = saved_env;
+}
+
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32)
+{
+ CPUX86State *saved_env;
+
+ saved_env = env;
+ env = s;
+
+ helper_fsave(ptr, data32);
+
+ env = saved_env;
+}
+
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32)
+{
+ CPUX86State *saved_env;
+
+ saved_env = env;
+ env = s;
+
+ helper_frstor(ptr, data32);
+
+ env = saved_env;
+}
+
+#endif /* TARGET_I386 */
+
+#if !defined(CONFIG_SOFTMMU)
+
+#if defined(TARGET_I386)
+#define EXCEPTION_ACTION raise_exception_err(env->exception_index, env->error_code)
+#else
+#define EXCEPTION_ACTION cpu_loop_exit()
+#endif
+
+/* 'pc' is the host PC at which the exception was raised. 'address' is
+ the effective address of the memory exception. 'is_write' is 1 if a
+   write caused the exception and otherwise 0. 'old_set' is the
+ signal set which should be restored */
+static inline int handle_cpu_signal(uintptr_t pc, uintptr_t address,
+ int is_write, sigset_t *old_set,
+ void *puc)
+{
+ TranslationBlock *tb;
+ int ret;
+
+ if (cpu_single_env)
+ env = cpu_single_env; /* XXX: find a correct solution for multithread */
+#if defined(DEBUG_SIGNAL)
+ qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
+ pc, address, is_write, *(unsigned long *)old_set);
+#endif
+ /* XXX: locking issue */
+ if (is_write && page_unprotect(h2g(address), pc, puc)) {
+ return 1;
+ }
+
+ /* see if it is an MMU fault */
+ ret = cpu_handle_mmu_fault(env, address, is_write, MMU_USER_IDX, 0);
+ if (ret < 0)
+ return 0; /* not an MMU fault */
+ if (ret == 0)
+ return 1; /* the MMU fault was handled without causing real CPU fault */
+ /* now we have a real cpu fault */
+ tb = tb_find_pc(pc);
+ if (tb) {
+ /* the PC is inside the translated code. It means that we have
+ a virtual CPU fault */
+ cpu_restore_state(tb, env, pc, puc);
+ }
+
+ /* we restore the process signal mask as the sigreturn should
+ do it (XXX: use sigsetjmp) */
+ sigprocmask(SIG_SETMASK, old_set, NULL);
+ EXCEPTION_ACTION;
+
+ /* never comes here */
+ return 1;
+}
+
+#if defined(__i386__)
+
+#if defined(__APPLE__)
+# include <sys/ucontext.h>
+
+# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext->ss.eip))
+# define TRAP_sig(context) ((context)->uc_mcontext->es.trapno)
+# define ERROR_sig(context) ((context)->uc_mcontext->es.err)
+# define MASK_sig(context) ((context)->uc_sigmask)
+#elif defined (__NetBSD__)
+# include <ucontext.h>
+
+# define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP])
+# define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
+# define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
+# define MASK_sig(context) ((context)->uc_sigmask)
+#elif defined (__FreeBSD__) || defined(__DragonFly__)
+# include <ucontext.h>
+
+# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_eip))
+# define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
+# define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
+# define MASK_sig(context) ((context)->uc_sigmask)
+#elif defined(__OpenBSD__)
+# define EIP_sig(context) ((context)->sc_eip)
+# define TRAP_sig(context) ((context)->sc_trapno)
+# define ERROR_sig(context) ((context)->sc_err)
+# define MASK_sig(context) ((context)->sc_mask)
+#else
+# define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP])
+# define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
+# define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
+# define MASK_sig(context) ((context)->uc_sigmask)
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__)
+ ucontext_t *uc = puc;
+#elif defined(__OpenBSD__)
+ struct sigcontext *uc = puc;
+#else
+ struct ucontext *uc = puc;
+#endif
+ uintptr_t pc;
+ int trapno;
+
+#ifndef REG_EIP
+/* for glibc 2.1 */
+#define REG_EIP EIP
+#define REG_ERR ERR
+#define REG_TRAPNO TRAPNO
+#endif
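+    /* Trap number 0x0e is the x86 page-fault exception; bit 1 of its error
+       code is set when the faulting access was a write, which is what the
+       return expression below extracts. */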
+ pc = EIP_sig(uc);
+ trapno = TRAP_sig(uc);
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ trapno == 0xe ?
+ (ERROR_sig(uc) >> 1) & 1 : 0,
+ &MASK_sig(uc), puc);
+}
+
+#elif defined(__x86_64__)
+
+#ifdef __NetBSD__
+#define PC_sig(context) _UC_MACHINE_PC(context)
+#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO])
+#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR])
+#define MASK_sig(context) ((context)->uc_sigmask)
+#elif defined(__OpenBSD__)
+#define PC_sig(context) ((context)->sc_rip)
+#define TRAP_sig(context) ((context)->sc_trapno)
+#define ERROR_sig(context) ((context)->sc_err)
+#define MASK_sig(context) ((context)->sc_mask)
+#elif defined (__FreeBSD__) || defined(__DragonFly__)
+#include <ucontext.h>
+
+#define PC_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_rip))
+#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno)
+#define ERROR_sig(context) ((context)->uc_mcontext.mc_err)
+#define MASK_sig(context) ((context)->uc_sigmask)
+#else
+#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP])
+#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO])
+#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR])
+#define MASK_sig(context) ((context)->uc_sigmask)
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ uintptr_t pc;
+#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__)
+ ucontext_t *uc = puc;
+#elif defined(__OpenBSD__)
+ struct sigcontext *uc = puc;
+#else
+ struct ucontext *uc = puc;
+#endif
+
+ pc = PC_sig(uc);
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ TRAP_sig(uc) == 0xe ?
+ (ERROR_sig(uc) >> 1) & 1 : 0,
+ &MASK_sig(uc), puc);
+}
+
+#elif defined(_ARCH_PPC)
+
+/***********************************************************************
+ * signal context platform-specific definitions
+ * From Wine
+ */
+#ifdef linux
+/* All Registers access - only for local access */
+# define REG_sig(reg_name, context) ((context)->uc_mcontext.regs->reg_name)
+/* Gpr Registers access */
+# define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context)
+# define IAR_sig(context) REG_sig(nip, context) /* Program counter */
+# define MSR_sig(context) REG_sig(msr, context) /* Machine State Register (Supervisor) */
+# define CTR_sig(context) REG_sig(ctr, context) /* Count register */
+# define XER_sig(context) REG_sig(xer, context) /* User's integer exception register */
+# define LR_sig(context) REG_sig(link, context) /* Link register */
+# define CR_sig(context) REG_sig(ccr, context) /* Condition register */
+/* Float Registers access */
+# define FLOAT_sig(reg_num, context) (((double*)((char*)((context)->uc_mcontext.regs+48*4)))[reg_num])
+# define FPSCR_sig(context) (*(int*)((char*)((context)->uc_mcontext.regs+(48+32*2)*4)))
+/* Exception Registers access */
+# define DAR_sig(context) REG_sig(dar, context)
+# define DSISR_sig(context) REG_sig(dsisr, context)
+# define TRAP_sig(context) REG_sig(trap, context)
+#endif /* linux */
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <ucontext.h>
+# define IAR_sig(context) ((context)->uc_mcontext.mc_srr0)
+# define MSR_sig(context) ((context)->uc_mcontext.mc_srr1)
+# define CTR_sig(context) ((context)->uc_mcontext.mc_ctr)
+# define XER_sig(context) ((context)->uc_mcontext.mc_xer)
+# define LR_sig(context) ((context)->uc_mcontext.mc_lr)
+# define CR_sig(context) ((context)->uc_mcontext.mc_cr)
+/* Exception Registers access */
+# define DAR_sig(context) ((context)->uc_mcontext.mc_dar)
+# define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr)
+# define TRAP_sig(context) ((context)->uc_mcontext.mc_exc)
+#endif /* __FreeBSD__|| __FreeBSD_kernel__ */
+
+#ifdef __APPLE__
+# include <sys/ucontext.h>
+typedef struct ucontext SIGCONTEXT;
+/* All Registers access - only for local access */
+# define REG_sig(reg_name, context) ((context)->uc_mcontext->ss.reg_name)
+# define FLOATREG_sig(reg_name, context) ((context)->uc_mcontext->fs.reg_name)
+# define EXCEPREG_sig(reg_name, context) ((context)->uc_mcontext->es.reg_name)
+# define VECREG_sig(reg_name, context) ((context)->uc_mcontext->vs.reg_name)
+/* Gpr Registers access */
+# define GPR_sig(reg_num, context) REG_sig(r##reg_num, context)
+# define IAR_sig(context) REG_sig(srr0, context) /* Program counter */
+# define MSR_sig(context) REG_sig(srr1, context) /* Machine State Register (Supervisor) */
+# define CTR_sig(context) REG_sig(ctr, context)
+# define XER_sig(context)                       REG_sig(xer, context) /* User's integer exception register */
+# define LR_sig(context)                        REG_sig(lr, context)  /* Link register */
+# define CR_sig(context) REG_sig(cr, context) /* Condition register */
+/* Float Registers access */
+# define FLOAT_sig(reg_num, context) FLOATREG_sig(fpregs[reg_num], context)
+# define FPSCR_sig(context) ((double)FLOATREG_sig(fpscr, context))
+/* Exception Registers access */
+# define DAR_sig(context) EXCEPREG_sig(dar, context) /* Fault registers for coredump */
+# define DSISR_sig(context) EXCEPREG_sig(dsisr, context)
+# define TRAP_sig(context) EXCEPREG_sig(exception, context) /* number of powerpc exception taken */
+#endif /* __APPLE__ */
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+ ucontext_t *uc = puc;
+#else
+ struct ucontext *uc = puc;
+#endif
+ uintptr_t pc;
+ int is_write;
+
+ pc = IAR_sig(uc);
+ is_write = 0;
+#if 0
+ /* ppc 4xx case */
+ if (DSISR_sig(uc) & 0x00800000)
+ is_write = 1;
+#else
+ if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000))
+ is_write = 1;
+#endif
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, &uc->uc_sigmask, puc);
+}
+
+#elif defined(__alpha__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ uint32_t *pc = uc->uc_mcontext.sc_pc;
+ uint32_t insn = *pc;
+ int is_write = 0;
+
+ /* XXX: need kernel patch to get write flag faster */
+ switch (insn >> 26) {
+ case 0x0d: // stw
+ case 0x0e: // stb
+ case 0x0f: // stq_u
+ case 0x24: // stf
+ case 0x25: // stg
+ case 0x26: // sts
+ case 0x27: // stt
+ case 0x2c: // stl
+ case 0x2d: // stq
+ case 0x2e: // stl_c
+ case 0x2f: // stq_c
+ is_write = 1;
+ }
+
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, &uc->uc_sigmask, puc);
+}
+#elif defined(__sparc__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ int is_write;
+ uint32_t insn;
+#if !defined(__arch64__) || defined(CONFIG_SOLARIS)
+ uint32_t *regs = (uint32_t *)(info + 1);
+ void *sigmask = (regs + 20);
+ /* XXX: is there a standard glibc define ? */
+ uintptr_t pc = regs[1];
+#else
+#ifdef __linux__
+ struct sigcontext *sc = puc;
+ uintptr_t pc = sc->sigc_regs.tpc;
+ void *sigmask = (void *)sc->sigc_mask;
+#elif defined(__OpenBSD__)
+ struct sigcontext *uc = puc;
+ uintptr_t pc = uc->sc_pc;
+ void *sigmask = (void *)(uintptr_t)uc->sc_mask;
+#endif
+#endif
+
+ /* XXX: need kernel patch to get write flag faster */
+ is_write = 0;
+ insn = *(uint32_t *)pc;
+ if ((insn >> 30) == 3) {
+ switch((insn >> 19) & 0x3f) {
+ case 0x05: // stb
+ case 0x15: // stba
+ case 0x06: // sth
+ case 0x16: // stha
+ case 0x04: // st
+ case 0x14: // sta
+ case 0x07: // std
+ case 0x17: // stda
+ case 0x0e: // stx
+ case 0x1e: // stxa
+ case 0x24: // stf
+ case 0x34: // stfa
+ case 0x27: // stdf
+ case 0x37: // stdfa
+ case 0x26: // stqf
+ case 0x36: // stqfa
+ case 0x25: // stfsr
+ case 0x3c: // casa
+ case 0x3e: // casxa
+ is_write = 1;
+ break;
+ }
+ }
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, sigmask, NULL);
+}
+
+#elif defined(__arm__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ uintptr_t pc;
+ int is_write;
+
+#if (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3))
+ pc = uc->uc_mcontext.gregs[R15];
+#else
+ pc = uc->uc_mcontext.arm_pc;
+#endif
+ /* XXX: compute is_write */
+ is_write = 0;
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write,
+ &uc->uc_sigmask, puc);
+}
+
+#elif defined(__mc68000)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ uintptr_t pc;
+ int is_write;
+
+ pc = uc->uc_mcontext.gregs[16];
+ /* XXX: compute is_write */
+ is_write = 0;
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write,
+ &uc->uc_sigmask, puc);
+}
+
+#elif defined(__ia64)
+
+#ifndef __ISR_VALID
+ /* This ought to be in <bits/siginfo.h>... */
+# define __ISR_VALID 1
+#endif
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ uintptr_t ip;
+ int is_write = 0;
+
+ ip = uc->uc_mcontext.sc_ip;
+ switch (host_signum) {
+ case SIGILL:
+ case SIGFPE:
+ case SIGSEGV:
+ case SIGBUS:
+ case SIGTRAP:
+ if (info->si_code && (info->si_segvflags & __ISR_VALID))
+ /* ISR.W (write-access) is bit 33: */
+ is_write = (info->si_isr >> 33) & 1;
+ break;
+
+ default:
+ break;
+ }
+ return handle_cpu_signal(ip, (uintptr_t)info->si_addr,
+ is_write,
+ (sigset_t *)&uc->uc_sigmask, puc);
+}
+
+#elif defined(__s390__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ uintptr_t pc;
+ uint16_t *pinsn;
+ int is_write = 0;
+
+ pc = uc->uc_mcontext.psw.addr;
+
+ /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead
+ of the normal 2 arguments. The 3rd argument contains the "int_code"
+ from the hardware which does in fact contain the is_write value.
+ The rt signal handler, as far as I can tell, does not give this value
+ at all. Not that we could get to it from here even if it were. */
+ /* ??? This is not even close to complete, since it ignores all
+ of the read-modify-write instructions. */
+ pinsn = (uint16_t *)pc;
+ switch (pinsn[0] >> 8) {
+ case 0x50: /* ST */
+ case 0x42: /* STC */
+ case 0x40: /* STH */
+ is_write = 1;
+ break;
+ case 0xc4: /* RIL format insns */
+ switch (pinsn[0] & 0xf) {
+ case 0xf: /* STRL */
+ case 0xb: /* STGRL */
+ case 0x7: /* STHRL */
+ is_write = 1;
+ }
+ break;
+ case 0xe3: /* RXY format insns */
+ switch (pinsn[2] & 0xff) {
+ case 0x50: /* STY */
+ case 0x24: /* STG */
+ case 0x72: /* STCY */
+ case 0x70: /* STHY */
+ case 0x8e: /* STPQ */
+ case 0x3f: /* STRVH */
+ case 0x3e: /* STRV */
+ case 0x2f: /* STRVG */
+ is_write = 1;
+ }
+ break;
+ }
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, &uc->uc_sigmask, puc);
+}
+
+#elif defined(__mips__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ siginfo_t *info = pinfo;
+ struct ucontext *uc = puc;
+ greg_t pc = uc->uc_mcontext.pc;
+ int is_write;
+
+ /* XXX: compute is_write */
+ is_write = 0;
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, &uc->uc_sigmask, puc);
+}
+
+#elif defined(__hppa__)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+ void *puc)
+{
+ struct siginfo *info = pinfo;
+ struct ucontext *uc = puc;
+ uintptr_t pc = uc->uc_mcontext.sc_iaoq[0];
+ uint32_t insn = *(uint32_t *)pc;
+ int is_write = 0;
+
+ /* XXX: need kernel patch to get write flag faster. */
+ switch (insn >> 26) {
+ case 0x1a: /* STW */
+ case 0x19: /* STH */
+ case 0x18: /* STB */
+ case 0x1b: /* STWM */
+ is_write = 1;
+ break;
+
+ case 0x09: /* CSTWX, FSTWX, FSTWS */
+ case 0x0b: /* CSTDX, FSTDX, FSTDS */
+ /* Distinguish from coprocessor load ... */
+ is_write = (insn >> 9) & 1;
+ break;
+
+ case 0x03:
+ switch ((insn >> 6) & 15) {
+ case 0xa: /* STWS */
+ case 0x9: /* STHS */
+ case 0x8: /* STBS */
+ case 0xe: /* STWAS */
+ case 0xc: /* STBYS */
+ is_write = 1;
+ }
+ break;
+ }
+
+ return handle_cpu_signal(pc, (uintptr_t)info->si_addr,
+ is_write, &uc->uc_sigmask, puc);
+}
+
+#else
+
+#error host CPU specific signal handler needed
+
+#endif
+
+#endif /* !defined(CONFIG_SOFTMMU) */
diff --git a/src/recompiler/cutils.c b/src/recompiler/cutils.c
new file mode 100644
index 00000000..5fbf70bd
--- /dev/null
+++ b/src/recompiler/cutils.c
@@ -0,0 +1,752 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "host-utils.h"
+
+#ifdef VBOX
+# include "osdep.h"
+
+
+static inline int toupper(int ch) {
+ if ( (unsigned int)(ch - 'a') < 26u )
+ ch += 'A' - 'a';
+ return ch;
+}
+
+/* Quick sort from OpenSolaris:
+ http://src.opensolaris.org/source/raw/onnv/onnv-gate/usr/src/common/util/qsort.c */
+/*
+ * choose a median of 3 values
+ *
+ * note: cstyle specifically prohibits nested conditional operators
+ * but this is the only way to do the median of 3 function in-line
+ */
+#define med3(a, b, c) (cmp((a), (b)) < 0) \
+ ? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
+ : ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
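+/*
+ * For example, with cmp ordering integers, med3 applied to pointers holding
+ * the values 7, 2 and 5 evaluates to the pointer holding 5, the median of
+ * the three, using at most three comparisons.
+ */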
+
+#define THRESH_L 5 /* threshold for insertion sort */
+#define THRESH_M3 20 /* threshold for median of 3 */
+#define THRESH_M9 50 /* threshold for median of 9 */
+
+typedef struct {
+ char *b_lim;
+ size_t nrec;
+} stk_t;
+
+/*
+ * The following swap functions should not create a stack frame
+ * the SPARC call / return instruction will be executed
+ * but a save / restore will not be executed
+ * which means we won't do a window turn with the spill / fill overhead
+ * verify this by examining the assembly code
+ */
+
+/* ARGSUSED */
+static void
+swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
+{
+ uint32_t temp;
+
+ temp = *r1;
+ *r1++ = *r2;
+ *r2++ = temp;
+}
+
+/* ARGSUSED */
+static void
+swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
+{
+ uint64_t temp;
+
+ temp = *r1;
+ *r1++ = *r2;
+ *r2++ = temp;
+}
+
+static void
+swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
+{
+ uint32_t temp;
+
+    /* word by word */
+ while (cnt--) {
+ temp = *r1;
+ *r1++ = *r2;
+ *r2++ = temp;
+ }
+}
+
+static void
+swapb(char *r1, char *r2, size_t cnt)
+{
+ char temp;
+
+ /* character by character */
+ while (cnt--) {
+ temp = *r1;
+ *r1++ = *r2;
+ *r2++ = temp;
+ }
+}
+
+/*
+ * qsort() is a general purpose, in-place sorting routine using a
+ * user provided call back function for comparisons. This implementation
+ * utilizes a ternary quicksort algorithm, and cuts over to an
+ * insertion sort for partitions involving fewer than THRESH_L records.
+ *
+ * Potential User Errors
+ *   There is no return value from qsort, so this function has no way
+ *   of alerting the user that a sort did not work or could not work.
+ *   We do not print an error message or exit the process or thread.
+ *   Even if we can detect an error, we cannot silently return without
+ *   sorting the data; if we did so, the user could never be sure the
+ *   sort completed successfully.
+ * It is possible we could change the return value of sort from void
+ * to int and return success or some error codes, but this gets into
+ * standards and compatibility issues.
+ *
+ * Examples of qsort parameter errors might be
+ * 1) record size (rsiz) equal to 0
+ * qsort will loop and never return.
+ * 2) record size (rsiz) less than 0
+ * rsiz is unsigned, so a negative value is insanely large
+ * 3) number of records (nrec) is 0
+ * This is legal - qsort will return without examining any records
+ * 4) number of records (nrec) is less than 0
+ * nrec is unsigned, so a negative value is insanely large.
+ * 5) nrec * rsiz > memory allocation for sort array
+ * a segment violation may occur
+ * corruption of other memory may occur
+ * 6) The base address of the sort array is invalid
+ * a segment violation may occur
+ * corruption of other memory may occur
+ * 7) The user call back function is invalid
+ * we may get alignment errors or segment violations
+ * we may jump into never-never land
+ *
+ * Some less obvious errors might be
+ * 8) The user compare function is not comparing correctly
+ * 9) The user compare function modifies the data records
+ */
+
+void
+qemu_qsort(
+ void *basep,
+ size_t nrec,
+ size_t rsiz,
+ int (*cmp)(const void *, const void *))
+{
+ size_t i; /* temporary variable */
+
+ /* variables used by swap */
+ void (*swapf)(char *, char *, size_t);
+ size_t loops;
+
+ /* variables used by sort */
+ stk_t stack[8 * sizeof (nrec) + 1];
+ stk_t *sp;
+ char *b_lim; /* bottom limit */
+ char *b_dup; /* bottom duplicate */
+ char *b_par; /* bottom partition */
+ char *t_lim; /* top limit */
+ char *t_dup; /* top duplicate */
+ char *t_par; /* top partition */
+ char *m1, *m2, *m3; /* median pointers */
+ uintptr_t d_bytelength; /* byte length of duplicate records */
+ int b_nrec;
+ int t_nrec;
+ int cv; /* results of compare (bottom / top) */
+
+ /*
+ * choose a swap function based on alignment and size
+ *
+ * The qsort function sorts an array of fixed length records.
+ * We have very limited knowledge about the data record itself.
+ * It may be that the data record is in the array we are sorting
+ * or it may be that the array contains pointers or indexes to
+ * the actual data record and all that we are sorting is the indexes.
+ *
+ * The following decision will choose an optimal swap function
+ * based on the size and alignment of the data records
+ * swapp64 will swap 64 bit pointers
+ * swapp32 will swap 32 bit pointers
+ * swapi will swap an array of 32 bit integers
+ * swapb will swap an array of 8 bit characters
+ *
+ * swapi and swapb will also require the variable loops to be set
+ * to control the length of the array being swapped
+ */
+ if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
+ (rsiz == sizeof (uint64_t))) {
+ loops = 1;
+ swapf = (void (*)(char *, char *, size_t))swapp64;
+ } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
+ (rsiz == sizeof (uint32_t))) {
+ loops = 1;
+ swapf = (void (*)(char *, char *, size_t))swapp32;
+ } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
+ ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
+ loops = rsiz / sizeof (int);
+ swapf = (void (*)(char *, char *, size_t))swapi;
+ } else {
+ loops = rsiz;
+ swapf = swapb;
+ }
+
+ /*
+ * qsort is a partitioning sort
+ *
+ * the stack is the bookkeeping mechanism to keep track of all
+ * the partitions.
+ *
+ * each sort pass takes one partition and sorts it into two partitions.
+ * at the top of the loop we simply take the partition on the top
+ * of the stack and sort it. See the comments at the bottom
+ * of the loop regarding which partitions to add in what order.
+ *
+ * initially put the whole partition on the stack
+ */
+ sp = stack;
+ sp->b_lim = (char *)basep;
+ sp->nrec = nrec;
+ sp++;
+ while (sp > stack) {
+ sp--;
+ b_lim = sp->b_lim;
+ nrec = sp->nrec;
+
+ /*
+         * a linear insertion sort is faster than a qsort for
+         * a very small number of records (THRESH_L)
+ *
+ * if number records < threshold use linear insertion sort
+ *
+         * this also handles the special case where the partition
+         * has 0 or 1 records.
+ */
+ if (nrec < THRESH_L) {
+ /*
+ * Linear insertion sort
+ */
+ t_par = b_lim;
+ for (i = 1; i < nrec; i++) {
+ t_par += rsiz;
+ b_par = t_par;
+ while (b_par > b_lim) {
+ b_par -= rsiz;
+ if ((*cmp)(b_par, b_par + rsiz) <= 0) {
+ break;
+ }
+ (*swapf)(b_par, b_par + rsiz, loops);
+ }
+ }
+
+ /*
+ * a linear insertion sort will put all records
+ * in their final position and will not create
+ * subpartitions.
+ *
+ * therefore when the insertion sort is complete
+ * just go to the top of the loop and get the
+ * next partition to sort.
+ */
+ continue;
+ }
+
+ /* quicksort */
+
+ /*
+ * choose a pivot record
+ *
+ * Ideally the pivot record will divide the partition
+         * into two equal parts. However, we have to balance the
+ * work involved in selecting the pivot record with the
+ * expected benefit.
+ *
+ * The choice of pivot record depends on the number of
+ * records in the partition
+ *
+ * for small partitions (nrec < THRESH_M3)
+ * we just select the record in the middle of the partition
+ *
+ * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
+ * we select three values and choose the median of 3
+ *
+ * if (nrec >= THRESH_M9)
+ * then we use an approximate median of 9
+ * 9 records are selected and grouped in 3 groups of 3
+ * the median of each of these 3 groups is fed into another
+ * median of 3 decision.
+ *
+ * Each median of 3 decision is 2 or 3 compares,
+ * so median of 9 costs between 8 and 12 compares.
+ *
+ * i is byte distance between two consecutive samples
+ * m2 will point to the pivot record
+ */
+ if (nrec < THRESH_M3) {
+ m2 = b_lim + (nrec / 2) * rsiz;
+ } else if (nrec < THRESH_M9) {
+ /* use median of 3 */
+ i = ((nrec - 1) / 2) * rsiz;
+ m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
+ } else {
+ /* approx median of 9 */
+ i = ((nrec - 1) / 8) * rsiz;
+ m1 = med3(b_lim, b_lim + i, b_lim + 2 * i);
+ m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
+ m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
+ m2 = med3(m1, m2, m3);
+ }
+
+ /*
+ * quick sort partitioning
+ *
+ * The partition limits are defined by bottom and top pointers
+ * b_lim and t_lim.
+ *
+ * qsort uses a fairly standard method of moving the
+ * partitioning pointers, b_par and t_par, to the middle of
+ * the partition and exchanging records that are in the
+ * wrong part of the partition.
+ *
+ * Two enhancements have been made to the basic algorithm.
+ * One for handling duplicate records and one to minimize
+ * the number of swaps.
+ *
+         * Two duplicate record pointers (b_dup and t_dup) are
+ * initially set to b_lim and t_lim. Each time a record
+ * whose sort key value is equal to the pivot record is found
+ * it will be swapped with the record pointed to by
+ * b_dup or t_dup and the duplicate pointer will be
+ * incremented toward the center.
+ * When partitioning is complete, all the duplicate records
+ * will have been collected at the upper and lower limits of
+ * the partition and can easily be moved adjacent to the
+ * pivot record.
+ *
+ * The second optimization is to minimize the number of swaps.
+ * The pointer m2 points to the pivot record.
+ * During partitioning, if m2 is ever equal to the partitioning
+ * pointers, b_par or t_par, then b_par or t_par just moves
+ * onto the next record without doing a compare.
+ * If as a result of duplicate record detection,
+ * b_dup or t_dup is ever equal to m2,
+ * then m2 is changed to point to the duplicate record and
+         * b_dup or t_dup is incremented without swapping records.
+ *
+ * When partitioning is done, we may not have the same pivot
+ * record that we started with, but we will have one with
+ * an equal sort key.
+ */
+ b_dup = b_par = b_lim;
+ t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1);
+ for (;;) {
+
+ /* move bottom pointer up */
+ for (; b_par <= t_par; b_par += rsiz) {
+ if (b_par == m2) {
+ continue;
+ }
+ cv = cmp(b_par, m2);
+ if (cv > 0) {
+ break;
+ }
+ if (cv == 0) {
+ if (b_dup == m2) {
+ m2 = b_par;
+ } else if (b_dup != b_par) {
+ (*swapf)(b_dup, b_par, loops);
+ }
+ b_dup += rsiz;
+ }
+ }
+
+ /* move top pointer down */
+ for (; b_par < t_par; t_par -= rsiz) {
+ if (t_par == m2) {
+ continue;
+ }
+ cv = cmp(t_par, m2);
+ if (cv < 0) {
+ break;
+ }
+ if (cv == 0) {
+ if (t_dup == m2) {
+ m2 = t_par;
+ } else if (t_dup != t_par) {
+ (*swapf)(t_dup, t_par, loops);
+ }
+ t_dup -= rsiz;
+ }
+ }
+
+ /* break if we are done partitioning */
+ if (b_par >= t_par) {
+ break;
+ }
+
+ /* exchange records at upper and lower break points */
+ (*swapf)(b_par, t_par, loops);
+ b_par += rsiz;
+ t_par -= rsiz;
+ }
+
+ /*
+ * partitioning is now complete
+ *
+ * there are two termination conditions from the partitioning
+ * loop above. Either b_par or t_par have crossed or
+ * they are equal.
+ *
+ * we need to swap the pivot record to its final position
+ * m2 could be in either the upper or lower partitions
+ * or it could already be in its final position
+ */
+ /*
+ * R[b_par] > R[m2]
+ * R[t_par] < R[m2]
+ */
+ if (t_par < b_par) {
+ if (m2 < t_par) {
+ (*swapf)(m2, t_par, loops);
+ m2 = b_par = t_par;
+ } else if (m2 > b_par) {
+ (*swapf)(m2, b_par, loops);
+ m2 = t_par = b_par;
+ } else {
+ b_par = t_par = m2;
+ }
+ } else {
+ if (m2 < t_par) {
+ t_par = b_par = t_par - rsiz;
+ }
+ if (m2 != b_par) {
+ (*swapf)(m2, b_par, loops);
+ }
+ m2 = t_par;
+ }
+
+ /*
+ * move bottom duplicates next to pivot
+ * optimized to eliminate overlap
+ */
+ d_bytelength = b_dup - b_lim;
+ if (b_par - b_dup < d_bytelength) {
+ b_dup = b_lim + (b_par - b_dup);
+ }
+ while (b_dup > b_lim) {
+ b_dup -= rsiz;
+ b_par -= rsiz;
+ (*swapf)(b_dup, b_par, loops);
+ }
+ b_par = m2 - d_bytelength;
+
+ /*
+ * move top duplicates next to pivot
+ */
+ d_bytelength = t_lim - t_dup;
+ if (t_dup - t_par < d_bytelength) {
+ t_dup = t_lim - (t_dup - t_par);
+ }
+ while (t_dup < t_lim) {
+ t_dup += rsiz;
+ t_par += rsiz;
+ (*swapf)(t_dup, t_par, loops);
+ }
+ t_par = m2 + d_bytelength;
+
+ /*
+ * when a qsort pass completes there are three partitions
+ * 1) the lower contains all records less than pivot
+ * 2) the upper contains all records greater than pivot
+ * 3) the pivot partition contains all record equal to pivot
+ *
+ * all records in the pivot partition are in their final
+ * position and do not need to be accounted for by the stack
+ *
+ * when adding partitions to the stack
+ * it is important to add the largest partition first
+ * to prevent stack overflow.
+ *
+ * calculate number of unsorted records in top and bottom
+ * push resulting partitions on stack
+ */
+ b_nrec = (b_par - b_lim) / rsiz;
+ t_nrec = (t_lim - t_par) / rsiz;
+ if (b_nrec < t_nrec) {
+ sp->b_lim = t_par + rsiz;
+ sp->nrec = t_nrec;
+ sp++;
+ sp->b_lim = b_lim;
+ sp->nrec = b_nrec;
+ sp++;
+ } else {
+ sp->b_lim = b_lim;
+ sp->nrec = b_nrec;
+ sp++;
+ sp->b_lim = t_par + rsiz;
+ sp->nrec = t_nrec;
+ sp++;
+ }
+ }
+}
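+/*
+ * Usage sketch (hypothetical caller, not part of this file): qemu_qsort
+ * follows the standard qsort() calling convention, e.g.
+ *
+ *     static int cmp_int(const void *a, const void *b)
+ *     {
+ *         int ia = *(const int *)a, ib = *(const int *)b;
+ *         return (ia > ib) - (ia < ib);
+ *     }
+ *
+ *     int v[4] = { 3, 1, 4, 1 };
+ *     qemu_qsort(v, 4, sizeof(v[0]), cmp_int);
+ *
+ * after which v holds { 1, 1, 3, 4 }.
+ */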
+
+#endif /* VBOX */
+
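+/* Copy at most buf_size - 1 characters of str into buf and always
+   NUL-terminate the result (a bounded strcpy that truncates). */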
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
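+/* Return 1 if str begins with val, else 0.  When it matches and ptr is
+   non-NULL, *ptr is set to the first character after the prefix, e.g.
+   strstart("foo=bar", "foo=", &p) returns 1 and leaves p pointing at "bar". */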
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+int stristart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (qemu_toupper(*p) != qemu_toupper(*q))
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/* XXX: use host strnlen if available ? */
+int qemu_strnlen(const char *s, int max_len)
+{
+ int i;
+
+ for(i = 0; i < max_len; i++) {
+ if (s[i] == '\0') {
+ break;
+ }
+ }
+ return i;
+}
+
+#ifndef VBOX
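+/* Convert a struct tm interpreted as UTC into a time_t (a timegm() stand-in).
+   The arithmetic below is the usual civil-calendar day count: January and
+   February are treated as months 13 and 14 of the previous year, so
+   (153 * m - 457) / 5 gives the days preceding month m counted from 1 March,
+   the y/4 - y/100 + y/400 terms add the leap days, and the constant 719469
+   is the day number of 1970-01-01 in this scheme, so the subtraction leaves
+   days since the Unix epoch before the time of day is added. */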
+time_t mktimegm(struct tm *tm)
+{
+ time_t t;
+ int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+ if (m < 3) {
+ m += 12;
+ y--;
+ }
+ t = 86400 * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 +
+ y / 400 - 719469);
+ t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+ return t;
+}
+#endif /* !VBOX */
+
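+/* "Find last set": returns the 1-based index of the most significant set bit
+   of i, or 0 when i == 0 (host-utils' clz32() returns 32 for a zero input). */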
+int qemu_fls(int i)
+{
+ return 32 - clz32(i);
+}
+
+#ifndef VBOX
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+ return fdatasync(fd);
+#else
+ return fsync(fd);
+#endif
+}
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+ qiov->iov = qemu_malloc(alloc_hint * sizeof(struct iovec));
+ qiov->niov = 0;
+ qiov->nalloc = alloc_hint;
+ qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+ int i;
+
+ qiov->iov = iov;
+ qiov->niov = niov;
+ qiov->nalloc = -1;
+ qiov->size = 0;
+ for (i = 0; i < niov; i++)
+ qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+ assert(qiov->nalloc != -1);
+
+ if (qiov->niov == qiov->nalloc) {
+ qiov->nalloc = 2 * qiov->nalloc + 1;
+ qiov->iov = qemu_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
+ }
+ qiov->iov[qiov->niov].iov_base = base;
+ qiov->iov[qiov->niov].iov_len = len;
+ qiov->size += len;
+ ++qiov->niov;
+}
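+/*
+ * Typical usage (hypothetical caller; hdr and payload are illustrative only):
+ *
+ *     QEMUIOVector qiov;
+ *     qemu_iovec_init(&qiov, 2);
+ *     qemu_iovec_add(&qiov, hdr, sizeof(*hdr));
+ *     qemu_iovec_add(&qiov, payload, payload_len);
+ *     ... hand qiov.iov / qiov.niov to the I/O layer ...
+ *     qemu_iovec_destroy(&qiov);
+ */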
+
+/*
+ * Copies iovecs from src to the end of dst until src is completely copied or the
+ * total size of the copied iovec reaches size. The size of the last copied
+ * iovec is changed in order to fit the specified total size if it isn't a
+ * perfect fit already.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
+{
+ int i;
+ size_t done;
+
+ assert(dst->nalloc != -1);
+
+ done = 0;
+ for (i = 0; (i < src->niov) && (done != size); i++) {
+ if (done + src->iov[i].iov_len > size) {
+ qemu_iovec_add(dst, src->iov[i].iov_base, size - done);
+ break;
+ } else {
+ qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len);
+ }
+ done += src->iov[i].iov_len;
+ }
+}
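+/* For instance, if src holds two iovecs of 100 and 200 bytes, then
+   qemu_iovec_concat(dst, src, 150) appends the whole first iovec and only
+   the first 50 bytes of the second to dst. */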
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qemu_free(qiov->iov);
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qiov->niov = 0;
+ qiov->size = 0;
+}
+
+void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
+{
+ uint8_t *p = (uint8_t *)buf;
+ int i;
+
+ for (i = 0; i < qiov->niov; ++i) {
+ memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
+ p += qiov->iov[i].iov_len;
+ }
+}
+
+void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
+{
+ const uint8_t *p = (const uint8_t *)buf;
+ size_t copy;
+ int i;
+
+ for (i = 0; i < qiov->niov && count; ++i) {
+ copy = count;
+ if (copy > qiov->iov[i].iov_len)
+ copy = qiov->iov[i].iov_len;
+ memcpy(qiov->iov[i].iov_base, p, copy);
+ p += copy;
+ count -= copy;
+ }
+}
+
+#ifndef _WIN32
+/* Sets a specific flag */
+int fcntl_setfl(int fd, int flag)
+{
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags == -1)
+ return -errno;
+
+ if (fcntl(fd, F_SETFL, flags | flag) == -1)
+ return -errno;
+
+ return 0;
+}
+#endif
+
+#endif /* !VBOX */
+
diff --git a/src/recompiler/def-helper.h b/src/recompiler/def-helper.h
new file mode 100644
index 00000000..ad5b9459
--- /dev/null
+++ b/src/recompiler/def-helper.h
@@ -0,0 +1,258 @@
+/* Helper file for declaring TCG helper functions.
+ Should be included at the start and end of target-foo/helper.h.
+
+ Targets should use DEF_HELPER_N and DEF_HELPER_FLAGS_N to declare helper
+ functions. Names should be specified without the helper_ prefix, and
+ the return and argument types specified. 3 basic types are understood
+ (i32, i64 and ptr). Additional aliases are provided for convenience and
+ to match the types used by the C helper implementation.
+
+ The target helper.h should be included in all files that use/define
+   helper functions.  This will ensure that function prototypes are
+ consistent. In addition it should be included an extra two times for
+ helper.c, defining:
+ GEN_HELPER 1 to produce op generation functions (gen_helper_*)
+    GEN_HELPER 2 to do runtime registration of helper functions.
+ */
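+/*
+ * Illustration (hypothetical target, not part of this header): a
+ * target-foo/helper.h containing
+ *
+ *     #include "def-helper.h"
+ *     DEF_HELPER_2(foo_add, i32, i32, i32)
+ *     #include "def-helper.h"
+ *
+ * expands, when GEN_HELPER is undefined, to the prototype
+ *     uint32_t helper_foo_add(uint32_t, uint32_t);
+ * with GEN_HELPER == 1 it instead produces
+ *     static inline void gen_helper_foo_add(TCGv_i32 retval, TCGv_i32 arg1, TCGv_i32 arg2);
+ * and with GEN_HELPER == 2 it emits the registration call
+ *     tcg_register_helper(helper_foo_add, "foo_add");
+ */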
+
+#ifndef DEF_HELPER_H
+#define DEF_HELPER_H 1
+
+#define HELPER(name) glue(helper_, name)
+
+#define GET_TCGV_i32 GET_TCGV_I32
+#define GET_TCGV_i64 GET_TCGV_I64
+#define GET_TCGV_ptr GET_TCGV_PTR
+
+/* Some types that make sense in C, but not for TCG. */
+#define dh_alias_i32 i32
+#define dh_alias_s32 i32
+#define dh_alias_int i32
+#define dh_alias_i64 i64
+#define dh_alias_s64 i64
+#define dh_alias_f32 i32
+#define dh_alias_f64 i64
+#if TARGET_LONG_BITS == 32
+#define dh_alias_tl i32
+#else
+#define dh_alias_tl i64
+#endif
+#define dh_alias_ptr ptr
+#define dh_alias_void void
+#define dh_alias_env ptr
+#ifdef VBOX
+# if ARCH_BITS == 32
+# define dh_alias_RTCCUINTREG i32
+# define dh_alias_RTCCINTREG i32
+# else
+# define dh_alias_RTCCUINTREG i64
+# define dh_alias_RTCCINTREG i64
+# endif
+#endif
+#define dh_alias(t) glue(dh_alias_, t)
+
+#define dh_ctype_i32 uint32_t
+#define dh_ctype_s32 int32_t
+#define dh_ctype_int int
+#define dh_ctype_i64 uint64_t
+#define dh_ctype_s64 int64_t
+#define dh_ctype_f32 float32
+#define dh_ctype_f64 float64
+#define dh_ctype_tl target_ulong
+#define dh_ctype_ptr void *
+#define dh_ctype_void void
+#define dh_ctype_env CPUState *
+#ifdef VBOX
+# if ARCH_BITS == 32
+# define dh_ctype_RTCCUINTREG uint32_t
+# define dh_ctype_RTCCINTREG int32_t
+# else
+# define dh_ctype_RTCCUINTREG uint64_t
+# define dh_ctype_RTCCINTREG int64_t
+# endif
+#endif
+#define dh_ctype(t) dh_ctype_##t
+
+/* We can't use glue() here because it falls foul of C preprocessor
+ recursive expansion rules. */
+#define dh_retvar_decl0_void void
+#define dh_retvar_decl0_i32 TCGv_i32 retval
+#define dh_retvar_decl0_i64 TCGv_i64 retval
+#define dh_retvar_decl0_ptr TCGv_ptr retval
+#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t))
+
+#define dh_retvar_decl_void
+#define dh_retvar_decl_i32 TCGv_i32 retval,
+#define dh_retvar_decl_i64 TCGv_i64 retval,
+#define dh_retvar_decl_ptr TCGv_ptr retval,
+#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t))
+
+#define dh_retvar_void TCG_CALL_DUMMY_ARG
+#define dh_retvar_i32 GET_TCGV_i32(retval)
+#define dh_retvar_i64 GET_TCGV_i64(retval)
+#define dh_retvar_ptr GET_TCGV_ptr(retval)
+#define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
+
+#define dh_is_64bit_void 0
+#define dh_is_64bit_i32 0
+#define dh_is_64bit_i64 1
+#define dh_is_64bit_ptr (TCG_TARGET_REG_BITS == 64)
+#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t))
+
+#define dh_is_signed_void 0
+#define dh_is_signed_i32 0
+#define dh_is_signed_s32 1
+#define dh_is_signed_i64 0
+#define dh_is_signed_s64 1
+#define dh_is_signed_f32 0
+#define dh_is_signed_f64 0
+#define dh_is_signed_tl 0
+#define dh_is_signed_int 1
+/* ??? This is highly specific to the host cpu. There are even special
+ extension instructions that may be required, e.g. ia64's addp4. But
+ for now we don't support any 64-bit targets with 32-bit pointers. */
+#define dh_is_signed_ptr 0
+#define dh_is_signed_env dh_is_signed_ptr
+#define dh_is_signed(t) dh_is_signed_##t
+
+#define dh_sizemask(t, n) \
+ sizemask |= dh_is_64bit(t) << (n*2); \
+ sizemask |= dh_is_signed(t) << (n*2+1)
+
+#define dh_arg(t, n) \
+ args[n - 1] = glue(GET_TCGV_, dh_alias(t))(glue(arg, n)); \
+ dh_sizemask(t, n)
+
+#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n)
+
+
+#define DEF_HELPER_0(name, ret) \
+ DEF_HELPER_FLAGS_0(name, 0, ret)
+#define DEF_HELPER_1(name, ret, t1) \
+ DEF_HELPER_FLAGS_1(name, 0, ret, t1)
+#define DEF_HELPER_2(name, ret, t1, t2) \
+ DEF_HELPER_FLAGS_2(name, 0, ret, t1, t2)
+#define DEF_HELPER_3(name, ret, t1, t2, t3) \
+ DEF_HELPER_FLAGS_3(name, 0, ret, t1, t2, t3)
+#define DEF_HELPER_4(name, ret, t1, t2, t3, t4) \
+ DEF_HELPER_FLAGS_4(name, 0, ret, t1, t2, t3, t4)
+
+#endif /* DEF_HELPER_H */
+
+#ifndef GEN_HELPER
+/* Function prototypes. */
+
+#define DEF_HELPER_FLAGS_0(name, flags, ret) \
+dh_ctype(ret) HELPER(name) (void);
+
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1));
+
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
+
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
+
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+ dh_ctype(t4));
+
+#undef GEN_HELPER
+#define GEN_HELPER -1
+
+#elif GEN_HELPER == 1
+/* Gen functions. */
+
+#define DEF_HELPER_FLAGS_0(name, flags, ret) \
+static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \
+{ \
+ int sizemask; \
+ sizemask = dh_is_64bit(ret); \
+ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 0, NULL); \
+}
+
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1)) \
+{ \
+ TCGArg args[1]; \
+ int sizemask = 0; \
+ dh_sizemask(ret, 0); \
+ dh_arg(t1, 1); \
+ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 1, args); \
+}
+
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \
+ dh_arg_decl(t2, 2)) \
+{ \
+ TCGArg args[2]; \
+ int sizemask = 0; \
+ dh_sizemask(ret, 0); \
+ dh_arg(t1, 1); \
+ dh_arg(t2, 2); \
+ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 2, args); \
+}
+
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \
+ dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \
+{ \
+ TCGArg args[3]; \
+ int sizemask = 0; \
+ dh_sizemask(ret, 0); \
+ dh_arg(t1, 1); \
+ dh_arg(t2, 2); \
+ dh_arg(t3, 3); \
+ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 3, args); \
+}
+
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \
+ dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \
+{ \
+ TCGArg args[4]; \
+ int sizemask = 0; \
+ dh_sizemask(ret, 0); \
+ dh_arg(t1, 1); \
+ dh_arg(t2, 2); \
+ dh_arg(t3, 3); \
+ dh_arg(t4, 4); \
+ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 4, args); \
+}
+
+#undef GEN_HELPER
+#define GEN_HELPER -1
+
+#elif GEN_HELPER == 2
+/* Register helpers. */
+
+#define DEF_HELPER_FLAGS_0(name, flags, ret) \
+tcg_register_helper(HELPER(name), #name);
+
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
+DEF_HELPER_FLAGS_0(name, flags, ret)
+
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
+DEF_HELPER_FLAGS_0(name, flags, ret)
+
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
+DEF_HELPER_FLAGS_0(name, flags, ret)
+
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
+DEF_HELPER_FLAGS_0(name, flags, ret)
+
+#undef GEN_HELPER
+#define GEN_HELPER -1
+
+#elif GEN_HELPER == -1
+/* Undefine macros. */
+
+#undef DEF_HELPER_FLAGS_0
+#undef DEF_HELPER_FLAGS_1
+#undef DEF_HELPER_FLAGS_2
+#undef DEF_HELPER_FLAGS_3
+#undef DEF_HELPER_FLAGS_4
+#undef GEN_HELPER
+
+#endif
diff --git a/src/recompiler/disas.h b/src/recompiler/disas.h
new file mode 100644
index 00000000..a20df127
--- /dev/null
+++ b/src/recompiler/disas.h
@@ -0,0 +1,47 @@
+#ifndef _QEMU_DISAS_H
+#define _QEMU_DISAS_H
+
+#include "qemu-common.h"
+
+#ifdef NEED_CPU_H
+/* Disassemble this for me please... (debugging). */
+void disas(FILE *out, void *code, unsigned long size);
+void target_disas(FILE *out, target_ulong code, target_ulong size, int flags);
+
+#ifndef VBOX
+/* The usual mess... FIXME: Remove this condition once dyngen-exec.h is gone */
+#ifndef __DYNGEN_EXEC_H__
+void monitor_disas(Monitor *mon, CPUState *env,
+ target_ulong pc, int nb_insn, int is_physical, int flags);
+#endif
+#endif /*!VBOX*/
+
+/* Look up symbol for debugging purpose. Returns "" if unknown. */
+const char *lookup_symbol(target_ulong orig_addr);
+#endif
+
+struct syminfo;
+struct elf32_sym;
+struct elf64_sym;
+
+#if defined(CONFIG_USER_ONLY)
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
+#else
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_phys_addr_t orig_addr);
+#endif
+
+struct syminfo {
+ lookup_symbol_t lookup_symbol;
+ unsigned int disas_num_syms;
+ union {
+ struct elf32_sym *elf32;
+ struct elf64_sym *elf64;
+ } disas_symtab;
+ const char *disas_strtab;
+ struct syminfo *next;
+};
+
+/* Filled in by elfload.c. Simplistic, but will do for now. */
+extern struct syminfo *syminfos;
+
+#endif /* _QEMU_DISAS_H */
diff --git a/src/recompiler/dyngen-exec.h b/src/recompiler/dyngen-exec.h
new file mode 100644
index 00000000..5397a211
--- /dev/null
+++ b/src/recompiler/dyngen-exec.h
@@ -0,0 +1,131 @@
+/*
+ * dyngen defines for micro operation code
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#if !defined(__DYNGEN_EXEC_H__)
+#define __DYNGEN_EXEC_H__
+
+#ifndef VBOX
+/* prevent Solaris from trying to typedef FILE in gcc's
+ include/floatingpoint.h which will conflict with the
+ definition down below */
+#ifdef __sun__
+#define _FILEDEFED
+#endif
+#endif /* !VBOX */
+
+/* NOTE: standard headers should be used with special care at this
+ point because host CPU registers are used as global variables. Some
+ host headers do not allow that. */
+#include <stddef.h>
+#ifndef VBOX
+#include <stdint.h>
+
+#ifdef __OpenBSD__
+#include <sys/types.h>
+#endif
+
+/* XXX: This may be wrong for 64-bit ILP32 hosts. */
+typedef void * host_reg_t;
+
+#ifdef CONFIG_BSD
+typedef struct __sFILE FILE;
+#else
+typedef struct FILE FILE;
+#endif
+extern int fprintf(FILE *, const char *, ...);
+extern int fputs(const char *, FILE *);
+extern int printf(const char *, ...);
+
+#else /* VBOX */
+
+/* XXX: This may be wrong for 64-bit ILP32 hosts. */
+typedef void * host_reg_t;
+
+#include <iprt/stdint.h>
+#include <stdio.h>
+
+#endif /* VBOX */
+
+#if defined(__i386__)
+# ifndef VBOX
+#define AREG0 "ebp"
+# else /* VBOX - why are we different? frame-pointer optimizations on mac? */
+# define AREG0 "esi"
+# endif /* VBOX */
+#elif defined(__x86_64__)
+#define AREG0 "r14"
+#elif defined(_ARCH_PPC)
+#define AREG0 "r27"
+#elif defined(__arm__)
+#define AREG0 "r7"
+#elif defined(__hppa__)
+#define AREG0 "r17"
+#elif defined(__mips__)
+#define AREG0 "s0"
+#elif defined(__sparc__)
+#ifdef CONFIG_SOLARIS
+#define AREG0 "g2"
+#else
+#ifdef __sparc_v9__
+#define AREG0 "g5"
+#else
+#define AREG0 "g6"
+#endif
+#endif
+#elif defined(__s390__)
+#define AREG0 "r10"
+#elif defined(__alpha__)
+/* Note $15 is the frame pointer, so anything in op-i386.c that would
+   require a frame pointer, like alloca, would probably lose.  */
+#define AREG0 "$15"
+#elif defined(__mc68000)
+#define AREG0 "%a5"
+#elif defined(__ia64__)
+#define AREG0 "r7"
+#else
+#error unsupported CPU
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+
+/* The return address may point to the start of the next instruction.
+ Subtracting one gets us the call instruction itself. */
+#if defined(__s390__) && !defined(__s390x__)
+# define GETPC() ((void*)(((uintptr_t)__builtin_return_address(0) & 0x7fffffffUL) - 1))
+#elif defined(__arm__)
+/* Thumb return addresses have the low bit set, so we need to subtract two.
+ This is still safe in ARM mode because instructions are 4 bytes. */
+# define GETPC() ((void *)((uintptr_t)__builtin_return_address(0) - 2))
+#else
+# define GETPC() ((void *)((uintptr_t)__builtin_return_address(0) - 1))
+#endif
+
+#endif /* !defined(__DYNGEN_EXEC_H__) */
diff --git a/src/recompiler/elf.h b/src/recompiler/elf.h
new file mode 100644
index 00000000..eb9e3bec
--- /dev/null
+++ b/src/recompiler/elf.h
@@ -0,0 +1,1196 @@
+#ifndef _QEMU_ELF_H
+#define _QEMU_ELF_H
+
+#include <inttypes.h>
+
+/* 32-bit ELF base types. */
+typedef uint32_t Elf32_Addr;
+typedef uint16_t Elf32_Half;
+typedef uint32_t Elf32_Off;
+typedef int32_t Elf32_Sword;
+typedef uint32_t Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef uint64_t Elf64_Addr;
+typedef uint16_t Elf64_Half;
+typedef int16_t Elf64_SHalf;
+typedef uint64_t Elf64_Off;
+typedef int32_t Elf64_Sword;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Xword;
+typedef int64_t Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+#define PT_MIPS_REGINFO 0x70000000
+#define PT_MIPS_OPTIONS 0x70000001
+
+/* Flags in the e_flags field of the header */
+/* MIPS architecture level. */
+#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */
+#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */
+#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */
+#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */
+#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */
+#define EF_MIPS_ARCH_32 0x50000000 /* MIPS32 code. */
+#define EF_MIPS_ARCH_64 0x60000000 /* MIPS64 code. */
+
+/* The ABI of a file. */
+#define EF_MIPS_ABI_O32 0x00001000 /* O32 ABI. */
+#define EF_MIPS_ABI_O64 0x00002000 /* O32 extended for 64 bit. */
+
+#define EF_MIPS_NOREORDER 0x00000001
+#define EF_MIPS_PIC 0x00000002
+#define EF_MIPS_CPIC 0x00000004
+#define EF_MIPS_ABI2 0x00000020
+#define EF_MIPS_OPTIONS_FIRST 0x00000080
+#define EF_MIPS_32BITMODE 0x00000100
+#define EF_MIPS_ABI 0x0000f000
+#define EF_MIPS_ARCH 0xf0000000
+
+/* These constants define the different elf file types */
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE 0
+#define EM_M32 1
+#define EM_SPARC 2
+#define EM_386 3
+#define EM_68K 4
+#define EM_88K 5
+#define EM_486 6 /* Perhaps disused */
+#define EM_860 7
+
+#define EM_MIPS 8 /* MIPS R3000 (officially, big-endian only) */
+
+#define EM_MIPS_RS4_BE 10 /* MIPS R4000 big-endian */
+
+#define EM_PARISC 15 /* HPPA */
+
+#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
+
+#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC64 */
+
+#define EM_ARM 40 /* ARM */
+
+#define EM_SH 42 /* SuperH */
+
+#define EM_SPARCV9 43 /* SPARC v9 64-bit */
+
+#define EM_IA_64 50 /* HP/Intel IA-64 */
+
+#define EM_X86_64 62 /* AMD x86-64 */
+
+#define EM_S390 22 /* IBM S/390 */
+
+#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */
+
+#define EM_V850 87 /* NEC v850 */
+
+#define EM_H8_300H 47 /* Hitachi H8/300H */
+#define EM_H8S 48 /* Hitachi H8S */
+
+/*
+ * This is an interim value that we will use until the committee comes
+ * up with a final number.
+ */
+#define EM_ALPHA 0x9026
+
+/* Bogus old v850 magic number, used by old tools. */
+#define EM_CYGNUS_V850 0x9080
+
+/*
+ * This is the old interim value for S/390 architecture
+ */
+#define EM_S390_OLD 0xA390
+
+#define EM_MICROBLAZE 189
+#define EM_MICROBLAZE_OLD 0xBAAB
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
+#define DT_LOPROC 0x70000000
+#define DT_HIPROC 0x7fffffff
+#define DT_MIPS_RLD_VERSION 0x70000001
+#define DT_MIPS_TIME_STAMP 0x70000002
+#define DT_MIPS_ICHECKSUM 0x70000003
+#define DT_MIPS_IVERSION 0x70000004
+#define DT_MIPS_FLAGS 0x70000005
+ #define RHF_NONE 0
+ #define RHF_HARDWAY 1
+ #define RHF_NOTPOT 2
+#define DT_MIPS_BASE_ADDRESS 0x70000006
+#define DT_MIPS_CONFLICT 0x70000008
+#define DT_MIPS_LIBLIST 0x70000009
+#define DT_MIPS_LOCAL_GOTNO 0x7000000a
+#define DT_MIPS_CONFLICTNO 0x7000000b
+#define DT_MIPS_LIBLISTNO 0x70000010
+#define DT_MIPS_SYMTABNO 0x70000011
+#define DT_MIPS_UNREFEXTNO 0x70000012
+#define DT_MIPS_GOTSYM 0x70000013
+#define DT_MIPS_HIPAGENO 0x70000014
+#define DT_MIPS_RLD_MAP 0x70000016
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+/* Symbolic values for the entries in the auxiliary table
+ put on the initial stack */
+#define AT_NULL 0 /* end of vector */
+#define AT_IGNORE 1 /* entry should be ignored */
+#define AT_EXECFD 2 /* file descriptor of program */
+#define AT_PHDR 3 /* program headers for program */
+#define AT_PHENT 4 /* size of program header entry */
+#define AT_PHNUM 5 /* number of program headers */
+#define AT_PAGESZ 6 /* system page size */
+#define AT_BASE 7 /* base address of interpreter */
+#define AT_FLAGS 8 /* flags */
+#define AT_ENTRY 9 /* entry point of program */
+#define AT_NOTELF 10 /* program is not ELF */
+#define AT_UID 11 /* real uid */
+#define AT_EUID 12 /* effective uid */
+#define AT_GID 13 /* real gid */
+#define AT_EGID 14 /* effective gid */
+#define AT_PLATFORM 15 /* string identifying CPU for optimizations */
+#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
+#define AT_CLKTCK 17 /* frequency at which times() increments */
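The auxiliary vector is laid out on the initial stack as (type, value) pairs terminated by an AT_NULL entry. As a hedged sketch (the elf_auxv_t layout below is a hypothetical illustration; this header defines only the AT_* tag values), a lookup simply walks the pairs until the terminator:

    typedef struct { unsigned long a_type, a_val; } elf_auxv_t;   /* hypothetical pair layout */

    static unsigned long auxv_lookup(const elf_auxv_t *auxv, unsigned long type)
    {
        for (; auxv->a_type != AT_NULL; auxv++)    /* AT_NULL terminates the vector */
            if (auxv->a_type == type)
                return auxv->a_val;                /* e.g. AT_PAGESZ yields the page size */
        return 0;
    }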
+
+typedef struct dynamic{
+ Elf32_Sword d_tag;
+ union{
+ Elf32_Sword d_val;
+ Elf32_Addr d_ptr;
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Sxword d_tag; /* entry tag value */
+ union {
+ Elf64_Xword d_val;
+ Elf64_Addr d_ptr;
+ } d_un;
+} Elf64_Dyn;
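The dynamic section is an array of these entries terminated by a DT_NULL tag; d_val carries sizes and counts, d_ptr carries addresses. A short illustrative walk (a sketch, not code from this tree):

    /* Return the DT_STRTAB address from a dynamic section, or 0 if absent. */
    static Elf32_Addr dyn_find_strtab(const Elf32_Dyn *dyn)
    {
        for (; dyn->d_tag != DT_NULL; dyn++)
            if (dyn->d_tag == DT_STRTAB)
                return dyn->d_un.d_ptr;
        return 0;
    }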
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+#define ELF64_R_TYPE_DATA(i) (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000)
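For orientation, a small sketch (values assumed for illustration) of how r_info encodes the symbol-table index and relocation type that the macros above extract:

    Elf32_Word r_info = ((Elf32_Word)5 << 8) | 2;   /* symbol index 5, type 2 (R_386_PC32, defined below) */
    unsigned sym  = ELF32_R_SYM(r_info);            /* 5 */
    unsigned type = ELF32_R_TYPE(r_info);           /* 2 */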
+
+#define R_386_NONE 0
+#define R_386_32 1
+#define R_386_PC32 2
+#define R_386_GOT32 3
+#define R_386_PLT32 4
+#define R_386_COPY 5
+#define R_386_GLOB_DAT 6
+#define R_386_JMP_SLOT 7
+#define R_386_RELATIVE 8
+#define R_386_GOTOFF 9
+#define R_386_GOTPC 10
+#define R_386_NUM 11
+/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
+#define R_386_PC8 23
+
+#define R_MIPS_NONE 0
+#define R_MIPS_16 1
+#define R_MIPS_32 2
+#define R_MIPS_REL32 3
+#define R_MIPS_26 4
+#define R_MIPS_HI16 5
+#define R_MIPS_LO16 6
+#define R_MIPS_GPREL16 7
+#define R_MIPS_LITERAL 8
+#define R_MIPS_GOT16 9
+#define R_MIPS_PC16 10
+#define R_MIPS_CALL16 11
+#define R_MIPS_GPREL32 12
+/* The remaining relocs are defined on Irix, although they are not
+ in the MIPS ELF ABI. */
+#define R_MIPS_UNUSED1 13
+#define R_MIPS_UNUSED2 14
+#define R_MIPS_UNUSED3 15
+#define R_MIPS_SHIFT5 16
+#define R_MIPS_SHIFT6 17
+#define R_MIPS_64 18
+#define R_MIPS_GOT_DISP 19
+#define R_MIPS_GOT_PAGE 20
+#define R_MIPS_GOT_OFST 21
+/*
+ * The following two relocation types are specified in the MIPS ABI
+ * conformance guide version 1.2 but not yet in the psABI.
+ */
+#define R_MIPS_GOTHI16 22
+#define R_MIPS_GOTLO16 23
+#define R_MIPS_SUB 24
+#define R_MIPS_INSERT_A 25
+#define R_MIPS_INSERT_B 26
+#define R_MIPS_DELETE 27
+#define R_MIPS_HIGHER 28
+#define R_MIPS_HIGHEST 29
+/*
+ * The following two relocation types are specified in the MIPS ABI
+ * conformance guide version 1.2 but not yet in the psABI.
+ */
+#define R_MIPS_CALLHI16 30
+#define R_MIPS_CALLLO16 31
+/*
+ * This range is reserved for vendor specific relocations.
+ */
+#define R_MIPS_LOVENDOR 100
+#define R_MIPS_HIVENDOR 127
+
+
+/*
+ * Sparc ELF relocation types
+ */
+#define R_SPARC_NONE 0
+#define R_SPARC_8 1
+#define R_SPARC_16 2
+#define R_SPARC_32 3
+#define R_SPARC_DISP8 4
+#define R_SPARC_DISP16 5
+#define R_SPARC_DISP32 6
+#define R_SPARC_WDISP30 7
+#define R_SPARC_WDISP22 8
+#define R_SPARC_HI22 9
+#define R_SPARC_22 10
+#define R_SPARC_13 11
+#define R_SPARC_LO10 12
+#define R_SPARC_GOT10 13
+#define R_SPARC_GOT13 14
+#define R_SPARC_GOT22 15
+#define R_SPARC_PC10 16
+#define R_SPARC_PC22 17
+#define R_SPARC_WPLT30 18
+#define R_SPARC_COPY 19
+#define R_SPARC_GLOB_DAT 20
+#define R_SPARC_JMP_SLOT 21
+#define R_SPARC_RELATIVE 22
+#define R_SPARC_UA32 23
+#define R_SPARC_PLT32 24
+#define R_SPARC_HIPLT22 25
+#define R_SPARC_LOPLT10 26
+#define R_SPARC_PCPLT32 27
+#define R_SPARC_PCPLT22 28
+#define R_SPARC_PCPLT10 29
+#define R_SPARC_10 30
+#define R_SPARC_11 31
+#define R_SPARC_64 32
+#define R_SPARC_OLO10 33
+#define R_SPARC_HH22 34
+#define R_SPARC_HM10 35
+#define R_SPARC_LM22 36
+#define R_SPARC_WDISP16 40
+#define R_SPARC_WDISP19 41
+#define R_SPARC_7 43
+#define R_SPARC_5 44
+#define R_SPARC_6 45
+
+/* Bits present in AT_HWCAP, primarily for Sparc32. */
+
+#define HWCAP_SPARC_FLUSH 1 /* CPU supports flush instruction. */
+#define HWCAP_SPARC_STBAR 2
+#define HWCAP_SPARC_SWAP 4
+#define HWCAP_SPARC_MULDIV 8
+#define HWCAP_SPARC_V9 16
+#define HWCAP_SPARC_ULTRA3 32
+
+/*
+ * 68k ELF relocation types
+ */
+#define R_68K_NONE 0
+#define R_68K_32 1
+#define R_68K_16 2
+#define R_68K_8 3
+#define R_68K_PC32 4
+#define R_68K_PC16 5
+#define R_68K_PC8 6
+#define R_68K_GOT32 7
+#define R_68K_GOT16 8
+#define R_68K_GOT8 9
+#define R_68K_GOT32O 10
+#define R_68K_GOT16O 11
+#define R_68K_GOT8O 12
+#define R_68K_PLT32 13
+#define R_68K_PLT16 14
+#define R_68K_PLT8 15
+#define R_68K_PLT32O 16
+#define R_68K_PLT16O 17
+#define R_68K_PLT8O 18
+#define R_68K_COPY 19
+#define R_68K_GLOB_DAT 20
+#define R_68K_JMP_SLOT 21
+#define R_68K_RELATIVE 22
+
+/*
+ * Alpha ELF relocation types
+ */
+#define R_ALPHA_NONE 0 /* No reloc */
+#define R_ALPHA_REFLONG 1 /* Direct 32 bit */
+#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */
+#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */
+#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */
+#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */
+#define R_ALPHA_GPDISP 6 /* Add displacement to GP */
+#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */
+#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */
+#define R_ALPHA_SREL16 9 /* PC relative 16 bit */
+#define R_ALPHA_SREL32 10 /* PC relative 32 bit */
+#define R_ALPHA_SREL64 11 /* PC relative 64 bit */
+#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */
+#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */
+#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */
+#define R_ALPHA_COPY 24 /* Copy symbol at runtime */
+#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */
+#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */
+#define R_ALPHA_RELATIVE 27 /* Adjust by program base */
+#define R_ALPHA_BRSGP 28
+#define R_ALPHA_TLSGD 29
+#define R_ALPHA_TLS_LDM 30
+#define R_ALPHA_DTPMOD64 31
+#define R_ALPHA_GOTDTPREL 32
+#define R_ALPHA_DTPREL64 33
+#define R_ALPHA_DTPRELHI 34
+#define R_ALPHA_DTPRELLO 35
+#define R_ALPHA_DTPREL16 36
+#define R_ALPHA_GOTTPREL 37
+#define R_ALPHA_TPREL64 38
+#define R_ALPHA_TPRELHI 39
+#define R_ALPHA_TPRELLO 40
+#define R_ALPHA_TPREL16 41
+
+#define SHF_ALPHA_GPREL 0x10000000
+
+
+/* PowerPC relocations defined by the ABIs */
+#define R_PPC_NONE 0
+#define R_PPC_ADDR32 1 /* 32bit absolute address */
+#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. */
+#define R_PPC_ADDR16 3 /* 16bit absolute address */
+#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */
+#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */
+#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */
+#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */
+#define R_PPC_ADDR14_BRTAKEN 8
+#define R_PPC_ADDR14_BRNTAKEN 9
+#define R_PPC_REL24 10 /* PC relative 26 bit */
+#define R_PPC_REL14 11 /* PC relative 16 bit */
+#define R_PPC_REL14_BRTAKEN 12
+#define R_PPC_REL14_BRNTAKEN 13
+#define R_PPC_GOT16 14
+#define R_PPC_GOT16_LO 15
+#define R_PPC_GOT16_HI 16
+#define R_PPC_GOT16_HA 17
+#define R_PPC_PLTREL24 18
+#define R_PPC_COPY 19
+#define R_PPC_GLOB_DAT 20
+#define R_PPC_JMP_SLOT 21
+#define R_PPC_RELATIVE 22
+#define R_PPC_LOCAL24PC 23
+#define R_PPC_UADDR32 24
+#define R_PPC_UADDR16 25
+#define R_PPC_REL32 26
+#define R_PPC_PLT32 27
+#define R_PPC_PLTREL32 28
+#define R_PPC_PLT16_LO 29
+#define R_PPC_PLT16_HI 30
+#define R_PPC_PLT16_HA 31
+#define R_PPC_SDAREL16 32
+#define R_PPC_SECTOFF 33
+#define R_PPC_SECTOFF_LO 34
+#define R_PPC_SECTOFF_HI 35
+#define R_PPC_SECTOFF_HA 36
+/* Keep this the last entry. */
+#ifndef R_PPC_NUM
+#define R_PPC_NUM 37
+#endif
+
+/* ARM specific declarations */
+
+/* Processor specific flags for the ELF header e_flags field. */
+#define EF_ARM_RELEXEC 0x01
+#define EF_ARM_HASENTRY 0x02
+#define EF_ARM_INTERWORK 0x04
+#define EF_ARM_APCS_26 0x08
+#define EF_ARM_APCS_FLOAT 0x10
+#define EF_ARM_PIC 0x20
+#define EF_ALIGN8 0x40 /* 8-bit structure alignment is in use */
+#define EF_NEW_ABI 0x80
+#define EF_OLD_ABI 0x100
+
+/* Additional symbol types for Thumb */
+#define STT_ARM_TFUNC 0xd
+
+/* ARM-specific values for sh_flags */
+#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */
+#define SHF_ARM_COMDEF 0x80000000 /* Section may be multiply defined
+ in the input to a link step */
+
+/* ARM-specific program header flags */
+#define PF_ARM_SB 0x10000000 /* Segment contains the location
+ addressed by the static base */
+
+/* ARM relocs. */
+#define R_ARM_NONE 0 /* No reloc */
+#define R_ARM_PC24 1 /* PC relative 26 bit branch */
+#define R_ARM_ABS32 2 /* Direct 32 bit */
+#define R_ARM_REL32 3 /* PC relative 32 bit */
+#define R_ARM_PC13 4
+#define R_ARM_ABS16 5 /* Direct 16 bit */
+#define R_ARM_ABS12 6 /* Direct 12 bit */
+#define R_ARM_THM_ABS5 7
+#define R_ARM_ABS8 8 /* Direct 8 bit */
+#define R_ARM_SBREL32 9
+#define R_ARM_THM_PC22 10
+#define R_ARM_THM_PC8 11
+#define R_ARM_AMP_VCALL9 12
+#define R_ARM_SWI24 13
+#define R_ARM_THM_SWI8 14
+#define R_ARM_XPC25 15
+#define R_ARM_THM_XPC22 16
+#define R_ARM_COPY 20 /* Copy symbol at runtime */
+#define R_ARM_GLOB_DAT 21 /* Create GOT entry */
+#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */
+#define R_ARM_RELATIVE 23 /* Adjust by program base */
+#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */
+#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */
+#define R_ARM_GOT32 26 /* 32 bit GOT entry */
+#define R_ARM_PLT32 27 /* 32 bit PLT address */
+#define R_ARM_CALL 28
+#define R_ARM_JUMP24 29
+#define R_ARM_GNU_VTENTRY 100
+#define R_ARM_GNU_VTINHERIT 101
+#define R_ARM_THM_PC11 102 /* thumb unconditional branch */
+#define R_ARM_THM_PC9 103 /* thumb conditional branch */
+#define R_ARM_RXPC25 249
+#define R_ARM_RSBREL32 250
+#define R_ARM_THM_RPC22 251
+#define R_ARM_RREL32 252
+#define R_ARM_RABS22 253
+#define R_ARM_RPC24 254
+#define R_ARM_RBASE 255
+/* Keep this the last entry. */
+#define R_ARM_NUM 256
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE 0 /* No reloc. */
+#define R_390_8 1 /* Direct 8 bit. */
+#define R_390_12 2 /* Direct 12 bit. */
+#define R_390_16 3 /* Direct 16 bit. */
+#define R_390_32 4 /* Direct 32 bit. */
+#define R_390_PC32 5 /* PC relative 32 bit. */
+#define R_390_GOT12 6 /* 12 bit GOT offset. */
+#define R_390_GOT32 7 /* 32 bit GOT offset. */
+#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */
+#define R_390_COPY 9 /* Copy symbol at runtime. */
+#define R_390_GLOB_DAT 10 /* Create GOT entry. */
+#define R_390_JMP_SLOT 11 /* Create PLT entry. */
+#define R_390_RELATIVE 12 /* Adjust by program base. */
+#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */
+#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */
+#define R_390_GOT16 15 /* 16 bit GOT offset. */
+#define R_390_PC16 16 /* PC relative 16 bit. */
+#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */
+#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */
+#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */
+#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */
+#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */
+#define R_390_64 22 /* Direct 64 bit. */
+#define R_390_PC64 23 /* PC relative 64 bit. */
+#define R_390_GOT64 24 /* 64 bit GOT offset. */
+#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */
+#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */
+#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */
+#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */
+#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */
+#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */
+#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */
+#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */
+#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64 36 /* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL 38 /* Tag for function call in general
+ dynamic TLS code. */
+#define R_390_TLS_LDCALL 39 /* Tag for function call in local
+ dynamic TLS code. */
+#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS
+ block. */
+#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS
+ block. */
+#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */
+#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */
+#define R_390_TLS_TPOFF 56 /* Negated offset in static TLS
+ block. */
+/* Keep this the last entry. */
+#define R_390_NUM 57
+
+/* x86-64 relocation types */
+#define R_X86_64_NONE 0 /* No reloc */
+#define R_X86_64_64 1 /* Direct 64 bit */
+#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
+#define R_X86_64_GOT32 3 /* 32 bit GOT entry */
+#define R_X86_64_PLT32 4 /* 32 bit PLT address */
+#define R_X86_64_COPY 5 /* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
+#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
+#define R_X86_64_RELATIVE 8 /* Adjust by program base */
+#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative
+ offset to GOT */
+#define R_X86_64_32 10 /* Direct 32 bit zero extended */
+#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
+#define R_X86_64_16 12 /* Direct 16 bit zero extended */
+#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
+#define R_X86_64_8 14 /* Direct 8 bit sign extended */
+#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
+
+#define R_X86_64_NUM 16
+
+/* Legal values for e_flags field of Elf64_Ehdr. */
+
+#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */
+
+/* HPPA specific definitions. */
+
+/* Legal values for e_flags field of Elf32_Ehdr. */
+
+#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */
+#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */
+#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */
+#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */
+#define EF_PARISC_NO_KABP 0x00100000 /* No kernel assisted branch
+ prediction. */
+#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */
+#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */
+
+/* Defined values for `e_flags & EF_PARISC_ARCH' are: */
+
+#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */
+#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */
+#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */
+
+/* Additional section indices. */
+
+#define SHN_PARISC_ANSI_COMMON 0xff00 /* Section for tentatively declared
+ symbols in ANSI C. */
+#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */
+
+/* Legal values for sh_type field of Elf32_Shdr. */
+
+#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */
+#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */
+#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */
+
+/* Legal values for sh_flags field of Elf32_Shdr. */
+
+#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */
+#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */
+#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type). */
+
+#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */
+
+#define STT_HP_OPAQUE (STT_LOOS + 0x1)
+#define STT_HP_STUB (STT_LOOS + 0x2)
+
+/* HPPA relocs. */
+
+#define R_PARISC_NONE 0 /* No reloc. */
+#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */
+#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */
+#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */
+#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */
+#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */
+#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */
+#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */
+#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */
+#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */
+#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */
+#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */
+#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */
+#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */
+#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */
+#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */
+#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */
+#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */
+#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */
+#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */
+#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */
+#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */
+#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */
+#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */
+#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */
+#define R_PARISC_FPTR64 64 /* 64 bits function address. */
+#define R_PARISC_PLABEL32 65 /* 32 bits function address. */
+#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */
+#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */
+#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */
+#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */
+#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */
+#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */
+#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */
+#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */
+#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */
+#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */
+#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */
+#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */
+#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */
+#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */
+#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */
+#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */
+#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */
+#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */
+#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */
+#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */
+#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */
+#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */
+#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */
+#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */
+#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. */
+#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */
+#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */
+#define R_PARISC_PLTOFF16F 117 /* 16 bits PLT-rel. address. */
+#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */
+#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */
+#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LORESERVE 128
+#define R_PARISC_COPY 128 /* Copy relocation. */
+#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */
+#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */
+#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */
+#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */
+#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */
+#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */
+#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */
+#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */
+#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */
+#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */
+#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_HIRESERVE 255
+
+/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */
+
+#define PT_HP_TLS (PT_LOOS + 0x0)
+#define PT_HP_CORE_NONE (PT_LOOS + 0x1)
+#define PT_HP_CORE_VERSION (PT_LOOS + 0x2)
+#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3)
+#define PT_HP_CORE_COMM (PT_LOOS + 0x4)
+#define PT_HP_CORE_PROC (PT_LOOS + 0x5)
+#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6)
+#define PT_HP_CORE_STACK (PT_LOOS + 0x7)
+#define PT_HP_CORE_SHM (PT_LOOS + 0x8)
+#define PT_HP_CORE_MMF (PT_LOOS + 0x9)
+#define PT_HP_PARALLEL (PT_LOOS + 0x10)
+#define PT_HP_FASTBIND (PT_LOOS + 0x11)
+#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12)
+#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13)
+#define PT_HP_STACK (PT_LOOS + 0x14)
+
+#define PT_PARISC_ARCHEXT 0x70000000
+#define PT_PARISC_UNWIND 0x70000001
+
+/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */
+
+#define PF_PARISC_SBP 0x08000000
+
+#define PF_HP_PAGE_SIZE 0x00100000
+#define PF_HP_FAR_SHARED 0x00200000
+#define PF_HP_NEAR_SHARED 0x00400000
+#define PF_HP_CODE 0x01000000
+#define PF_HP_MODIFY 0x02000000
+#define PF_HP_LAZYSWAP 0x04000000
+#define PF_HP_SBP 0x08000000
+
+/* IA-64 specific declarations. */
+
+/* Processor specific flags for the Ehdr e_flags field. */
+#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */
+#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */
+#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */
+
+/* Processor specific values for the Phdr p_type field. */
+#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */
+#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */
+
+/* Processor specific flags for the Phdr p_flags field. */
+#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */
+
+/* Processor specific values for the Shdr sh_type field. */
+#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */
+#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */
+
+/* Processor specific flags for the Shdr sh_flags field. */
+#define SHF_IA_64_SHORT 0x10000000 /* section near gp */
+#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */
+
+/* Processor specific values for the Dyn d_tag field. */
+#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0)
+#define DT_IA_64_NUM 1
+
+/* IA-64 relocations. */
+#define R_IA64_NONE 0x00 /* none */
+#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */
+#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */
+#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */
+#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */
+#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */
+#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */
+#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */
+#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */
+#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */
+#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */
+#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */
+#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */
+#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */
+#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */
+#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */
+#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */
+#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */
+#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */
+#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */
+#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */
+#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */
+#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */
+#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */
+#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */
+#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */
+#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */
+#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */
+#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */
+#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */
+#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */
+#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */
+#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */
+#define R_IA64_REL32MSB 0x6c /* data 4 + REL */
+#define R_IA64_REL32LSB 0x6d /* data 4 + REL */
+#define R_IA64_REL64MSB 0x6e /* data 8 + REL */
+#define R_IA64_REL64LSB 0x6f /* data 8 + REL */
+#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */
+#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */
+#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */
+#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY 0x84 /* copy relocation */
+#define R_IA64_SUB 0x85 /* Addend and symbol difference */
+#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */
+#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */
+#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */
+#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */
+#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */
+#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */
+#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm22 */
+#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */
+#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */
+#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */
+#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */
+#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */
+#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */
+#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */
+#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */
+#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */
+
+typedef struct elf32_rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela{
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+ Elf64_Sxword r_addend; /* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym{
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+ Elf64_Word st_name; /* Symbol name, index in string tbl */
+ unsigned char st_info; /* Type and binding attributes */
+ unsigned char st_other; /* No defined meaning, 0 */
+ Elf64_Half st_shndx; /* Associated section index */
+ Elf64_Addr st_value; /* Value of the symbol */
+ Elf64_Xword st_size; /* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT 16
+
+typedef struct elf32_hdr{
+ unsigned char e_ident[EI_NIDENT];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry; /* Entry point */
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+ unsigned char e_ident[16]; /* ELF "magic number" */
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on segments in the program
+ header, p_flags. */
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
+
+typedef struct elf32_phdr{
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL 0
+#define SHT_PROGBITS 1
+#define SHT_SYMTAB 2
+#define SHT_STRTAB 3
+#define SHT_RELA 4
+#define SHT_HASH 5
+#define SHT_DYNAMIC 6
+#define SHT_NOTE 7
+#define SHT_NOBITS 8
+#define SHT_REL 9
+#define SHT_SHLIB 10
+#define SHT_DYNSYM 11
+#define SHT_NUM 12
+#define SHT_LOPROC 0x70000000
+#define SHT_HIPROC 0x7fffffff
+#define SHT_LOUSER 0x80000000
+#define SHT_HIUSER 0xffffffff
+#define SHT_MIPS_LIST 0x70000000
+#define SHT_MIPS_CONFLICT 0x70000002
+#define SHT_MIPS_GPTAB 0x70000003
+#define SHT_MIPS_UCODE 0x70000004
+
+/* sh_flags */
+#define SHF_WRITE 0x1
+#define SHF_ALLOC 0x2
+#define SHF_EXECINSTR 0x4
+#define SHF_MASKPROC 0xf0000000
+#define SHF_MIPS_GPREL 0x10000000
+
+/* special section indexes */
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+#define SHN_MIPS_ACCOMON 0xff00
+
+typedef struct elf32_shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+ Elf64_Word sh_name; /* Section name, index in string tbl */
+ Elf64_Word sh_type; /* Type of section */
+ Elf64_Xword sh_flags; /* Miscellaneous section attributes */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Size of section in bytes */
+ Elf64_Word sh_link; /* Index of another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define EI_MAG0 0 /* e_ident[] indexes */
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_PAD 8
+
+#define ELFOSABI_NONE 0 /* UNIX System V ABI */
+#define ELFOSABI_SYSV 0 /* Alias. */
+#define ELFOSABI_HPUX 1 /* HP-UX */
+#define ELFOSABI_NETBSD 2 /* NetBSD. */
+#define ELFOSABI_LINUX 3 /* Linux. */
+#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */
+#define ELFOSABI_AIX 7 /* IBM AIX. */
+#define ELFOSABI_IRIX 8 /* SGI Irix. */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD. */
+#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto. */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD. */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+#define ELFMAG0 0x7f /* EI_MAG */
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define ELFCLASSNONE 0 /* EI_CLASS */
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define ELFCLASSNUM 3
+
+#define ELFDATANONE 0 /* e_ident[EI_DATA] */
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+
+#define EV_NONE 0 /* e_version, EI_VERSION */
+#define EV_CURRENT 1
+#define EV_NUM 2
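Putting the identification constants together, a hedged sketch of a header sanity check (memcmp comes from <string.h>; the function name is illustrative, not from this tree):

    int looks_like_elf32_lsb(const unsigned char *e_ident)
    {
        return memcmp(e_ident, ELFMAG, SELFMAG) == 0      /* "\177ELF" */
            && e_ident[EI_CLASS]   == ELFCLASS32
            && e_ident[EI_DATA]    == ELFDATA2LSB
            && e_ident[EI_VERSION] == EV_CURRENT;
    }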
+
+/* Notes used in ET_CORE */
+#define NT_PRSTATUS 1
+#define NT_PRFPREG 2
+#define NT_PRPSINFO 3
+#define NT_TASKSTRUCT 4
+#define NT_AUXV 6
+#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+ Elf32_Word n_namesz; /* Name size */
+ Elf32_Word n_descsz; /* Content size */
+ Elf32_Word n_type; /* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+ Elf64_Word n_namesz; /* Name size */
+ Elf64_Word n_descsz; /* Content size */
+ Elf64_Word n_type; /* Content type */
+} Elf64_Nhdr;
+
+#ifdef ELF_CLASS
+#if ELF_CLASS == ELFCLASS32
+
+#define elfhdr elf32_hdr
+#define elf_phdr elf32_phdr
+#define elf_note elf32_note
+#define elf_shdr elf32_shdr
+#define elf_sym elf32_sym
+#define elf_addr_t Elf32_Off
+
+#ifdef ELF_USES_RELOCA
+# define ELF_RELOC Elf32_Rela
+#else
+# define ELF_RELOC Elf32_Rel
+#endif
+
+#else
+
+#define elfhdr elf64_hdr
+#define elf_phdr elf64_phdr
+#define elf_note elf64_note
+#define elf_shdr elf64_shdr
+#define elf_sym elf64_sym
+#define elf_addr_t Elf64_Off
+
+#ifdef ELF_USES_RELOCA
+# define ELF_RELOC Elf64_Rela
+#else
+# define ELF_RELOC Elf64_Rel
+#endif
+
+#endif /* ELF_CLASS */
+
+#ifndef ElfW
+# if ELF_CLASS == ELFCLASS32
+# define ElfW(x) Elf32_ ## x
+# define ELFW(x) ELF32_ ## x
+# else
+# define ElfW(x) Elf64_ ## x
+# define ELFW(x) ELF64_ ## x
+# endif
+#endif
+
+#endif /* ELF_CLASS */
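As a usage sketch (the inclusion pattern is illustrative; only the macro names come from this header): a consumer defines ELF_CLASS before including the file and then works with the width-neutral names.

    #define ELF_CLASS ELFCLASS64
    #include "elf.h"

    struct elfhdr hdr;   /* expands to struct elf64_hdr */
    ElfW(Sym) sym;       /* expands to Elf64_Sym */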
+
+
+#endif /* _QEMU_ELF_H */
diff --git a/src/recompiler/exec-all.h b/src/recompiler/exec-all.h
new file mode 100644
index 00000000..52145e3e
--- /dev/null
+++ b/src/recompiler/exec-all.h
@@ -0,0 +1,400 @@
+/*
+ * internal execution defines for qemu
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#ifndef _EXEC_ALL_H_
+#define _EXEC_ALL_H_
+
+#include "qemu-common.h"
+#ifdef VBOX
+# include <VBox/vmm/tm.h>
+# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+# include <VBox/vmm/em.h> /* EMRemIsLockOwner */
+# ifndef LOG_GROUP
+# define LOG_GROUP LOG_GROUP_REM
+# endif
+# include <VBox/log.h>
+# include "REMInternal.h"
+# include <VBox/vmm/vm.h>
+#endif /* VBOX */
+
+/* allow seeing translation results - the slowdown should be negligible, so we leave it enabled */
+#ifndef VBOX
+#define DEBUG_DISAS
+#endif /* !VBOX */
+
+/* Page tracking code uses ram addresses in system mode, and virtual
+ addresses in userspace mode. Define tb_page_addr_t to be an appropriate
+ type. */
+#if defined(CONFIG_USER_ONLY)
+typedef abi_ulong tb_page_addr_t;
+#else
+typedef ram_addr_t tb_page_addr_t;
+#endif
+
+/* is_jmp field values */
+#define DISAS_NEXT 0 /* next instruction can be analyzed */
+#define DISAS_JUMP 1 /* only pc was modified dynamically */
+#define DISAS_UPDATE 2 /* cpu state was modified dynamically */
+#define DISAS_TB_JUMP 3 /* only pc was modified statically */
+
+typedef struct TranslationBlock TranslationBlock;
+
+/* XXX: make a safe guess about sizes */
+#define MAX_OP_PER_INSTR 266
+
+#if HOST_LONG_BITS == 32
+#define MAX_OPC_PARAM_PER_ARG 2
+#else
+#define MAX_OPC_PARAM_PER_ARG 1
+#endif
+#define MAX_OPC_PARAM_IARGS 4
+#define MAX_OPC_PARAM_OARGS 1
+#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
+
+/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
+ * and up to 4 + N parameters on 64-bit archs
+ * (N = number of input arguments + output arguments). */
+#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
+#define OPC_BUF_SIZE 640
+#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
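Worked through with the values above: MAX_OPC_PARAM_ARGS = 4 + 1 = 5, so MAX_OPC_PARAM comes to 4 + 2*5 = 14 parameter slots on a 32-bit host (two words per 64-bit argument) and 4 + 1*5 = 9 on a 64-bit host; OPPARAM_BUF_SIZE below scales the operand buffer accordingly.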
+
+/* Maximum size a TCG op can expand to. This is complicated because a
+ single op may require several host instructions and register reloads.
+ For now take a wild guess at 192 bytes, which should allow at least
+ a couple of fixup instructions per argument. */
+#define TCG_MAX_OP_SIZE 192
+
+#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
+
+extern target_ulong gen_opc_pc[OPC_BUF_SIZE];
+extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+extern uint16_t gen_opc_icount[OPC_BUF_SIZE];
+
+#include "qemu-log.h"
+
+void gen_intermediate_code(CPUState *env, struct TranslationBlock *tb);
+void gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb);
+void gen_pc_load(CPUState *env, struct TranslationBlock *tb,
+ uintptr_t searched_pc, int pc_pos, void *puc);
+
+void cpu_gen_init(void);
+int cpu_gen_code(CPUState *env, struct TranslationBlock *tb,
+ int *gen_code_size_ptr);
+int cpu_restore_state(struct TranslationBlock *tb,
+ CPUState *env, uintptr_t searched_pc,
+ void *puc);
+void cpu_resume_from_signal(CPUState *env1, void *puc);
+void cpu_io_recompile(CPUState *env, void *retaddr);
+TranslationBlock *tb_gen_code(CPUState *env,
+ target_ulong pc, target_ulong cs_base, int flags,
+ int cflags);
+void cpu_exec_init(CPUState *env);
+void QEMU_NORETURN cpu_loop_exit(void);
+int page_unprotect(target_ulong address, uintptr_t pc, void *puc);
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
+ int is_cpu_write_access);
+void tb_invalidate_page_range(target_ulong start, target_ulong end);
+void tlb_flush_page(CPUState *env, target_ulong addr);
+void tlb_flush(CPUState *env, int flush_global);
+#if !defined(CONFIG_USER_ONLY)
+void tlb_set_page(CPUState *env, target_ulong vaddr,
+ target_phys_addr_t paddr, int prot,
+ int mmu_idx, target_ulong size);
+#endif
+
+#define CODE_GEN_ALIGN 16 /* must be >= the size of an icache line */
+
+#define CODE_GEN_PHYS_HASH_BITS 15
+#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS)
+
+#define MIN_CODE_GEN_BUFFER_SIZE (1024 * 1024)
+
+/* estimated block size for TB allocation */
+/* XXX: use a per-code average code fragment size and modulate it
+ according to the host CPU */
+#if defined(CONFIG_SOFTMMU)
+#define CODE_GEN_AVG_BLOCK_SIZE 128
+#else
+#define CODE_GEN_AVG_BLOCK_SIZE 64
+#endif
+
+#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__)
+#define USE_DIRECT_JUMP
+#endif
+
+#ifdef VBOX /* bird: not safe in next step because of threading & cpu_interrupt. */
+# undef USE_DIRECT_JUMP
+#endif /* VBOX */
+
+struct TranslationBlock {
+ target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
+ target_ulong cs_base; /* CS base for this block */
+ uint64_t flags; /* flags defining in which context the code was generated */
+ uint16_t size; /* size of target code for this block (1 <=
+ size <= TARGET_PAGE_SIZE) */
+ uint16_t cflags; /* compile flags */
+#define CF_COUNT_MASK 0x7fff
+#define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */
+#ifdef VBOX
+# define CF_RAW_MODE 0x0010 /* block was generated in raw mode */
+#endif
+
+ uint8_t *tc_ptr; /* pointer to the translated code */
+ /* next matching tb for physical address. */
+ struct TranslationBlock *phys_hash_next;
+ /* first and second physical page containing code. The lower bit
+ of the pointer tells the index in page_next[] */
+ struct TranslationBlock *page_next[2];
+ tb_page_addr_t page_addr[2];
+
+ /* the following data are used to directly call another TB from
+ the code of this one. */
+ uint16_t tb_next_offset[2]; /* offset of original jump target */
+#ifdef USE_DIRECT_JUMP
+ uint16_t tb_jmp_offset[2]; /* offset of jump instruction */
+#else
+ uintptr_t tb_next[2]; /* address of jump generated code */
+#endif
+ /* list of TBs jumping to this one. This is a circular list using
+ the two least significant bits of the pointers to tell what is
+ the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 =
+ jmp_first */
+ struct TranslationBlock *jmp_next[2];
+ struct TranslationBlock *jmp_first;
+ uint32_t icount;
+};
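To make the tagged-pointer scheme above concrete, a hedged walk over the TBs that jump into a given block (a sketch; the variable names are assumed): tag values 0 and 1 select the jumping block's jmp_next[] slot, and tag 2 marks the destination block's own jmp_first terminator.

    TranslationBlock *ptr = tb->jmp_first;
    while (((uintptr_t)ptr & 3) != 2) {               /* 2 = back at the destination TB */
        int n = (uintptr_t)ptr & 3;                   /* caller's jump slot (0 or 1) */
        TranslationBlock *caller = (TranslationBlock *)((uintptr_t)ptr & ~(uintptr_t)3);
        /* 'caller' branches to 'tb' through jmp_next[n]; follow the chain. */
        ptr = caller->jmp_next[n];
    }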
+
+static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
+{
+ target_ulong tmp;
+ tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
+ return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK;
+}
+
+static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
+{
+ target_ulong tmp;
+ tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
+ return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK)
+ | (tmp & TB_JMP_ADDR_MASK));
+}
+
+static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc)
+{
+ return pc & (CODE_GEN_PHYS_HASH_SIZE - 1);
+}
+
+TranslationBlock *tb_alloc(target_ulong pc);
+void tb_free(TranslationBlock *tb);
+void tb_flush(CPUState *env);
+void tb_link_page(TranslationBlock *tb,
+ tb_page_addr_t phys_pc, tb_page_addr_t phys_page2);
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+
+extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+
+#if defined(USE_DIRECT_JUMP)
+
+#if defined(_ARCH_PPC)
+extern void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
+#define tb_set_jmp_target1 ppc_tb_set_jmp_target
+#elif defined(__i386__) || defined(__x86_64__)
+static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+ /* patch the branch destination */
+ *(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+ /* no need to flush icache explicitly */
+}
+#elif defined(__arm__)
+static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+#if QEMU_GNUC_PREREQ(4, 1)
+ void __clear_cache(char *beg, char *end);
+#else
+ register unsigned long _beg __asm ("a1");
+ register unsigned long _end __asm ("a2");
+ register unsigned long _flg __asm ("a3");
+#endif
+
+ /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
+ *(uint32_t *)jmp_addr =
+ (*(uint32_t *)jmp_addr & ~0xffffff)
+ | (((addr - (jmp_addr + 8)) >> 2) & 0xffffff);
+
+#if QEMU_GNUC_PREREQ(4, 1)
+ __clear_cache((char *) jmp_addr, (char *) jmp_addr + 4);
+#else
+ /* flush icache */
+ _beg = jmp_addr;
+ _end = jmp_addr + 4;
+ _flg = 0;
+ __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
+#endif
+}
+#endif
+
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+ int n, uintptr_t addr)
+{
+ uintptr_t offset;
+
+ offset = tb->tb_jmp_offset[n];
+ tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr);
+}
+
+#else
+
+/* set the jump target */
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+ int n, uintptr_t addr)
+{
+ tb->tb_next[n] = addr;
+}
+
+#endif
+
+static inline void tb_add_jump(TranslationBlock *tb, int n,
+ TranslationBlock *tb_next)
+{
+ /* NOTE: this test is only needed for thread safety */
+ if (!tb->jmp_next[n]) {
+ /* patch the native jump address */
+ tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
+
+ /* add in TB jmp circular list */
+ tb->jmp_next[n] = tb_next->jmp_first;
+ tb_next->jmp_first = (TranslationBlock *)((intptr_t)(tb) | (n));
+ }
+}
+
+TranslationBlock *tb_find_pc(uintptr_t pc_ptr);
+
+#include "qemu-lock.h"
+
+extern spinlock_t tb_lock;
+
+extern int tb_invalidated_flag;
+
+#if !defined(CONFIG_USER_ONLY)
+
+extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+
+void tlb_fill(target_ulong addr, int is_write, int mmu_idx,
+ void *retaddr);
+
+#include "softmmu_defs.h"
+
+#define ACCESS_TYPE (NB_MMU_MODES + 1)
+#define MEMSUFFIX _code
+#define env cpu_single_env
+
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#undef env
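For readers new to the pattern: each inclusion of softmmu_header.h with a different DATA_SIZE stamps out one width of the _code accessor family (for example DATA_SIZE 1 yields ldub_code(), used further down in get_page_addr_code()), parameterized by the ACCESS_TYPE, MEMSUFFIX and env macros defined just before and undefined here.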
+
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
+{
+ return addr;
+}
+#else
+# ifdef VBOX
+target_ulong remR3PhysGetPhysicalAddressCode(CPUState *env, target_ulong addr, CPUTLBEntry *pTLBEntry, target_phys_addr_t ioTLBEntry);
+# endif
+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+ is the offset relative to phys_ram_base */
+static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
+{
+ int mmu_idx, page_index, pd;
+# ifndef VBOX
+ void *p;
+# endif
+
+ page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ mmu_idx = cpu_mmu_index(env1);
+ if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
+ (addr & TARGET_PAGE_MASK))) {
+ ldub_code(addr);
+ }
+ pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
+ if (pd > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
+# ifdef VBOX
+ /* deal with non-MMIO access handlers. */
+ return remR3PhysGetPhysicalAddressCode(env1, addr,
+ &env1->tlb_table[mmu_idx][page_index],
+ env1->iotlb[mmu_idx][page_index]);
+# elif defined(TARGET_SPARC) || defined(TARGET_MIPS)
+ do_unassigned_access(addr, 0, 1, 0, 4);
+# else
+ cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
+# endif
+ }
+# if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+ return addr + env1->tlb_table[mmu_idx][page_index].addend;
+# elif defined(VBOX)
+ Assert(env1->phys_addends[mmu_idx][page_index] != -1);
+ return addr + env1->phys_addends[mmu_idx][page_index];
+# else
+ p = (void *)(uintptr_t)addr
+ + env1->tlb_table[mmu_idx][page_index].addend;
+ return qemu_ram_addr_from_host(p);
+# endif
+}
+#endif
+
+typedef void (CPUDebugExcpHandler)(CPUState *env);
+
+CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler);
+
+#ifndef VBOX
+/* vl.c */
+extern int singlestep;
+
+/* cpu-exec.c */
+extern volatile sig_atomic_t exit_request;
+#endif /*!VBOX*/
+
+
+#endif
diff --git a/src/recompiler/exec.c b/src/recompiler/exec.c
new file mode 100644
index 00000000..ca632372
--- /dev/null
+++ b/src/recompiler/exec.c
@@ -0,0 +1,4586 @@
+/*
+ * virtual page mapping and translated block handling
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "config.h"
+#ifndef VBOX
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <inttypes.h>
+#else /* VBOX */
+# include <stdlib.h>
+# include <stdio.h>
+# include <iprt/alloc.h>
+# include <iprt/string.h>
+# include <iprt/param.h>
+# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */
+# include <iprt/errcore.h>
+#endif /* VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "tcg.h"
+#ifndef VBOX
+#include "hw/hw.h"
+#include "hw/qdev.h"
+#endif /* !VBOX */
+#include "osdep.h"
+#include "kvm.h"
+#include "qemu-timer.h"
+#if defined(CONFIG_USER_ONLY)
+#include <qemu.h>
+#include <signal.h>
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/param.h>
+#if __FreeBSD_version >= 700104
+#define HAVE_KINFO_GETVMMAP
+#define sigqueue sigqueue_freebsd /* avoid redefinition */
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <machine/profile.h>
+#define _KERNEL
+#include <sys/user.h>
+#undef _KERNEL
+#undef sigqueue
+#include <libutil.h>
+#endif
+#endif
+#endif
+
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+//#define DEBUG_TLB
+//#define DEBUG_UNASSIGNED
+
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+//#define DEBUG_TLB_CHECK
+
+//#define DEBUG_IOPORT
+//#define DEBUG_SUBPAGE
+
+#if !defined(CONFIG_USER_ONLY)
+/* TB consistency checks only implemented for usermode emulation. */
+#undef DEBUG_TB_CHECK
+#endif
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+static TranslationBlock *tbs;
+static int code_gen_max_blocks;
+TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+static int nb_tbs;
+/* any access to the tbs or the page table must use this lock */
+spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
+
+#ifndef VBOX
+#if defined(__arm__) || defined(__sparc_v9__)
+/* The prologue must be reachable with a direct jump. ARM and Sparc64
+ have limited branch ranges (possibly also PPC) so place it in a
+ section close to the code segment. */
+#define code_gen_section \
+ __attribute__((__section__(".gen_code"))) \
+ __attribute__((aligned (32)))
+#elif defined(_WIN32)
+/* Maximum alignment for Win32 is 16. */
+#define code_gen_section \
+ __attribute__((aligned (16)))
+#else
+#define code_gen_section \
+ __attribute__((aligned (32)))
+#endif
+
+uint8_t code_gen_prologue[1024] code_gen_section;
+#else /* VBOX */
+extern uint8_t *code_gen_prologue;
+#endif /* VBOX */
+static uint8_t *code_gen_buffer;
+static size_t code_gen_buffer_size;
+/* threshold to flush the translated code buffer */
+static size_t code_gen_buffer_max_size;
+static uint8_t *code_gen_ptr;
+
+#if !defined(CONFIG_USER_ONLY)
+# ifndef VBOX
+int phys_ram_fd;
+static int in_migration;
+# endif /* !VBOX */
+
+RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
+#endif
+
+CPUState *first_cpu;
+/* current CPU in the current thread. It is only valid inside
+ cpu_exec() */
+CPUState *cpu_single_env;
+/* 0 = Do not count executed instructions.
+ 1 = Precise instruction counting.
+ 2 = Adaptive rate instruction counting. */
+int use_icount = 0;
+/* Current instruction counter. While executing translated code this may
+ include some instructions that have not yet been executed. */
+int64_t qemu_icount;
+
+typedef struct PageDesc {
+ /* list of TBs intersecting this ram page */
+ TranslationBlock *first_tb;
+ /* in order to optimize self-modifying code, we count the number
+ of lookups we do to a given page to decide when to use a bitmap */
+ unsigned int code_write_count;
+ uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+ unsigned long flags;
+#endif
+} PageDesc;
+
+/* In system mode we want L1_MAP to be based on ram offsets,
+ while in user mode we want it to be based on virtual addresses. */
+#if !defined(CONFIG_USER_ONLY)
+#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
+# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
+#else
+# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+#else
+# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
+#endif
+
+/* Size of the L2 (and L3, etc) page tables. */
+#define L2_BITS 10
+#define L2_SIZE (1 << L2_BITS)
+
+/* The bits remaining after N lower levels of page tables. */
+#define P_L1_BITS_REM \
+ ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+#define V_L1_BITS_REM \
+ ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+
+/* Size of the L1 page table. Avoid silly small sizes. */
+#if P_L1_BITS_REM < 4
+#define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
+#else
+#define P_L1_BITS P_L1_BITS_REM
+#endif
+
+#if V_L1_BITS_REM < 4
+#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
+#else
+#define V_L1_BITS V_L1_BITS_REM
+#endif
+
+#define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
+#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
+
+#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
+#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
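+/* P_L1_SHIFT/V_L1_SHIFT give the number of page-index bits consumed by the
+   lower levels of the map; the remaining high bits select the L1 entry.
+   E.g. assuming 4 KiB target pages and a 32-bit map address space, the 20
+   page-index bits split into a 10-bit L1 table plus one 10-bit leaf level. */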
+
+size_t qemu_real_host_page_size;
+size_t qemu_host_page_bits;
+size_t qemu_host_page_size;
+uintptr_t qemu_host_page_mask;
+
+/* This is a multi-level map on the virtual address space.
+ The bottom level has pointers to PageDesc. */
+static void *l1_map[V_L1_SIZE];
+
+#if !defined(CONFIG_USER_ONLY)
+typedef struct PhysPageDesc {
+ /* offset in host memory of the page + io_index in the low bits */
+ ram_addr_t phys_offset;
+ ram_addr_t region_offset;
+} PhysPageDesc;
+
+/* This is a multi-level map on the physical address space.
+ The bottom level has pointers to PhysPageDesc. */
+static void *l1_phys_map[P_L1_SIZE];
+
+static void io_mem_init(void);
+
+/* io memory support */
+CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+static char io_mem_used[IO_MEM_NB_ENTRIES];
+static int io_mem_watch;
+#endif
+
+#ifndef VBOX
+/* log support */
+#ifdef WIN32
+static const char *logfilename = "qemu.log";
+#else
+static const char *logfilename = "/tmp/qemu.log";
+#endif
+#endif /* !VBOX */
+FILE *logfile;
+int loglevel;
+#ifndef VBOX
+static int log_append = 0;
+#endif /* !VBOX */
+
+/* statistics */
+#ifndef VBOX
+#if !defined(CONFIG_USER_ONLY)
+static int tlb_flush_count;
+#endif
+static int tb_flush_count;
+static int tb_phys_invalidate_count;
+#else /* VBOX - Resettable U32 stats, see VBoxRecompiler.c. */
+uint32_t tlb_flush_count;
+uint32_t tb_flush_count;
+uint32_t tb_phys_invalidate_count;
+#endif /* VBOX */
+
+#ifndef VBOX
+#ifdef _WIN32
+static void map_exec(void *addr, size_t size)
+{
+ DWORD old_protect;
+ VirtualProtect(addr, size,
+ PAGE_EXECUTE_READWRITE, &old_protect);
+
+}
+#else
+static void map_exec(void *addr, size_t size)
+{
+ uintptr_t start, end, page_size;
+
+ page_size = getpagesize();
+ start = (uintptr_t)addr;
+ start &= ~(page_size - 1);
+
+ end = (uintptr_t)addr + size;
+ end += page_size - 1;
+ end &= ~(page_size - 1);
+
+ mprotect((void *)start, end - start,
+ PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+#endif
+#else /* VBOX */
+static void map_exec(void *addr, size_t size)
+{
+ RTMemProtect(addr, size,
+ RTMEM_PROT_EXEC | RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+}
+#endif /* VBOX */
+
+static void page_init(void)
+{
+    /* NOTE: we can always assume that qemu_host_page_size >=
+       TARGET_PAGE_SIZE */
+#ifdef VBOX
+ RTMemProtect(code_gen_buffer, code_gen_buffer_size,
+ RTMEM_PROT_EXEC | RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ qemu_real_host_page_size = PAGE_SIZE;
+#else /* !VBOX */
+#ifdef _WIN32
+ {
+ SYSTEM_INFO system_info;
+
+ GetSystemInfo(&system_info);
+ qemu_real_host_page_size = system_info.dwPageSize;
+ }
+#else
+ qemu_real_host_page_size = getpagesize();
+#endif
+#endif /* !VBOX */
+ if (qemu_host_page_size == 0)
+ qemu_host_page_size = qemu_real_host_page_size;
+ if (qemu_host_page_size < TARGET_PAGE_SIZE)
+ qemu_host_page_size = TARGET_PAGE_SIZE;
+ qemu_host_page_bits = 0;
+ while ((1 << qemu_host_page_bits) < VBOX_ONLY((int))qemu_host_page_size)
+ qemu_host_page_bits++;
+ qemu_host_page_mask = ~(qemu_host_page_size - 1);
+
+#ifndef VBOX /* We use other means to set reserved bit on our pages */
+#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
+ {
+#ifdef HAVE_KINFO_GETVMMAP
+ struct kinfo_vmentry *freep;
+ int i, cnt;
+
+ freep = kinfo_getvmmap(getpid(), &cnt);
+ if (freep) {
+ mmap_lock();
+ for (i = 0; i < cnt; i++) {
+ uintptr_t startaddr, endaddr;
+
+ startaddr = freep[i].kve_start;
+ endaddr = freep[i].kve_end;
+ if (h2g_valid(startaddr)) {
+ startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+ if (h2g_valid(endaddr)) {
+ endaddr = h2g(endaddr);
+ page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+ } else {
+#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
+ endaddr = ~0ul;
+ page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+#endif
+ }
+ }
+ }
+ free(freep);
+ mmap_unlock();
+ }
+#else
+ FILE *f;
+
+ last_brk = (uintptr_t)sbrk(0);
+
+ f = fopen("/compat/linux/proc/self/maps", "r");
+ if (f) {
+ mmap_lock();
+
+ do {
+ uintptr_t startaddr, endaddr;
+ int n;
+
+ n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+
+ if (n == 2 && h2g_valid(startaddr)) {
+ startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+ if (h2g_valid(endaddr)) {
+ endaddr = h2g(endaddr);
+ } else {
+ endaddr = ~0ul;
+ }
+ page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+ }
+ } while (!feof(f));
+
+ fclose(f);
+ mmap_unlock();
+ }
+#endif
+ }
+#endif
+#endif /* !VBOX */
+}
+
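+/* Look up the PageDesc for page number 'index' in the multi-level l1_map.
+   Intermediate levels and the leaf PageDesc array are allocated on demand
+   when 'alloc' is non-zero; otherwise NULL is returned for missing entries. */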
+static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
+{
+ PageDesc *pd;
+ void **lp;
+ int i;
+
+#if defined(CONFIG_USER_ONLY)
+ /* We can't use qemu_malloc because it may recurse into a locked mutex. */
+# define ALLOC(P, SIZE) \
+ do { \
+ P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
+ } while (0)
+#else
+# define ALLOC(P, SIZE) \
+ do { P = qemu_mallocz(SIZE); } while (0)
+#endif
+
+ /* Level 1. Always allocated. */
+ lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+ /* Level 2..N-1. */
+ for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+ void **p = *lp;
+
+ if (p == NULL) {
+ if (!alloc) {
+ return NULL;
+ }
+ ALLOC(p, sizeof(void *) * L2_SIZE);
+ *lp = p;
+ }
+
+ lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+ }
+
+ pd = *lp;
+ if (pd == NULL) {
+ if (!alloc) {
+ return NULL;
+ }
+ ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+ *lp = pd;
+ }
+
+#undef ALLOC
+
+ return pd + (index & (L2_SIZE - 1));
+}
+
+static inline PageDesc *page_find(tb_page_addr_t index)
+{
+ return page_find_alloc(index, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
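+/* Same walk as page_find_alloc(), but over the physical page map
+   l1_phys_map; freshly allocated leaf entries start out as
+   IO_MEM_UNASSIGNED with identity region offsets. */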
+static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
+{
+ PhysPageDesc *pd;
+ void **lp;
+ int i;
+
+ /* Level 1. Always allocated. */
+ lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
+
+ /* Level 2..N-1. */
+ for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+ void **p = *lp;
+ if (p == NULL) {
+ if (!alloc) {
+ return NULL;
+ }
+ *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
+ }
+ lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+ }
+
+ pd = *lp;
+ if (pd == NULL) {
+ int i;
+
+ if (!alloc) {
+ return NULL;
+ }
+
+ *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
+
+ for (i = 0; i < L2_SIZE; i++) {
+ pd[i].phys_offset = IO_MEM_UNASSIGNED;
+ pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
+ }
+ }
+
+ return pd + (index & (L2_SIZE - 1));
+}
+
+static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
+{
+ return phys_page_find_alloc(index, 0);
+}
+
+static void tlb_protect_code(ram_addr_t ram_addr);
+static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
+ target_ulong vaddr);
+#define mmap_lock() do { } while(0)
+#define mmap_unlock() do { } while(0)
+#endif
+
+#ifdef VBOX /* We don't need such a huge codegen buffer, as we execute
+               most of the code in raw or HM mode. */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (8 * 1024 * 1024)
+#else /* !VBOX */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
+#endif /* !VBOX */
+
+#if defined(CONFIG_USER_ONLY)
+/* Currently it is not recommended to allocate big chunks of data in
+   user mode. It will change when a dedicated libc is used. */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+
+#if defined(VBOX) && defined(USE_STATIC_CODE_GEN_BUFFER)
+# error "VBox allocates codegen buffer dynamically"
+#endif
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+ __attribute__((aligned (CODE_GEN_ALIGN)));
+#endif
+
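+/* Allocate the buffer for translated code.  With USE_STATIC_CODE_GEN_BUFFER
+   a static array is used; on VBox the buffer comes from RTMemExecAlloc();
+   otherwise it is mmap()'ed or malloc()'ed with per-architecture placement
+   constraints so that direct calls and branches stay in range.  Also sizes
+   the TB array and the flush threshold. */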
+static void code_gen_alloc(uintptr_t tb_size)
+{
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+ code_gen_buffer = static_code_gen_buffer;
+ code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+ map_exec(code_gen_buffer, code_gen_buffer_size);
+#else
+# ifdef VBOX
+    /* We cannot use phys_ram_size here as it is still 0 at this point;
+     * it only gets initialized once the RAM registration callback
+     * (REMR3NotifyPhysRamRegister()) has been called.
+     */
+ code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+# else /* !VBOX */
+ code_gen_buffer_size = tb_size;
+ if (code_gen_buffer_size == 0) {
+#if defined(CONFIG_USER_ONLY)
+ /* in user mode, phys_ram_size is not meaningful */
+ code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+#else
+ /* XXX: needs adjustments */
+ code_gen_buffer_size = (uintptr_t)(ram_size / 4);
+#endif
+ }
+ if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
+ code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
+# endif /* !VBOX */
+ /* The code gen buffer location may have constraints depending on
+ the host cpu and OS */
+# ifdef VBOX
+ code_gen_buffer = RTMemExecAlloc(code_gen_buffer_size);
+
+ if (!code_gen_buffer) {
+        LogRel(("REM: failed to allocate codegen buffer (%lld bytes)\n",
+                (long long)code_gen_buffer_size));
+ return;
+ }
+# else /* !VBOX */
+#if defined(__linux__)
+ {
+ int flags;
+ void *start = NULL;
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#if defined(__x86_64__)
+ flags |= MAP_32BIT;
+ /* Cannot map more than that */
+ if (code_gen_buffer_size > (800 * 1024 * 1024))
+ code_gen_buffer_size = (800 * 1024 * 1024);
+#elif defined(__sparc_v9__)
+ // Map the buffer below 2G, so we can use direct calls and branches
+ flags |= MAP_FIXED;
+ start = (void *) 0x60000000UL;
+ if (code_gen_buffer_size > (512 * 1024 * 1024))
+ code_gen_buffer_size = (512 * 1024 * 1024);
+#elif defined(__arm__)
+ /* Map the buffer below 32M, so we can use direct calls and branches */
+ flags |= MAP_FIXED;
+ start = (void *) 0x01000000UL;
+ if (code_gen_buffer_size > 16 * 1024 * 1024)
+ code_gen_buffer_size = 16 * 1024 * 1024;
+#elif defined(__s390x__)
+ /* Map the buffer so that we can use direct calls and branches. */
+ /* We have a +- 4GB range on the branches; leave some slop. */
+ if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
+ code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
+ }
+ start = (void *)0x90000000UL;
+#endif
+ code_gen_buffer = mmap(start, code_gen_buffer_size,
+ PROT_WRITE | PROT_READ | PROT_EXEC,
+ flags, -1, 0);
+ if (code_gen_buffer == MAP_FAILED) {
+ fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+ exit(1);
+ }
+ }
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+ {
+ int flags;
+ void *addr = NULL;
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#if defined(__x86_64__)
+ /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
+ * 0x40000000 is free */
+ flags |= MAP_FIXED;
+ addr = (void *)0x40000000;
+ /* Cannot map more than that */
+ if (code_gen_buffer_size > (800 * 1024 * 1024))
+ code_gen_buffer_size = (800 * 1024 * 1024);
+#endif
+ code_gen_buffer = mmap(addr, code_gen_buffer_size,
+ PROT_WRITE | PROT_READ | PROT_EXEC,
+ flags, -1, 0);
+ if (code_gen_buffer == MAP_FAILED) {
+ fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+ exit(1);
+ }
+ }
+#else
+ code_gen_buffer = qemu_malloc(code_gen_buffer_size);
+ map_exec(code_gen_buffer, code_gen_buffer_size);
+#endif
+# endif /* !VBOX */
+#endif /* !USE_STATIC_CODE_GEN_BUFFER */
+#ifndef VBOX /** @todo r=bird: why are we different? */
+ map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+#else
+ map_exec(code_gen_prologue, _1K);
+#endif
+ code_gen_buffer_max_size = code_gen_buffer_size -
+ (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
+ code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+ tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+}
+
+/* Must be called before using the QEMU cpus. 'tb_size' is the size
+ (in bytes) allocated to the translation buffer. Zero means default
+ size. */
+void cpu_exec_init_all(uintptr_t tb_size)
+{
+ cpu_gen_init();
+ code_gen_alloc(tb_size);
+ code_gen_ptr = code_gen_buffer;
+ page_init();
+#if !defined(CONFIG_USER_ONLY)
+ io_mem_init();
+#endif
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+ /* There's no guest base to take into account, so go ahead and
+ initialize the prologue now. */
+ tcg_prologue_init(&tcg_ctx);
+#endif
+}
+
+#ifndef VBOX
+#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
+
+static int cpu_common_post_load(void *opaque, int version_id)
+{
+ CPUState *env = opaque;
+
+ /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
+ version_id is increased. */
+ env->interrupt_request &= ~0x01;
+ tlb_flush(env, 1);
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_cpu_common = {
+ .name = "cpu_common",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .post_load = cpu_common_post_load,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(halted, CPUState),
+ VMSTATE_UINT32(interrupt_request, CPUState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+#endif
+
+CPUState *qemu_get_cpu(int cpu)
+{
+ CPUState *env = first_cpu;
+
+ while (env) {
+ if (env->cpu_index == cpu)
+ break;
+ env = env->next_cpu;
+ }
+
+ return env;
+}
+
+#endif /* !VBOX */
+
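+/* Register a virtual CPU: append it to the first_cpu list, assign it the
+   next cpu_index and initialize its breakpoint and watchpoint queues. */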
+void cpu_exec_init(CPUState *env)
+{
+ CPUState **penv;
+ int cpu_index;
+
+#if defined(CONFIG_USER_ONLY)
+ cpu_list_lock();
+#endif
+ env->next_cpu = NULL;
+ penv = &first_cpu;
+ cpu_index = 0;
+ while (*penv != NULL) {
+ penv = &(*penv)->next_cpu;
+ cpu_index++;
+ }
+ env->cpu_index = cpu_index;
+ env->numa_node = 0;
+ QTAILQ_INIT(&env->breakpoints);
+ QTAILQ_INIT(&env->watchpoints);
+ *penv = env;
+#ifndef VBOX
+#if defined(CONFIG_USER_ONLY)
+ cpu_list_unlock();
+#endif
+#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
+ vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
+ register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
+ cpu_save, cpu_load, env);
+#endif
+#endif /* !VBOX */
+}
+
+static inline void invalidate_page_bitmap(PageDesc *p)
+{
+ if (p->code_bitmap) {
+ qemu_free(p->code_bitmap);
+ p->code_bitmap = NULL;
+ }
+ p->code_write_count = 0;
+}
+
+/* Set to NULL all the 'first_tb' fields in all PageDescs. */
+
+static void page_flush_tb_1 (int level, void **lp)
+{
+ int i;
+
+ if (*lp == NULL) {
+ return;
+ }
+ if (level == 0) {
+ PageDesc *pd = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ pd[i].first_tb = NULL;
+ invalidate_page_bitmap(pd + i);
+ }
+ } else {
+ void **pp = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ page_flush_tb_1 (level - 1, pp + i);
+ }
+ }
+}
+
+static void page_flush_tb(void)
+{
+ int i;
+ for (i = 0; i < V_L1_SIZE; i++) {
+ page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+ }
+}
+
+/* flush all the translation blocks */
+/* XXX: tb_flush is currently not thread safe */
+void tb_flush(CPUState *env1)
+{
+ CPUState *env;
+#ifdef VBOX
+ STAM_PROFILE_START(&env1->StatTbFlush, a);
+#endif
+#if defined(DEBUG_FLUSH)
+ printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
+ (unsigned long)(code_gen_ptr - code_gen_buffer),
+ nb_tbs, nb_tbs > 0 ?
+ ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+#endif
+ if ((uintptr_t)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
+ cpu_abort(env1, "Internal error: code buffer overflow\n");
+
+ nb_tbs = 0;
+
+ for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+ }
+
+ memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
+ page_flush_tb();
+
+ code_gen_ptr = code_gen_buffer;
+ /* XXX: flush processor icache at this point if cache flush is
+ expensive */
+ tb_flush_count++;
+#ifdef VBOX
+ STAM_PROFILE_STOP(&env1->StatTbFlush, a);
+#endif
+}
+
+#ifdef DEBUG_TB_CHECK
+
+static void tb_invalidate_check(target_ulong address)
+{
+ TranslationBlock *tb;
+ int i;
+ address &= TARGET_PAGE_MASK;
+ for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+ for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+ if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+ address >= tb->pc + tb->size)) {
+ printf("ERROR invalidate: address=" TARGET_FMT_lx
+ " PC=%08lx size=%04x\n",
+ address, (long)tb->pc, tb->size);
+ }
+ }
+ }
+}
+
+/* verify that all the pages have correct rights for code */
+static void tb_page_check(void)
+{
+ TranslationBlock *tb;
+ int i, flags1, flags2;
+
+ for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+ for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+ flags1 = page_get_flags(tb->pc);
+ flags2 = page_get_flags(tb->pc + tb->size - 1);
+ if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+ printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+ (long)tb->pc, tb->size, flags1, flags2);
+ }
+ }
+ }
+}
+
+#endif
+
+/* invalidate one TB */
+static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
+ int next_offset)
+{
+ TranslationBlock *tb1;
+ for(;;) {
+ tb1 = *ptb;
+ if (tb1 == tb) {
+ *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
+ break;
+ }
+ ptb = (TranslationBlock **)((char *)tb1 + next_offset);
+ }
+}
+
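+/* TB pointers stored in p->first_tb / tb->page_next[] and in the jump
+   lists carry a tag in their two low bits (the page number within the TB,
+   or 2 for the list head); mask with ~3 to recover the pointer. */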
+static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+ TranslationBlock *tb1;
+ unsigned int n1;
+
+ for(;;) {
+ tb1 = *ptb;
+ n1 = (intptr_t)tb1 & 3;
+ tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3);
+ if (tb1 == tb) {
+ *ptb = tb1->page_next[n1];
+ break;
+ }
+ ptb = &tb1->page_next[n1];
+ }
+}
+
+static inline void tb_jmp_remove(TranslationBlock *tb, int n)
+{
+ TranslationBlock *tb1, **ptb;
+ unsigned int n1;
+
+ ptb = &tb->jmp_next[n];
+ tb1 = *ptb;
+ if (tb1) {
+ /* find tb(n) in circular list */
+ for(;;) {
+ tb1 = *ptb;
+ n1 = (intptr_t)tb1 & 3;
+ tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3);
+ if (n1 == n && tb1 == tb)
+ break;
+ if (n1 == 2) {
+ ptb = &tb1->jmp_first;
+ } else {
+ ptb = &tb1->jmp_next[n1];
+ }
+ }
+ /* now we can suppress tb(n) from the list */
+ *ptb = tb->jmp_next[n];
+
+ tb->jmp_next[n] = NULL;
+ }
+}
+
+/* reset the jump entry 'n' of a TB so that it is not chained to
+ another TB */
+static inline void tb_reset_jump(TranslationBlock *tb, int n)
+{
+ tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
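+/* Invalidate one TB: unlink it from the physical hash chain and the
+   per-page TB lists, drop it from all tb_jmp_cache entries and reset any
+   direct jumps pointing at it. */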
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
+{
+ CPUState *env;
+ PageDesc *p;
+ unsigned int h, n1;
+ tb_page_addr_t phys_pc;
+ TranslationBlock *tb1, *tb2;
+
+ /* remove the TB from the hash list */
+ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+ h = tb_phys_hash_func(phys_pc);
+ tb_remove(&tb_phys_hash[h], tb,
+ offsetof(TranslationBlock, phys_hash_next));
+
+ /* remove the TB from the page list */
+ if (tb->page_addr[0] != page_addr) {
+ p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+ tb_page_remove(&p->first_tb, tb);
+ invalidate_page_bitmap(p);
+ }
+ if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
+ p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
+ tb_page_remove(&p->first_tb, tb);
+ invalidate_page_bitmap(p);
+ }
+
+ tb_invalidated_flag = 1;
+
+ /* remove the TB from the hash list */
+ h = tb_jmp_cache_hash_func(tb->pc);
+ for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ if (env->tb_jmp_cache[h] == tb)
+ env->tb_jmp_cache[h] = NULL;
+ }
+
+ /* suppress this TB from the two jump lists */
+ tb_jmp_remove(tb, 0);
+ tb_jmp_remove(tb, 1);
+
+ /* suppress any remaining jumps to this TB */
+ tb1 = tb->jmp_first;
+ for(;;) {
+ n1 = (intptr_t)tb1 & 3;
+ if (n1 == 2)
+ break;
+ tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3);
+ tb2 = tb1->jmp_next[n1];
+ tb_reset_jump(tb1, n1);
+ tb1->jmp_next[n1] = NULL;
+ tb1 = tb2;
+ }
+ tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2); /* fail safe */
+
+ tb_phys_invalidate_count++;
+}
+
+#ifdef VBOX
+
+void tb_invalidate_virt(CPUState *env, uint32_t eip)
+{
+# if 1
+ tb_flush(env);
+# else
+ uint8_t *cs_base, *pc;
+ unsigned int flags, h, phys_pc;
+ TranslationBlock *tb, **ptb;
+
+ flags = env->hflags;
+ flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+ cs_base = env->segs[R_CS].base;
+ pc = cs_base + eip;
+
+ tb = tb_find(&ptb, (uintptr_t)pc, (uintptr_t)cs_base,
+ flags);
+
+ if(tb)
+ {
+# ifdef DEBUG
+ printf("invalidating TB (%08X) at %08X\n", tb, eip);
+# endif
+ tb_invalidate(tb);
+ //Note: this will leak TBs, but the whole cache will be flushed
+ // when it happens too often
+ tb->pc = 0;
+ tb->cs_base = 0;
+ tb->flags = 0;
+ }
+# endif
+}
+
+# ifdef VBOX_STRICT
+/**
+ * Gets the page offset.
+ */
+ram_addr_t get_phys_page_offset(target_ulong addr)
+{
+ PhysPageDesc *p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ return p ? p->phys_offset : 0;
+}
+# endif /* VBOX_STRICT */
+
+#endif /* VBOX */
+
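+/* Set 'len' consecutive bits starting at bit index 'start' in the bitmap
+   'tab'. */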
+static inline void set_bits(uint8_t *tab, int start, int len)
+{
+ int end, mask, end1;
+
+ end = start + len;
+ tab += start >> 3;
+ mask = 0xff << (start & 7);
+ if ((start & ~7) == (end & ~7)) {
+ if (start < end) {
+ mask &= ~(0xff << (end & 7));
+ *tab |= mask;
+ }
+ } else {
+ *tab++ |= mask;
+ start = (start + 8) & ~7;
+ end1 = end & ~7;
+ while (start < end1) {
+ *tab++ = 0xff;
+ start += 8;
+ }
+ if (start < end) {
+ mask = ~(0xff << (end & 7));
+ *tab |= mask;
+ }
+ }
+}
+
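+/* Build the code bitmap of a page: one bit per byte covered by a translated
+   block, so that writes hitting only non-code bytes can skip TB
+   invalidation (see tb_invalidate_phys_page_fast). */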
+static void build_page_bitmap(PageDesc *p)
+{
+ int n, tb_start, tb_end;
+ TranslationBlock *tb;
+
+ p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
+
+ tb = p->first_tb;
+ while (tb != NULL) {
+ n = (intptr_t)tb & 3;
+ tb = (TranslationBlock *)((intptr_t)tb & ~3);
+ /* NOTE: this is subtle as a TB may span two physical pages */
+ if (n == 0) {
+ /* NOTE: tb_end may be after the end of the page, but
+ it is not a problem */
+ tb_start = tb->pc & ~TARGET_PAGE_MASK;
+ tb_end = tb_start + tb->size;
+ if (tb_end > TARGET_PAGE_SIZE)
+ tb_end = TARGET_PAGE_SIZE;
+ } else {
+ tb_start = 0;
+ tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+ }
+ set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
+ tb = tb->page_next[n];
+ }
+}
+
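+/* Translate a new TB for pc/cs_base/flags/cflags.  If TB allocation fails,
+   the whole cache is flushed first (after which allocation cannot fail),
+   then the block is generated and linked into the physical page tables. */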
+TranslationBlock *tb_gen_code(CPUState *env,
+ target_ulong pc, target_ulong cs_base,
+ int flags, int cflags)
+{
+ TranslationBlock *tb;
+ uint8_t *tc_ptr;
+ tb_page_addr_t phys_pc, phys_page2;
+ target_ulong virt_page2;
+ int code_gen_size;
+
+ phys_pc = get_page_addr_code(env, pc);
+ tb = tb_alloc(pc);
+ if (!tb) {
+ /* flush must be done */
+ tb_flush(env);
+ /* cannot fail at this point */
+ tb = tb_alloc(pc);
+ /* Don't forget to invalidate previous TB info. */
+ tb_invalidated_flag = 1;
+ }
+ tc_ptr = code_gen_ptr;
+ tb->tc_ptr = tc_ptr;
+ tb->cs_base = cs_base;
+ tb->flags = flags;
+ tb->cflags = cflags;
+ cpu_gen_code(env, tb, &code_gen_size);
+ code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+
+ /* check next page if needed */
+ virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
+ phys_page2 = -1;
+ if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+ phys_page2 = get_page_addr_code(env, virt_page2);
+ }
+ tb_link_page(tb, phys_pc, phys_page2);
+ return tb;
+}
+
+/* invalidate all TBs which intersect with the target physical page
+ starting in range [start;end[. NOTE: start and end must refer to
+ the same physical page. 'is_cpu_write_access' should be true if called
+ from a real cpu write access: the virtual CPU will exit the current
+ TB if code is modified inside this TB. */
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
+ int is_cpu_write_access)
+{
+ TranslationBlock *tb, *tb_next, *saved_tb;
+ CPUState *env = cpu_single_env;
+ tb_page_addr_t tb_start, tb_end;
+ PageDesc *p;
+ int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+ int current_tb_not_found = is_cpu_write_access;
+ TranslationBlock *current_tb = NULL;
+ int current_tb_modified = 0;
+ target_ulong current_pc = 0;
+ target_ulong current_cs_base = 0;
+ int current_flags = 0;
+#endif /* TARGET_HAS_PRECISE_SMC */
+
+ p = page_find(start >> TARGET_PAGE_BITS);
+ if (!p)
+ return;
+ if (!p->code_bitmap &&
+ ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
+ is_cpu_write_access) {
+ /* build code bitmap */
+ build_page_bitmap(p);
+ }
+
+ /* we remove all the TBs in the range [start, end[ */
+ /* XXX: see if in some cases it could be faster to invalidate all the code */
+ tb = p->first_tb;
+ while (tb != NULL) {
+ n = (intptr_t)tb & 3;
+ tb = (TranslationBlock *)((intptr_t)tb & ~3);
+ tb_next = tb->page_next[n];
+ /* NOTE: this is subtle as a TB may span two physical pages */
+ if (n == 0) {
+ /* NOTE: tb_end may be after the end of the page, but
+ it is not a problem */
+ tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+ tb_end = tb_start + tb->size;
+ } else {
+ tb_start = tb->page_addr[1];
+ tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+ }
+ if (!(tb_end <= start || tb_start >= end)) {
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_not_found) {
+ current_tb_not_found = 0;
+ current_tb = NULL;
+ if (env->mem_io_pc) {
+ /* now we have a real cpu fault */
+ current_tb = tb_find_pc(env->mem_io_pc);
+ }
+ }
+ if (current_tb == tb &&
+ (current_tb->cflags & CF_COUNT_MASK) != 1) {
+ /* If we are modifying the current TB, we must stop
+ its execution. We could be more precise by checking
+ that the modification is after the current PC, but it
+ would require a specialized function to partially
+ restore the CPU state */
+
+ current_tb_modified = 1;
+ cpu_restore_state(current_tb, env,
+ env->mem_io_pc, NULL);
+ cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+ &current_flags);
+ }
+#endif /* TARGET_HAS_PRECISE_SMC */
+ /* we need to do that to handle the case where a signal
+ occurs while doing tb_phys_invalidate() */
+ saved_tb = NULL;
+ if (env) {
+ saved_tb = env->current_tb;
+ env->current_tb = NULL;
+ }
+ tb_phys_invalidate(tb, -1);
+ if (env) {
+ env->current_tb = saved_tb;
+ if (env->interrupt_request && env->current_tb)
+ cpu_interrupt(env, env->interrupt_request);
+ }
+ }
+ tb = tb_next;
+ }
+#if !defined(CONFIG_USER_ONLY)
+ /* if no code remaining, no need to continue to use slow writes */
+ if (!p->first_tb) {
+ invalidate_page_bitmap(p);
+ if (is_cpu_write_access) {
+ tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
+ }
+ }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_modified) {
+ /* we generate a block containing just the instruction
+ modifying the memory. It will ensure that it cannot modify
+ itself */
+ env->current_tb = NULL;
+ tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+ cpu_resume_from_signal(env, NULL);
+ }
+#endif
+}
+
+/* len must be <= 8 and start must be a multiple of len */
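+/* Fast path for small writes: if the page has a code bitmap and none of
+   the written bytes intersect translated code, no invalidation is needed;
+   otherwise fall back to the full range invalidation. */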
+static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
+{
+ PageDesc *p;
+ int offset, b;
+#if 0
+ if (1) {
+ qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
+ cpu_single_env->mem_io_vaddr, len,
+ cpu_single_env->eip,
+ cpu_single_env->eip + (intptr_t)cpu_single_env->segs[R_CS].base);
+ }
+#endif
+ p = page_find(start >> TARGET_PAGE_BITS);
+ if (!p)
+ return;
+ if (p->code_bitmap) {
+ offset = start & ~TARGET_PAGE_MASK;
+ b = p->code_bitmap[offset >> 3] >> (offset & 7);
+ if (b & ((1 << len) - 1))
+ goto do_invalidate;
+ } else {
+ do_invalidate:
+ tb_invalidate_phys_page_range(start, start + len, 1);
+ }
+}
+
+#if !defined(CONFIG_SOFTMMU)
+static void tb_invalidate_phys_page(tb_page_addr_t addr,
+ uintptr_t pc, void *puc)
+{
+ TranslationBlock *tb;
+ PageDesc *p;
+ int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+ TranslationBlock *current_tb = NULL;
+ CPUState *env = cpu_single_env;
+ int current_tb_modified = 0;
+ target_ulong current_pc = 0;
+ target_ulong current_cs_base = 0;
+ int current_flags = 0;
+#endif
+
+ addr &= TARGET_PAGE_MASK;
+ p = page_find(addr >> TARGET_PAGE_BITS);
+ if (!p)
+ return;
+ tb = p->first_tb;
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (tb && pc != 0) {
+ current_tb = tb_find_pc(pc);
+ }
+#endif
+ while (tb != NULL) {
+ n = (intptr_t)tb & 3;
+ tb = (TranslationBlock *)((intptr_t)tb & ~3);
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb == tb &&
+ (current_tb->cflags & CF_COUNT_MASK) != 1) {
+ /* If we are modifying the current TB, we must stop
+ its execution. We could be more precise by checking
+ that the modification is after the current PC, but it
+ would require a specialized function to partially
+ restore the CPU state */
+
+ current_tb_modified = 1;
+ cpu_restore_state(current_tb, env, pc, puc);
+ cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+ &current_flags);
+ }
+#endif /* TARGET_HAS_PRECISE_SMC */
+ tb_phys_invalidate(tb, addr);
+ tb = tb->page_next[n];
+ }
+ p->first_tb = NULL;
+#ifdef TARGET_HAS_PRECISE_SMC
+ if (current_tb_modified) {
+ /* we generate a block containing just the instruction
+ modifying the memory. It will ensure that it cannot modify
+ itself */
+ env->current_tb = NULL;
+ tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
+ cpu_resume_from_signal(env, puc);
+ }
+#endif
+}
+#endif
+
+/* add the tb in the target page and protect it if necessary */
+static inline void tb_alloc_page(TranslationBlock *tb,
+ unsigned int n, tb_page_addr_t page_addr)
+{
+ PageDesc *p;
+ TranslationBlock *last_first_tb;
+
+ tb->page_addr[n] = page_addr;
+ p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
+ tb->page_next[n] = p->first_tb;
+ last_first_tb = p->first_tb;
+ p->first_tb = (TranslationBlock *)((intptr_t)tb | n);
+ invalidate_page_bitmap(p);
+
+#if defined(TARGET_HAS_SMC) || 1
+
+#if defined(CONFIG_USER_ONLY)
+ if (p->flags & PAGE_WRITE) {
+ target_ulong addr;
+ PageDesc *p2;
+ int prot;
+
+ /* force the host page as non writable (writes will have a
+ page fault + mprotect overhead) */
+ page_addr &= qemu_host_page_mask;
+ prot = 0;
+ for(addr = page_addr; addr < page_addr + qemu_host_page_size;
+ addr += TARGET_PAGE_SIZE) {
+
+ p2 = page_find (addr >> TARGET_PAGE_BITS);
+ if (!p2)
+ continue;
+ prot |= p2->flags;
+ p2->flags &= ~PAGE_WRITE;
+ }
+ mprotect(g2h(page_addr), qemu_host_page_size,
+ (prot & PAGE_BITS) & ~PAGE_WRITE);
+#ifdef DEBUG_TB_INVALIDATE
+ printf("protecting code page: 0x" TARGET_FMT_lx "\n",
+ page_addr);
+#endif
+ }
+#else
+ /* if some code is already present, then the pages are already
+ protected. So we handle the case where only the first TB is
+ allocated in a physical page */
+ if (!last_first_tb) {
+ tlb_protect_code(page_addr);
+ }
+#endif
+
+#endif /* TARGET_HAS_SMC */
+}
+
+/* Allocate a new translation block. Flush the translation buffer if
+ too many translation blocks or too much generated code. */
+TranslationBlock *tb_alloc(target_ulong pc)
+{
+ TranslationBlock *tb;
+
+ if (nb_tbs >= code_gen_max_blocks ||
+ (code_gen_ptr - code_gen_buffer) >= VBOX_ONLY((uintptr_t))code_gen_buffer_max_size)
+ return NULL;
+ tb = &tbs[nb_tbs++];
+ tb->pc = pc;
+ tb->cflags = 0;
+ return tb;
+}
+
+void tb_free(TranslationBlock *tb)
+{
+    /* In practice this is mostly used for single use temporary TBs.
+ Ignore the hard cases and just back up if this TB happens to
+ be the last one generated. */
+ if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+ code_gen_ptr = tb->tc_ptr;
+ nb_tbs--;
+ }
+}
+
+/* add a new TB and link it to the physical page tables. phys_page2 is
+ (-1) to indicate that only one page contains the TB. */
+void tb_link_page(TranslationBlock *tb,
+ tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
+{
+ unsigned int h;
+ TranslationBlock **ptb;
+
+ /* Grab the mmap lock to stop another thread invalidating this TB
+ before we are done. */
+ mmap_lock();
+ /* add in the physical hash table */
+ h = tb_phys_hash_func(phys_pc);
+ ptb = &tb_phys_hash[h];
+ tb->phys_hash_next = *ptb;
+ *ptb = tb;
+
+ /* add in the page list */
+ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
+ if (phys_page2 != -1)
+ tb_alloc_page(tb, 1, phys_page2);
+ else
+ tb->page_addr[1] = -1;
+
+ tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2);
+ tb->jmp_next[0] = NULL;
+ tb->jmp_next[1] = NULL;
+
+ /* init original jump addresses */
+ if (tb->tb_next_offset[0] != 0xffff)
+ tb_reset_jump(tb, 0);
+ if (tb->tb_next_offset[1] != 0xffff)
+ tb_reset_jump(tb, 1);
+
+#ifdef DEBUG_TB_CHECK
+ tb_page_check();
+#endif
+ mmap_unlock();
+}
+
+/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
+ tb[1].tc_ptr. Return NULL if not found */
+TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+{
+ int m_min, m_max, m;
+ uintptr_t v;
+ TranslationBlock *tb;
+
+ if (nb_tbs <= 0)
+ return NULL;
+ if (tc_ptr < (uintptr_t)code_gen_buffer ||
+ tc_ptr >= (uintptr_t)code_gen_ptr)
+ return NULL;
+ /* binary search (cf Knuth) */
+ m_min = 0;
+ m_max = nb_tbs - 1;
+ while (m_min <= m_max) {
+ m = (m_min + m_max) >> 1;
+ tb = &tbs[m];
+ v = (uintptr_t)tb->tc_ptr;
+ if (v == tc_ptr)
+ return tb;
+ else if (tc_ptr < v) {
+ m_max = m - 1;
+ } else {
+ m_min = m + 1;
+ }
+ }
+ return &tbs[m_max];
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb);
+
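+/* Unchain exit 'n' of 'tb': follow the circular list of incoming jumps
+   kept by the target TB (the list head is the target itself, tagged with
+   2), unlink 'tb' from it, reset the generated jump and recurse into the
+   target so its own chained jumps are reset as well. */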
+static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+{
+ TranslationBlock *tb1, *tb_next, **ptb;
+ unsigned int n1;
+
+ tb1 = tb->jmp_next[n];
+ if (tb1 != NULL) {
+ /* find head of list */
+ for(;;) {
+ n1 = (intptr_t)tb1 & 3;
+ tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3);
+ if (n1 == 2)
+ break;
+ tb1 = tb1->jmp_next[n1];
+ }
+        /* we are now sure that tb jumps to tb1 */
+ tb_next = tb1;
+
+ /* remove tb from the jmp_first list */
+ ptb = &tb_next->jmp_first;
+ for(;;) {
+ tb1 = *ptb;
+ n1 = (intptr_t)tb1 & 3;
+ tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3);
+ if (n1 == n && tb1 == tb)
+ break;
+ ptb = &tb1->jmp_next[n1];
+ }
+ *ptb = tb->jmp_next[n];
+ tb->jmp_next[n] = NULL;
+
+ /* suppress the jump to next tb in generated code */
+ tb_reset_jump(tb, n);
+
+ /* suppress jumps in the tb on which we could have jumped */
+ tb_reset_jump_recursive(tb_next);
+ }
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb)
+{
+ tb_reset_jump_recursive2(tb, 0);
+ tb_reset_jump_recursive2(tb, 1);
+}
+
+#if defined(TARGET_HAS_ICE)
+#if defined(CONFIG_USER_ONLY)
+static void breakpoint_invalidate(CPUState *env, target_ulong pc)
+{
+ tb_invalidate_phys_page_range(pc, pc + 1, 0);
+}
+#else
+static void breakpoint_invalidate(CPUState *env, target_ulong pc)
+{
+ target_phys_addr_t addr;
+ target_ulong pd;
+ ram_addr_t ram_addr;
+ PhysPageDesc *p;
+
+ addr = cpu_get_phys_page_debug(env, pc);
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+ ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
+ tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+}
+#endif
+#endif /* TARGET_HAS_ICE */
+
+#if defined(CONFIG_USER_ONLY)
+void cpu_watchpoint_remove_all(CPUState *env, int mask)
+{
+}
+
+int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
+ int flags, CPUWatchpoint **watchpoint)
+{
+ return -ENOSYS;
+}
+#else
+/* Add a watchpoint. */
+int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
+ int flags, CPUWatchpoint **watchpoint)
+{
+ target_ulong len_mask = ~(len - 1);
+ CPUWatchpoint *wp;
+
+ /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
+ if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
+ fprintf(stderr, "qemu: tried to set invalid watchpoint at "
+ TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
+#ifndef VBOX
+ return -EINVAL;
+#else
+ return VERR_INVALID_PARAMETER;
+#endif
+ }
+ wp = qemu_malloc(sizeof(*wp));
+
+ wp->vaddr = addr;
+ wp->len_mask = len_mask;
+ wp->flags = flags;
+
+ /* keep all GDB-injected watchpoints in front */
+ if (flags & BP_GDB)
+ QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
+ else
+ QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
+
+ tlb_flush_page(env, addr);
+
+ if (watchpoint)
+ *watchpoint = wp;
+ return 0;
+}
+
+/* Remove a specific watchpoint. */
+int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
+ int flags)
+{
+ target_ulong len_mask = ~(len - 1);
+ CPUWatchpoint *wp;
+
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+ if (addr == wp->vaddr && len_mask == wp->len_mask
+ && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
+ cpu_watchpoint_remove_by_ref(env, wp);
+ return 0;
+ }
+ }
+#ifndef VBOX
+ return -ENOENT;
+#else
+ return VERR_NOT_FOUND;
+#endif
+}
+
+/* Remove a specific watchpoint by reference. */
+void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
+{
+ QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
+
+ tlb_flush_page(env, watchpoint->vaddr);
+
+ qemu_free(watchpoint);
+}
+
+/* Remove all matching watchpoints. */
+void cpu_watchpoint_remove_all(CPUState *env, int mask)
+{
+ CPUWatchpoint *wp, *next;
+
+ QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
+ if (wp->flags & mask)
+ cpu_watchpoint_remove_by_ref(env, wp);
+ }
+}
+#endif
+
+/* Add a breakpoint. */
+int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
+ CPUBreakpoint **breakpoint)
+{
+#if defined(TARGET_HAS_ICE)
+ CPUBreakpoint *bp;
+
+ bp = qemu_malloc(sizeof(*bp));
+
+ bp->pc = pc;
+ bp->flags = flags;
+
+ /* keep all GDB-injected breakpoints in front */
+ if (flags & BP_GDB)
+ QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
+ else
+ QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
+
+ breakpoint_invalidate(env, pc);
+
+ if (breakpoint)
+ *breakpoint = bp;
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
+/* Remove a specific breakpoint. */
+int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
+{
+#if defined(TARGET_HAS_ICE)
+ CPUBreakpoint *bp;
+
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+ if (bp->pc == pc && bp->flags == flags) {
+ cpu_breakpoint_remove_by_ref(env, bp);
+ return 0;
+ }
+ }
+# ifndef VBOX
+ return -ENOENT;
+# else
+ return VERR_NOT_FOUND;
+# endif
+#else
+ return -ENOSYS;
+#endif
+}
+
+/* Remove a specific breakpoint by reference. */
+void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
+{
+#if defined(TARGET_HAS_ICE)
+ QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
+
+ breakpoint_invalidate(env, breakpoint->pc);
+
+ qemu_free(breakpoint);
+#endif
+}
+
+/* Remove all matching breakpoints. */
+void cpu_breakpoint_remove_all(CPUState *env, int mask)
+{
+#if defined(TARGET_HAS_ICE)
+ CPUBreakpoint *bp, *next;
+
+ QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
+ if (bp->flags & mask)
+ cpu_breakpoint_remove_by_ref(env, bp);
+ }
+#endif
+}
+
+/* enable or disable single step mode. EXCP_DEBUG is returned by the
+ CPU loop after each instruction */
+void cpu_single_step(CPUState *env, int enabled)
+{
+#if defined(TARGET_HAS_ICE)
+ if (env->singlestep_enabled != enabled) {
+ env->singlestep_enabled = enabled;
+ if (kvm_enabled())
+ kvm_update_guest_debug(env, 0);
+ else {
+ /* must flush all the translated code to avoid inconsistencies */
+ /* XXX: only flush what is necessary */
+ tb_flush(env);
+ }
+ }
+#endif
+}
+
+#ifndef VBOX
+
+/* enable or disable low-level logging */
+void cpu_set_log(int log_flags)
+{
+ loglevel = log_flags;
+ if (loglevel && !logfile) {
+ logfile = fopen(logfilename, log_append ? "a" : "w");
+ if (!logfile) {
+ perror(logfilename);
+ _exit(1);
+ }
+#if !defined(CONFIG_SOFTMMU)
+    /* must avoid glibc's use of mmap() by setting a buffer "by hand" */
+ {
+ static char logfile_buf[4096];
+ setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
+ }
+#elif !defined(_WIN32)
+ /* Win32 doesn't support line-buffering and requires size >= 2 */
+ setvbuf(logfile, NULL, _IOLBF, 0);
+#endif
+ log_append = 1;
+ }
+ if (!loglevel && logfile) {
+ fclose(logfile);
+ logfile = NULL;
+ }
+}
+
+void cpu_set_log_filename(const char *filename)
+{
+ logfilename = strdup(filename);
+ if (logfile) {
+ fclose(logfile);
+ logfile = NULL;
+ }
+ cpu_set_log(loglevel);
+}
+
+#endif /* !VBOX */
+
+static void cpu_unlink_tb(CPUState *env)
+{
+ /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
+ problem and hope the cpu will stop of its own accord. For userspace
+ emulation this often isn't actually as bad as it sounds. Often
+ signals are used primarily to interrupt blocking syscalls. */
+ TranslationBlock *tb;
+ static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
+
+ spin_lock(&interrupt_lock);
+ tb = env->current_tb;
+ /* if the cpu is currently executing code, we must unlink it and
+ all the potentially executing TB */
+ if (tb) {
+ env->current_tb = NULL;
+ tb_reset_jump_recursive(tb);
+ }
+ spin_unlock(&interrupt_lock);
+}
+
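+/* Raise the given bits in env->interrupt_request and make sure the CPU
+   notices: with icount the decrementer is forced to expire, otherwise the
+   currently executing TB is unlinked so the main loop regains control. */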
+/* mask must never be zero, except for A20 change call */
+void cpu_interrupt(CPUState *env, int mask)
+{
+ int old_mask;
+
+ old_mask = env->interrupt_request;
+#ifndef VBOX
+ env->interrupt_request |= mask;
+#else /* VBOX */
+ VM_ASSERT_EMT(env->pVM);
+ ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, mask);
+#endif /* VBOX */
+
+#ifndef VBOX
+#ifndef CONFIG_USER_ONLY
+ /*
+ * If called from iothread context, wake the target cpu in
+ * case its halted.
+     * case it's halted.
+ if (!qemu_cpu_self(env)) {
+ qemu_cpu_kick(env);
+ return;
+ }
+#endif
+#endif /* !VBOX */
+
+ if (use_icount) {
+ env->icount_decr.u16.high = 0xffff;
+#ifndef CONFIG_USER_ONLY
+ if (!can_do_io(env)
+ && (mask & ~old_mask) != 0) {
+ cpu_abort(env, "Raised interrupt while not in I/O function");
+ }
+#endif
+ } else {
+ cpu_unlink_tb(env);
+ }
+}
+
+void cpu_reset_interrupt(CPUState *env, int mask)
+{
+#ifdef VBOX
+ /*
+     * Note: the current implementation can be executed by another thread
+     *       without problems; make sure this remains true for future
+     *       changes!
+ */
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~mask);
+#else /* !VBOX */
+ env->interrupt_request &= ~mask;
+#endif /* !VBOX */
+}
+
+void cpu_exit(CPUState *env)
+{
+ env->exit_request = 1;
+ cpu_unlink_tb(env);
+}
+
+#ifndef VBOX
+const CPULogItem cpu_log_items[] = {
+ { CPU_LOG_TB_OUT_ASM, "out_asm",
+ "show generated host assembly code for each compiled TB" },
+ { CPU_LOG_TB_IN_ASM, "in_asm",
+ "show target assembly code for each compiled TB" },
+ { CPU_LOG_TB_OP, "op",
+ "show micro ops for each compiled TB" },
+ { CPU_LOG_TB_OP_OPT, "op_opt",
+ "show micro ops "
+#ifdef TARGET_I386
+ "before eflags optimization and "
+#endif
+ "after liveness analysis" },
+ { CPU_LOG_INT, "int",
+ "show interrupts/exceptions in short format" },
+ { CPU_LOG_EXEC, "exec",
+ "show trace before each executed TB (lots of logs)" },
+ { CPU_LOG_TB_CPU, "cpu",
+ "show CPU state before block translation" },
+#ifdef TARGET_I386
+ { CPU_LOG_PCALL, "pcall",
+ "show protected mode far calls/returns/exceptions" },
+ { CPU_LOG_RESET, "cpu_reset",
+ "show CPU state before CPU resets" },
+#endif
+#ifdef DEBUG_IOPORT
+ { CPU_LOG_IOPORT, "ioport",
+ "show all i/o ports accesses" },
+#endif
+ { 0, NULL, NULL },
+};
+
+#ifndef CONFIG_USER_ONLY
+static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
+ = QLIST_HEAD_INITIALIZER(memory_client_list);
+
+static void cpu_notify_set_memory(target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset)
+{
+ CPUPhysMemoryClient *client;
+ QLIST_FOREACH(client, &memory_client_list, list) {
+ client->set_memory(client, start_addr, size, phys_offset);
+ }
+}
+
+static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
+ target_phys_addr_t end)
+{
+ CPUPhysMemoryClient *client;
+ QLIST_FOREACH(client, &memory_client_list, list) {
+ int r = client->sync_dirty_bitmap(client, start, end);
+ if (r < 0)
+ return r;
+ }
+ return 0;
+}
+
+static int cpu_notify_migration_log(int enable)
+{
+ CPUPhysMemoryClient *client;
+ QLIST_FOREACH(client, &memory_client_list, list) {
+ int r = client->migration_log(client, enable);
+ if (r < 0)
+ return r;
+ }
+ return 0;
+}
+
+static void phys_page_for_each_1(CPUPhysMemoryClient *client,
+ int level, void **lp)
+{
+ int i;
+
+ if (*lp == NULL) {
+ return;
+ }
+ if (level == 0) {
+ PhysPageDesc *pd = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
+ client->set_memory(client, pd[i].region_offset,
+ TARGET_PAGE_SIZE, pd[i].phys_offset);
+ }
+ }
+ } else {
+ void **pp = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ phys_page_for_each_1(client, level - 1, pp + i);
+ }
+ }
+}
+
+static void phys_page_for_each(CPUPhysMemoryClient *client)
+{
+ int i;
+ for (i = 0; i < P_L1_SIZE; ++i) {
+ phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
+                             l1_phys_map + i);
+ }
+}
+
+void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
+{
+ QLIST_INSERT_HEAD(&memory_client_list, client, list);
+ phys_page_for_each(client);
+}
+
+void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
+{
+ QLIST_REMOVE(client, list);
+}
+#endif
+
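+/* Return non-zero if the first 'n' characters of 's1' are exactly the
+   string 's2'. */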
+static int cmp1(const char *s1, int n, const char *s2)
+{
+ if (strlen(s2) != n)
+ return 0;
+ return memcmp(s1, s2, n) == 0;
+}
+
+/* takes a comma-separated list of log masks. Returns 0 on error. */
+int cpu_str_to_log_mask(const char *str)
+{
+ const CPULogItem *item;
+ int mask;
+ const char *p, *p1;
+
+ p = str;
+ mask = 0;
+ for(;;) {
+ p1 = strchr(p, ',');
+ if (!p1)
+ p1 = p + strlen(p);
+ if(cmp1(p,p1-p,"all")) {
+ for(item = cpu_log_items; item->mask != 0; item++) {
+ mask |= item->mask;
+ }
+ } else {
+ for(item = cpu_log_items; item->mask != 0; item++) {
+ if (cmp1(p, p1 - p, item->name))
+ goto found;
+ }
+ return 0;
+ }
+ found:
+ mask |= item->mask;
+ if (*p1 != ',')
+ break;
+ p = p1 + 1;
+ }
+ return mask;
+}
+
+void cpu_abort(CPUState *env, const char *fmt, ...)
+{
+ va_list ap;
+ va_list ap2;
+
+ va_start(ap, fmt);
+ va_copy(ap2, ap);
+ fprintf(stderr, "qemu: fatal: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+#ifdef TARGET_I386
+ cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
+#else
+ cpu_dump_state(env, stderr, fprintf, 0);
+#endif
+ if (qemu_log_enabled()) {
+ qemu_log("qemu: fatal: ");
+ qemu_log_vprintf(fmt, ap2);
+ qemu_log("\n");
+#ifdef TARGET_I386
+ log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+#else
+ log_cpu_state(env, 0);
+#endif
+ qemu_log_flush();
+ qemu_log_close();
+ }
+ va_end(ap2);
+ va_end(ap);
+#if defined(CONFIG_USER_ONLY)
+ {
+ struct sigaction act;
+ sigfillset(&act.sa_mask);
+ act.sa_handler = SIG_DFL;
+ sigaction(SIGABRT, &act, NULL);
+ }
+#endif
+ abort();
+}
+
+CPUState *cpu_copy(CPUState *env)
+{
+ CPUState *new_env = cpu_init(env->cpu_model_str);
+ CPUState *next_cpu = new_env->next_cpu;
+ int cpu_index = new_env->cpu_index;
+#if defined(TARGET_HAS_ICE)
+ CPUBreakpoint *bp;
+ CPUWatchpoint *wp;
+#endif
+
+ memcpy(new_env, env, sizeof(CPUState));
+
+ /* Preserve chaining and index. */
+ new_env->next_cpu = next_cpu;
+ new_env->cpu_index = cpu_index;
+
+ /* Clone all break/watchpoints.
+ Note: Once we support ptrace with hw-debug register access, make sure
+ BP_CPU break/watchpoints are handled correctly on clone. */
+ QTAILQ_INIT(&env->breakpoints);
+ QTAILQ_INIT(&env->watchpoints);
+#if defined(TARGET_HAS_ICE)
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+ cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
+ }
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+ cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
+ wp->flags, NULL);
+ }
+#endif
+
+ return new_env;
+}
+
+#endif /* !VBOX */
+#if !defined(CONFIG_USER_ONLY)
+
+static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
+{
+ unsigned int i;
+
+ /* Discard jump cache entries for any tb which might potentially
+ overlap the flushed page. */
+ i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
+ memset (&env->tb_jmp_cache[i], 0,
+ TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+
+ i = tb_jmp_cache_hash_page(addr);
+ memset (&env->tb_jmp_cache[i], 0,
+ TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+#ifdef VBOX
+
+ /* inform raw mode about TLB page flush */
+ remR3FlushPage(env, addr);
+#endif /* VBOX */
+}
+
+static CPUTLBEntry s_cputlb_empty_entry = {
+ .addr_read = -1,
+ .addr_write = -1,
+ .addr_code = -1,
+ .addend = -1,
+};
+
+/* NOTE: if flush_global is true, also flush global entries (not
+ implemented yet) */
+void tlb_flush(CPUState *env, int flush_global)
+{
+ int i;
+
+#ifdef VBOX
+ Assert(EMRemIsLockOwner(env->pVM));
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_FLUSH_TLB);
+#endif
+
+#if defined(DEBUG_TLB)
+ printf("tlb_flush:\n");
+#endif
+ /* must reset current TB so that interrupts cannot modify the
+ links while we are modifying them */
+ env->current_tb = NULL;
+
+ for(i = 0; i < CPU_TLB_SIZE; i++) {
+ int mmu_idx;
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
+ }
+ }
+
+ memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
+
+ env->tlb_flush_addr = -1;
+ env->tlb_flush_mask = 0;
+ tlb_flush_count++;
+#ifdef VBOX
+
+ /* inform raw mode about TLB flush */
+ remR3FlushTLB(env, flush_global);
+#endif /* VBOX */
+}
+
+static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
+{
+ if (addr == (tlb_entry->addr_read &
+ (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
+ addr == (tlb_entry->addr_write &
+ (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
+ addr == (tlb_entry->addr_code &
+ (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ *tlb_entry = s_cputlb_empty_entry;
+ }
+}
+
+void tlb_flush_page(CPUState *env, target_ulong addr)
+{
+ int i;
+ int mmu_idx;
+
+ Assert(EMRemIsLockOwner(env->pVM));
+#if defined(DEBUG_TLB)
+ printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
+#endif
+ /* Check if we need to flush due to large pages. */
+ if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
+#if defined(DEBUG_TLB)
+ printf("tlb_flush_page: forced full flush ("
+ TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+ env->tlb_flush_addr, env->tlb_flush_mask);
+#endif
+ tlb_flush(env, 1);
+ return;
+ }
+ /* must reset current TB so that interrupts cannot modify the
+ links while we are modifying them */
+ env->current_tb = NULL;
+
+ addr &= TARGET_PAGE_MASK;
+ i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
+ tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+
+ tlb_flush_jmp_cache(env, addr);
+}
+
+/* update the TLBs so that writes to code in the virtual page 'addr'
+ can be detected */
+static void tlb_protect_code(ram_addr_t ram_addr)
+{
+ cpu_physical_memory_reset_dirty(ram_addr,
+ ram_addr + TARGET_PAGE_SIZE,
+ CODE_DIRTY_FLAG);
+#if defined(VBOX) && defined(REM_MONITOR_CODE_PAGES)
+ /** @todo Retest this? This function has changed... */
+ remR3ProtectCode(cpu_single_env, ram_addr);
+#endif /* VBOX */
+}
+
+/* update the TLB so that writes in physical page 'phys_addr' are no longer
+ tested for self modifying code */
+static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
+ target_ulong vaddr)
+{
+ cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
+}
+
+static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
+ uintptr_t start, uintptr_t length)
+{
+ uintptr_t addr;
+#ifdef VBOX
+
+ if (start & 3)
+ return;
+#endif /* VBOX */
+ if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+ addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+ if ((addr - start) < length) {
+ tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
+ }
+ }
+}
+
+/* Note: start and end must be within the same ram block. */
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
+ int dirty_flags)
+{
+ CPUState *env;
+ uintptr_t length, start1;
+ int i;
+
+ start &= TARGET_PAGE_MASK;
+ end = TARGET_PAGE_ALIGN(end);
+
+ length = end - start;
+ if (length == 0)
+ return;
+ cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
+
+ /* we modify the TLB cache so that the dirty bit will be set again
+ when accessing the range */
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+ start1 = start;
+#elif !defined(VBOX)
+ start1 = (uintptr_t)qemu_get_ram_ptr(start);
+    /* Check that we don't span multiple blocks - this breaks the
+ address comparisons below. */
+ if ((uintptr_t)qemu_get_ram_ptr(end - 1) - start1
+ != (end - 1) - start) {
+ abort();
+ }
+#else
+ start1 = (uintptr_t)remR3TlbGCPhys2Ptr(first_cpu, start, 1 /*fWritable*/); /** @todo page replacing (sharing or read only) may cause trouble, fix interface/whatever. */
+#endif
+
+ for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ int mmu_idx;
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ for(i = 0; i < CPU_TLB_SIZE; i++)
+ tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
+ start1, length);
+ }
+ }
+}
+
+#ifndef VBOX
+
+int cpu_physical_memory_set_dirty_tracking(int enable)
+{
+ int ret = 0;
+ in_migration = enable;
+ ret = cpu_notify_migration_log(!!enable);
+ return ret;
+}
+
+int cpu_physical_memory_get_dirty_tracking(void)
+{
+ return in_migration;
+}
+
+#endif /* !VBOX */
+
+int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr)
+{
+#ifndef VBOX
+ int ret;
+
+ ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
+ return ret;
+#else /* VBOX */
+ return 0;
+#endif /* VBOX */
+}
+
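+/* Re-mark a RAM-backed TLB entry as TLB_NOTDIRTY when its page is no
+   longer dirty, so that the next write goes through the slow path again. */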
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+DECLINLINE(void) tlb_update_dirty(CPUTLBEntry *tlb_entry, target_phys_addr_t phys_addend)
+#else
+static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
+#endif
+{
+ ram_addr_t ram_addr;
+#ifndef VBOX
+ void *p;
+#endif
+
+ if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+ ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+#elif !defined(VBOX)
+ p = (void *)(uintptr_t)((tlb_entry->addr_write & TARGET_PAGE_MASK)
+ + tlb_entry->addend);
+ ram_addr = qemu_ram_addr_from_host(p);
+#else
+ Assert(phys_addend != -1);
+ ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + phys_addend;
+#endif
+ if (!cpu_physical_memory_is_dirty(ram_addr)) {
+ tlb_entry->addr_write |= TLB_NOTDIRTY;
+ }
+ }
+}
+
+/* update the TLB according to the current state of the dirty bits */
+void cpu_tlb_update_dirty(CPUState *env)
+{
+ int i;
+ int mmu_idx;
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ for(i = 0; i < CPU_TLB_SIZE; i++)
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ tlb_update_dirty(&env->tlb_table[mmu_idx][i], env->phys_addends[mmu_idx][i]);
+#else
+ tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
+#endif
+ }
+}
+
+static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
+{
+ if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
+ tlb_entry->addr_write = vaddr;
+}
+
+/* update the TLB corresponding to virtual page vaddr
+ so that it is no longer dirty */
+static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
+{
+ int i;
+ int mmu_idx;
+
+ vaddr &= TARGET_PAGE_MASK;
+ i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
+ tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
+}
+
+/* Our TLB does not support large pages, so remember the area covered by
+ large pages and trigger a full TLB flush if these are invalidated. */
+static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
+ target_ulong size)
+{
+ target_ulong mask = ~(size - 1);
+
+ if (env->tlb_flush_addr == (target_ulong)-1) {
+ env->tlb_flush_addr = vaddr & mask;
+ env->tlb_flush_mask = mask;
+ return;
+ }
+ /* Extend the existing region to include the new page.
+ This is a compromise between unnecessary flushes and the cost
+ of maintaining a full variable size TLB. */
+ mask &= env->tlb_flush_mask;
+ while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
+ mask <<= 1;
+ }
+ env->tlb_flush_addr &= mask;
+ env->tlb_flush_mask = mask;
+}
+
+/* Add a new TLB entry. At most one entry for a given virtual address
+ is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+ supplied size is only used by tlb_flush_page. */
+void tlb_set_page(CPUState *env, target_ulong vaddr,
+ target_phys_addr_t paddr, int prot,
+ int mmu_idx, target_ulong size)
+{
+ PhysPageDesc *p;
+ ram_addr_t pd;
+ unsigned int index;
+ target_ulong address;
+ target_ulong code_address;
+ uintptr_t addend;
+ CPUTLBEntry *te;
+ CPUWatchpoint *wp;
+ target_phys_addr_t iotlb;
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ int read_mods = 0, write_mods = 0, code_mods = 0;
+#endif
+
+ assert(size >= TARGET_PAGE_SIZE);
+ if (size != TARGET_PAGE_SIZE) {
+ tlb_add_large_page(env, vaddr, size);
+ }
+ p = phys_page_find(paddr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+#if defined(DEBUG_TLB)
+ printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d size=" TARGET_FMT_lx " pd=0x%08lx\n",
+ vaddr, (int)paddr, prot, mmu_idx, size, (long)pd);
+#endif
+
+ address = vaddr;
+ if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
+ /* IO memory case (romd handled later) */
+ address |= TLB_MMIO;
+ }
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+ addend = pd & TARGET_PAGE_MASK;
+#elif !defined(VBOX)
+ addend = (uintptr_t)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
+#else
+ /** @todo this is racing the phys_page_find call above since it may register
+ * a new chunk of memory... */
+ addend = (uintptr_t)remR3TlbGCPhys2Ptr(env, pd & TARGET_PAGE_MASK, !!(prot & PAGE_WRITE));
+#endif
+
+ if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
+ /* Normal RAM. */
+ iotlb = pd & TARGET_PAGE_MASK;
+ if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
+ iotlb |= IO_MEM_NOTDIRTY;
+ else
+ iotlb |= IO_MEM_ROM;
+ } else {
+ /* IO handlers are currently passed a physical address.
+ It would be nice to pass an offset from the base address
+ of that region. This would avoid having to special case RAM,
+ and avoid full address decoding in every device.
+ We can't use the high bits of pd for this because
+ IO_MEM_ROMD uses these as a ram address. */
+ iotlb = (pd & ~TARGET_PAGE_MASK);
+ if (p) {
+ iotlb += p->region_offset;
+ } else {
+ iotlb += paddr;
+ }
+ }
+
+ code_address = address;
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+
+ if (addend & 0x3)
+ {
+ if (addend & 0x2)
+ {
+ /* catch write */
+ if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM)
+ write_mods |= TLB_MMIO;
+ }
+ else if (addend & 0x1)
+ {
+ /* catch all */
+ if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM)
+ {
+ read_mods |= TLB_MMIO;
+ write_mods |= TLB_MMIO;
+ code_mods |= TLB_MMIO;
+ }
+ }
+ if ((iotlb & ~TARGET_PAGE_MASK) == 0)
+ iotlb = env->pVM->rem.s.iHandlerMemType + paddr;
+ addend &= ~(target_ulong)0x3;
+ }
+
+#endif
+ /* Make accesses to pages with watchpoints go via the
+ watchpoint trap routines. */
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+ if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
+ /* Avoid trapping reads of pages with a write breakpoint. */
+ if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
+ iotlb = io_mem_watch + paddr;
+ address |= TLB_MMIO;
+ break;
+ }
+ }
+ }
+
+ index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ env->iotlb[mmu_idx][index] = iotlb - vaddr;
+ te = &env->tlb_table[mmu_idx][index];
+ te->addend = addend - vaddr;
+ if (prot & PAGE_READ) {
+ te->addr_read = address;
+ } else {
+ te->addr_read = -1;
+ }
+
+ if (prot & PAGE_EXEC) {
+ te->addr_code = code_address;
+ } else {
+ te->addr_code = -1;
+ }
+ if (prot & PAGE_WRITE) {
+ if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
+ (pd & IO_MEM_ROMD)) {
+ /* Write access calls the I/O callback. */
+ te->addr_write = address | TLB_MMIO;
+ } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
+ !cpu_physical_memory_is_dirty(pd)) {
+ te->addr_write = address | TLB_NOTDIRTY;
+ } else {
+ te->addr_write = address;
+ }
+ } else {
+ te->addr_write = -1;
+ }
+
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ if (prot & PAGE_READ)
+ te->addr_read |= read_mods;
+ if (prot & PAGE_EXEC)
+ te->addr_code |= code_mods;
+ if (prot & PAGE_WRITE)
+ te->addr_write |= write_mods;
+
+ env->phys_addends[mmu_idx][index] = (pd & TARGET_PAGE_MASK) - vaddr;
+#endif
+
+#ifdef VBOX
+ /* inform raw mode about TLB page change */
+ remR3FlushPage(env, vaddr);
+#endif
+}
+
+#else
+
+void tlb_flush(CPUState *env, int flush_global)
+{
+}
+
+void tlb_flush_page(CPUState *env, target_ulong addr)
+{
+}
+
+/*
+ * Walks guest process memory "regions" one by one
+ * and calls callback function 'fn' for each region.
+ */
+
+struct walk_memory_regions_data
+{
+ walk_memory_regions_fn fn;
+ void *priv;
+ uintptr_t start;
+ int prot;
+};
+
+static int walk_memory_regions_end(struct walk_memory_regions_data *data,
+ abi_ulong end, int new_prot)
+{
+ if (data->start != -1ul) {
+ int rc = data->fn(data->priv, data->start, end, data->prot);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+
+ data->start = (new_prot ? end : -1ul);
+ data->prot = new_prot;
+
+ return 0;
+}
+
+static int walk_memory_regions_1(struct walk_memory_regions_data *data,
+ abi_ulong base, int level, void **lp)
+{
+ abi_ulong pa;
+ int i, rc;
+
+ if (*lp == NULL) {
+ return walk_memory_regions_end(data, base, 0);
+ }
+
+ if (level == 0) {
+ PageDesc *pd = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ int prot = pd[i].flags;
+
+ pa = base | (i << TARGET_PAGE_BITS);
+ if (prot != data->prot) {
+ rc = walk_memory_regions_end(data, pa, prot);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+ }
+ } else {
+ void **pp = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ pa = base | ((abi_ulong)i <<
+ (TARGET_PAGE_BITS + L2_BITS * level));
+ rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
+{
+ struct walk_memory_regions_data data;
+ target_ulong i;
+
+ data.fn = fn;
+ data.priv = priv;
+ data.start = -1ul;
+ data.prot = 0;
+
+ for (i = 0; i < V_L1_SIZE; i++) {
+ int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
+ V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+ if (rc != 0) {
+ return rc;
+ }
+ }
+
+ return walk_memory_regions_end(&data, 0, 0);
+}
+
+static int dump_region(void *priv, abi_ulong start,
+ abi_ulong end, unsigned long prot)
+{
+ FILE *f = (FILE *)priv;
+
+ (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
+ " "TARGET_ABI_FMT_lx" %c%c%c\n",
+ start, end, end - start,
+ ((prot & PAGE_READ) ? 'r' : '-'),
+ ((prot & PAGE_WRITE) ? 'w' : '-'),
+ ((prot & PAGE_EXEC) ? 'x' : '-'));
+
+ return (0);
+}
+
+/* dump memory mappings */
+void page_dump(FILE *f)
+{
+ (void) fprintf(f, "%-8s %-8s %-8s %s\n",
+ "start", "end", "size", "prot");
+ walk_memory_regions(f, dump_region);
+}
+
+int page_get_flags(target_ulong address)
+{
+ PageDesc *p;
+
+ p = page_find(address >> TARGET_PAGE_BITS);
+ if (!p)
+ return 0;
+ return p->flags;
+}
+
+/* Modify the flags of a page and invalidate the code if necessary.
+ The flag PAGE_WRITE_ORG is set automatically depending
+ on PAGE_WRITE. The mmap_lock should already be held. */
+void page_set_flags(target_ulong start, target_ulong end, int flags)
+{
+ target_ulong addr, len;
+
+ /* This function should never be called with addresses outside the
+ guest address space. If this assert fires, it probably indicates
+ a missing call to h2g_valid. */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+ assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+ assert(start < end);
+
+ start = start & TARGET_PAGE_MASK;
+ end = TARGET_PAGE_ALIGN(end);
+
+ if (flags & PAGE_WRITE) {
+ flags |= PAGE_WRITE_ORG;
+ }
+
+#ifdef VBOX
+ AssertMsgFailed(("We shouldn't be here, and if we should, we must have an env to do the proper locking!\n"));
+#endif
+ for (addr = start, len = end - start;
+ len != 0;
+ len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+ PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+
+ /* If the write protection bit is set, then we invalidate
+ the code inside. */
+ if (!(p->flags & PAGE_WRITE) &&
+ (flags & PAGE_WRITE) &&
+ p->first_tb) {
+ tb_invalidate_phys_page(addr, 0, NULL);
+ }
+ p->flags = flags;
+ }
+}
+
+int page_check_range(target_ulong start, target_ulong len, int flags)
+{
+ PageDesc *p;
+ target_ulong end;
+ target_ulong addr;
+
+ /* This function should never be called with addresses outside the
+ guest address space. If this assert fires, it probably indicates
+ a missing call to h2g_valid. */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+ assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+
+ if (len == 0) {
+ return 0;
+ }
+ if (start + len - 1 < start) {
+ /* We've wrapped around. */
+ return -1;
+ }
+
+ end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
+ start = start & TARGET_PAGE_MASK;
+
+ for (addr = start, len = end - start;
+ len != 0;
+ len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+ p = page_find(addr >> TARGET_PAGE_BITS);
+ if (!p)
+ return -1;
+ if (!(p->flags & PAGE_VALID))
+ return -1;
+
+ if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
+ return -1;
+ if (flags & PAGE_WRITE) {
+ if (!(p->flags & PAGE_WRITE_ORG))
+ return -1;
+ /* unprotect the page if it was put read-only because it
+ contains translated code */
+ if (!(p->flags & PAGE_WRITE)) {
+ if (!page_unprotect(addr, 0, NULL))
+ return -1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/* called from signal handler: invalidate the code and unprotect the
+ page. Return TRUE if the fault was successfully handled. */
+int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
+{
+ unsigned int prot;
+ PageDesc *p;
+ target_ulong host_start, host_end, addr;
+
+ /* Technically this isn't safe inside a signal handler. However we
+ know this only ever happens in a synchronous SEGV handler, so in
+ practice it seems to be ok. */
+ mmap_lock();
+
+ p = page_find(address >> TARGET_PAGE_BITS);
+ if (!p) {
+ mmap_unlock();
+ return 0;
+ }
+
+ /* if the page was really writable, then we change its
+ protection back to writable */
+ if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
+ host_start = address & qemu_host_page_mask;
+ host_end = host_start + qemu_host_page_size;
+
+ prot = 0;
+ for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
+ p = page_find(addr >> TARGET_PAGE_BITS);
+ p->flags |= PAGE_WRITE;
+ prot |= p->flags;
+
+ /* and since the content will be modified, we must invalidate
+ the corresponding translated code. */
+ tb_invalidate_phys_page(addr, pc, puc);
+#ifdef DEBUG_TB_CHECK
+ tb_invalidate_check(addr);
+#endif
+ }
+ mprotect((void *)g2h(host_start), qemu_host_page_size,
+ prot & PAGE_BITS);
+
+ mmap_unlock();
+ return 1;
+ }
+ mmap_unlock();
+ return 0;
+}
+
+static inline void tlb_set_dirty(CPUState *env,
+ uintptr_t addr, target_ulong vaddr)
+{
+}
+#endif /* defined(CONFIG_USER_ONLY) */
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
+typedef struct subpage_t {
+ target_phys_addr_t base;
+ ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
+ ram_addr_t region_offset[TARGET_PAGE_SIZE];
+} subpage_t;
+
+static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
+ ram_addr_t memory, ram_addr_t region_offset);
+static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
+ ram_addr_t orig_memory,
+ ram_addr_t region_offset);
+#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
+ need_subpage) \
+ do { \
+ if (addr > start_addr) \
+ start_addr2 = 0; \
+ else { \
+ start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
+ if (start_addr2 > 0) \
+ need_subpage = 1; \
+ } \
+ \
+ if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
+ end_addr2 = TARGET_PAGE_SIZE - 1; \
+ else { \
+ end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
+ if (end_addr2 < TARGET_PAGE_SIZE - 1) \
+ need_subpage = 1; \
+ } \
+ } while (0)
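+
+/* Editorial worked example: with 4 KiB pages, registering a region of
+ orig_size = 0x200 bytes at start_addr = 0x10100 and evaluating the macro for
+ the page addr = 0x10000 yields start_addr2 = 0x100, end_addr2 = 0x2ff and
+ need_subpage = 1, i.e. only offsets 0x100..0x2ff of that page are covered by
+ the new memory, so a subpage must be used. */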
+
+/* register physical memory.
+ For RAM, 'size' must be a multiple of the target page size.
+ If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
+ io memory page. The address used when calling the IO function is
+ the offset from the start of the region, plus region_offset. Both
+ start_addr and region_offset are rounded down to a page boundary
+ before calculating this offset. This should not be a problem unless
+ the low bits of start_addr and region_offset differ. */
+void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset,
+ ram_addr_t region_offset)
+{
+ target_phys_addr_t addr, end_addr;
+ PhysPageDesc *p;
+ CPUState *env;
+ ram_addr_t orig_size = size;
+ subpage_t *subpage;
+
+#ifndef VBOX
+ cpu_notify_set_memory(start_addr, size, phys_offset);
+#endif /* !VBOX */
+
+ if (phys_offset == IO_MEM_UNASSIGNED) {
+ region_offset = start_addr;
+ }
+ region_offset &= TARGET_PAGE_MASK;
+ size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
+ end_addr = start_addr + (target_phys_addr_t)size;
+ for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
+ ram_addr_t orig_memory = p->phys_offset;
+ target_phys_addr_t start_addr2, end_addr2;
+ int need_subpage = 0;
+
+ CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
+ need_subpage);
+ if (need_subpage) {
+ if (!(orig_memory & IO_MEM_SUBPAGE)) {
+ subpage = subpage_init((addr & TARGET_PAGE_MASK),
+ &p->phys_offset, orig_memory,
+ p->region_offset);
+ } else {
+ subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
+ >> IO_MEM_SHIFT];
+ }
+ subpage_register(subpage, start_addr2, end_addr2, phys_offset,
+ region_offset);
+ p->region_offset = 0;
+ } else {
+ p->phys_offset = phys_offset;
+ if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
+ (phys_offset & IO_MEM_ROMD))
+ phys_offset += TARGET_PAGE_SIZE;
+ }
+ } else {
+ p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+ p->phys_offset = phys_offset;
+ p->region_offset = region_offset;
+ if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
+ (phys_offset & IO_MEM_ROMD)) {
+ phys_offset += TARGET_PAGE_SIZE;
+ } else {
+ target_phys_addr_t start_addr2, end_addr2;
+ int need_subpage = 0;
+
+ CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
+ end_addr2, need_subpage);
+
+ if (need_subpage) {
+ subpage = subpage_init((addr & TARGET_PAGE_MASK),
+ &p->phys_offset, IO_MEM_UNASSIGNED,
+ addr & TARGET_PAGE_MASK);
+ subpage_register(subpage, start_addr2, end_addr2,
+ phys_offset, region_offset);
+ p->region_offset = 0;
+ }
+ }
+ }
+ region_offset += TARGET_PAGE_SIZE;
+ }
+
+ /* since each CPU stores ram addresses in its TLB cache, we must
+ reset the modified entries */
+#ifndef VBOX
+ /* XXX: slow ! */
+ for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ tlb_flush(env, 1);
+ }
+#else
+ /* We have one thread per CPU, so one of the other EMTs might be executing
+ code right now, and flushing the TLB may crash it. */
+ env = first_cpu;
+ if (EMRemIsLockOwner(env->pVM))
+ tlb_flush(env, 1);
+ else
+ ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request,
+ CPU_INTERRUPT_EXTERNAL_FLUSH_TLB);
+#endif
+}
+
+/* XXX: temporary until new memory mapping API */
+ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
+{
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p)
+ return IO_MEM_UNASSIGNED;
+ return p->phys_offset;
+}
+
+#ifndef VBOX
+
+void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
+{
+ if (kvm_enabled())
+ kvm_coalesce_mmio_region(addr, size);
+}
+
+void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
+{
+ if (kvm_enabled())
+ kvm_uncoalesce_mmio_region(addr, size);
+}
+
+void qemu_flush_coalesced_mmio_buffer(void)
+{
+ if (kvm_enabled())
+ kvm_flush_coalesced_mmio_buffer();
+}
+
+#if defined(__linux__) && !defined(TARGET_S390X)
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+static size_t gethugepagesize(const char *path)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ perror(path);
+ return 0;
+ }
+
+ if (fs.f_type != HUGETLBFS_MAGIC)
+ fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+ return (size_t)fs.f_bsize;
+}
+
+static void *file_ram_alloc(RAMBlock *block,
+ ram_addr_t memory,
+ const char *path)
+{
+ char *filename;
+ void *area;
+ int fd;
+#ifdef MAP_POPULATE
+ int flags;
+#endif
+ size_t hpagesize;
+
+ hpagesize = gethugepagesize(path);
+ if (!hpagesize) {
+ return NULL;
+ }
+
+ if (memory < hpagesize) {
+ return NULL;
+ }
+
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+ return NULL;
+ }
+
+ if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
+ return NULL;
+ }
+
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ perror("unable to create backing store for hugepages");
+ free(filename);
+ return NULL;
+ }
+ unlink(filename);
+ free(filename);
+
+ memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+ /*
+ * ftruncate is not supported by hugetlbfs in older
+ * hosts, so don't bother bailing out on errors.
+ * If anything goes wrong with it under other filesystems,
+ * mmap will fail.
+ */
+ if (ftruncate(fd, memory))
+ perror("ftruncate");
+
+#ifdef MAP_POPULATE
+ /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
+ * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
+ * to sidestep this quirk.
+ */
+ flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
+ area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
+#else
+ area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
+ if (area == MAP_FAILED) {
+ perror("file_ram_alloc: can't mmap RAM pages");
+ close(fd);
+ return (NULL);
+ }
+ block->fd = fd;
+ return area;
+}
+#endif
+
+static ram_addr_t find_ram_offset(ram_addr_t size)
+{
+ RAMBlock *block, *next_block;
+ ram_addr_t offset = 0, mingap = ULONG_MAX;
+
+ if (QLIST_EMPTY(&ram_list.blocks))
+ return 0;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ ram_addr_t end, next = ULONG_MAX;
+
+ end = block->offset + block->length;
+
+ QLIST_FOREACH(next_block, &ram_list.blocks, next) {
+ if (next_block->offset >= end) {
+ next = MIN(next, next_block->offset);
+ }
+ }
+ if (next - end >= size && next - end < mingap) {
+ offset = end;
+ mingap = next - end;
+ }
+ }
+ return offset;
+}
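+
+/* Editorial sketch: the best-fit gap search above, restated over a plain
+ array so it can be read (and run) in isolation; the struct, names and values
+ are illustrative, not part of the original source. With blocks at
+ [0x0000,0x1000) and [0x3000,0x4000), a request of 0x800 bytes is placed at
+ offset 0x1000, the start of the smallest gap that is large enough. */
+#if 0 /* illustrative only */
+#include <stdint.h>
+#include <stdio.h>
+
+struct blk { uint64_t offset, length; };
+
+static uint64_t pick_offset(const struct blk *b, int n, uint64_t size)
+{
+ uint64_t offset = 0, mingap = UINT64_MAX;
+ int i, j;
+
+ for (i = 0; i < n; i++) {
+ uint64_t end = b[i].offset + b[i].length, next = UINT64_MAX;
+ for (j = 0; j < n; j++)
+ if (b[j].offset >= end && b[j].offset < next)
+ next = b[j].offset;
+ if (next - end >= size && next - end < mingap) {
+ offset = end;
+ mingap = next - end;
+ }
+ }
+ return offset;
+}
+
+int main(void)
+{
+ struct blk blocks[] = { { 0x0000, 0x1000 }, { 0x3000, 0x1000 } };
+ /* prints: new block offset: 0x1000 */
+ printf("new block offset: %#llx\n",
+ (unsigned long long)pick_offset(blocks, 2, 0x800));
+ return 0;
+}
+#endif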
+
+static ram_addr_t last_ram_offset(void)
+{
+ RAMBlock *block;
+ ram_addr_t last = 0;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next)
+ last = MAX(last, block->offset + block->length);
+
+ return last;
+}
+
+ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
+ ram_addr_t size, void *host)
+{
+ RAMBlock *new_block, *block;
+
+ size = TARGET_PAGE_ALIGN(size);
+ new_block = qemu_mallocz(sizeof(*new_block));
+
+ if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
+ char *id = dev->parent_bus->info->get_dev_path(dev);
+ if (id) {
+ snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
+ qemu_free(id);
+ }
+ }
+ pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (!strcmp(block->idstr, new_block->idstr)) {
+ fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
+ new_block->idstr);
+ abort();
+ }
+ }
+
+ new_block->host = host;
+
+ new_block->offset = find_ram_offset(size);
+ new_block->length = size;
+
+ QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+ ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+ last_ram_offset() >> TARGET_PAGE_BITS);
+ memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+ 0xff, size >> TARGET_PAGE_BITS);
+
+ if (kvm_enabled())
+ kvm_setup_guest_memory(new_block->host, size);
+
+ return new_block->offset;
+}
+
+ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
+{
+ RAMBlock *new_block, *block;
+
+ size = TARGET_PAGE_ALIGN(size);
+ new_block = qemu_mallocz(sizeof(*new_block));
+
+ if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
+ char *id = dev->parent_bus->info->get_dev_path(dev);
+ if (id) {
+ snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
+ qemu_free(id);
+ }
+ }
+ pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (!strcmp(block->idstr, new_block->idstr)) {
+ fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
+ new_block->idstr);
+ abort();
+ }
+ }
+
+ if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+ new_block->host = file_ram_alloc(new_block, size, mem_path);
+ if (!new_block->host) {
+ new_block->host = qemu_vmalloc(size);
+#ifdef MADV_MERGEABLE
+ madvise(new_block->host, size, MADV_MERGEABLE);
+#endif
+ }
+#else
+ fprintf(stderr, "-mem-path option unsupported\n");
+ exit(1);
+#endif
+ } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+ /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+ new_block->host = mmap((void*)0x1000000, size,
+ PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+#else
+ new_block->host = qemu_vmalloc(size);
+#endif
+#ifdef MADV_MERGEABLE
+ madvise(new_block->host, size, MADV_MERGEABLE);
+#endif
+ }
+ new_block->offset = find_ram_offset(size);
+ new_block->length = size;
+
+ QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+ ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+ last_ram_offset() >> TARGET_PAGE_BITS);
+ memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+ 0xff, size >> TARGET_PAGE_BITS);
+
+ if (kvm_enabled())
+ kvm_setup_guest_memory(new_block->host, size);
+
+ return new_block->offset;
+}
+
+void qemu_ram_free(ram_addr_t addr)
+{
+ RAMBlock *block;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (addr == block->offset) {
+ QLIST_REMOVE(block, next);
+ if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+ if (block->fd) {
+ munmap(block->host, block->length);
+ close(block->fd);
+ } else {
+ qemu_vfree(block->host);
+ }
+#endif
+ } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+ munmap(block->host, block->length);
+#else
+ qemu_vfree(block->host);
+#endif
+ }
+ qemu_free(block);
+ return;
+ }
+ }
+
+}
+
+/* Return a host pointer to ram allocated with qemu_ram_alloc.
+ With the exception of the softmmu code in this file, this should
+ only be used for local memory (e.g. video ram) that the device owns,
+ and knows it isn't going to access beyond the end of the block.
+
+ It should not be used for general purpose DMA.
+ Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
+ */
+void *qemu_get_ram_ptr(ram_addr_t addr)
+{
+ RAMBlock *block;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (addr - block->offset < block->length) {
+ QLIST_REMOVE(block, next);
+ QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+ return block->host + (addr - block->offset);
+ }
+ }
+
+ fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+ abort();
+
+ return NULL;
+}
+
+/* Some of the softmmu routines need to translate from a host pointer
+ (typically a TLB entry) back to a ram offset. */
+ram_addr_t qemu_ram_addr_from_host(void *ptr)
+{
+ RAMBlock *block;
+ uint8_t *host = ptr;
+
+ QLIST_FOREACH(block, &ram_list.blocks, next) {
+ if (host - block->host < block->length) {
+ return block->offset + (host - block->host);
+ }
+ }
+
+ fprintf(stderr, "Bad ram pointer %p\n", ptr);
+ abort();
+
+ return 0;
+}
+
+#endif /* !VBOX */
+
+static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 0, 0, 0, 1);
+#endif
+ return 0;
+}
+
+static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 0, 0, 0, 2);
+#endif
+ return 0;
+}
+
+static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 0, 0, 0, 4);
+#endif
+ return 0;
+}
+
+static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 1, 0, 0, 1);
+#endif
+}
+
+static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 1, 0, 0, 2);
+#endif
+}
+
+static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
+#endif
+#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
+ do_unassigned_access(addr, 1, 0, 0, 4);
+#endif
+}
+
+static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
+ unassigned_mem_readb,
+ unassigned_mem_readw,
+ unassigned_mem_readl,
+};
+
+static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
+ unassigned_mem_writeb,
+ unassigned_mem_writew,
+ unassigned_mem_writel,
+};
+
+static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
+ uint32_t val)
+{
+ int dirty_flags;
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+ if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+ tb_invalidate_phys_page_fast(ram_addr, 1);
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+#endif
+ }
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ remR3PhysWriteU8(ram_addr, val);
+#else
+ stb_p(qemu_get_ram_ptr(ram_addr), val);
+#endif
+ dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+ cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+ /* we remove the notdirty callback only if the code has been
+ flushed */
+ if (dirty_flags == 0xff)
+ tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+}
+
+static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
+ uint32_t val)
+{
+ int dirty_flags;
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+ if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+ tb_invalidate_phys_page_fast(ram_addr, 2);
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+#endif
+ }
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ remR3PhysWriteU16(ram_addr, val);
+#else
+ stw_p(qemu_get_ram_ptr(ram_addr), val);
+#endif
+ dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+ cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+ /* we remove the notdirty callback only if the code has been
+ flushed */
+ if (dirty_flags == 0xff)
+ tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+}
+
+static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
+ uint32_t val)
+{
+ int dirty_flags;
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+ if (!(dirty_flags & CODE_DIRTY_FLAG)) {
+#if !defined(CONFIG_USER_ONLY)
+ tb_invalidate_phys_page_fast(ram_addr, 4);
+ dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
+#endif
+ }
+#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB)
+ remR3PhysWriteU32(ram_addr, val);
+#else
+ stl_p(qemu_get_ram_ptr(ram_addr), val);
+#endif
+ dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
+ cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+ /* we remove the notdirty callback only if the code has been
+ flushed */
+ if (dirty_flags == 0xff)
+ tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+}
+
+static CPUReadMemoryFunc * const error_mem_read[3] = {
+ NULL, /* never used */
+ NULL, /* never used */
+ NULL, /* never used */
+};
+
+static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
+ notdirty_mem_writeb,
+ notdirty_mem_writew,
+ notdirty_mem_writel,
+};
+
+/* Generate a debug exception if a watchpoint has been hit. */
+static void check_watchpoint(int offset, int len_mask, int flags)
+{
+ CPUState *env = cpu_single_env;
+ target_ulong pc, cs_base;
+ TranslationBlock *tb;
+ target_ulong vaddr;
+ CPUWatchpoint *wp;
+ int cpu_flags;
+
+ if (env->watchpoint_hit) {
+ /* We re-entered the check after replacing the TB. Now raise
+ * the debug interrupt so that it will trigger after the
+ * current instruction. */
+ cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
+ return;
+ }
+ vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
+ QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+ if ((vaddr == (wp->vaddr & len_mask) ||
+ (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
+ wp->flags |= BP_WATCHPOINT_HIT;
+ if (!env->watchpoint_hit) {
+ env->watchpoint_hit = wp;
+ tb = tb_find_pc(env->mem_io_pc);
+ if (!tb) {
+ cpu_abort(env, "check_watchpoint: could not find TB for "
+ "pc=%p", (void *)env->mem_io_pc);
+ }
+ cpu_restore_state(tb, env, env->mem_io_pc, NULL);
+ tb_phys_invalidate(tb, -1);
+ if (wp->flags & BP_STOP_BEFORE_ACCESS) {
+ env->exception_index = EXCP_DEBUG;
+ } else {
+ cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
+ tb_gen_code(env, pc, cs_base, cpu_flags, 1);
+ }
+ cpu_resume_from_signal(env, NULL);
+ }
+ } else {
+ wp->flags &= ~BP_WATCHPOINT_HIT;
+ }
+ }
+}
+
+/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
+ so these check for a hit then pass through to the normal out-of-line
+ phys routines. */
+static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
+ return ldub_phys(addr);
+}
+
+static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
+ return lduw_phys(addr);
+}
+
+static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
+ return ldl_phys(addr);
+}
+
+static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
+ stb_phys(addr, val);
+}
+
+static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
+ stw_phys(addr, val);
+}
+
+static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
+ stl_phys(addr, val);
+}
+
+static CPUReadMemoryFunc * const watch_mem_read[3] = {
+ watch_mem_readb,
+ watch_mem_readw,
+ watch_mem_readl,
+};
+
+static CPUWriteMemoryFunc * const watch_mem_write[3] = {
+ watch_mem_writeb,
+ watch_mem_writew,
+ watch_mem_writel,
+};
+
+static inline uint32_t subpage_readlen (subpage_t *mmio,
+ target_phys_addr_t addr,
+ unsigned int len)
+{
+ unsigned int idx = SUBPAGE_IDX(addr);
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
+ mmio, len, addr, idx);
+#endif
+
+ addr += mmio->region_offset[idx];
+ idx = mmio->sub_io_index[idx];
+ return io_mem_read[idx][len](io_mem_opaque[idx], addr);
+}
+
+static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
+ uint32_t value, unsigned int len)
+{
+ unsigned int idx = SUBPAGE_IDX(addr);
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
+ __func__, mmio, len, addr, idx, value);
+#endif
+
+ addr += mmio->region_offset[idx];
+ idx = mmio->sub_io_index[idx];
+ io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
+}
+
+static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
+{
+ return subpage_readlen(opaque, addr, 0);
+}
+
+static void subpage_writeb (void *opaque, target_phys_addr_t addr,
+ uint32_t value)
+{
+ subpage_writelen(opaque, addr, value, 0);
+}
+
+static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
+{
+ return subpage_readlen(opaque, addr, 1);
+}
+
+static void subpage_writew (void *opaque, target_phys_addr_t addr,
+ uint32_t value)
+{
+ subpage_writelen(opaque, addr, value, 1);
+}
+
+static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
+{
+ return subpage_readlen(opaque, addr, 2);
+}
+
+static void subpage_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t value)
+{
+ subpage_writelen(opaque, addr, value, 2);
+}
+
+static CPUReadMemoryFunc * const subpage_read[] = {
+ &subpage_readb,
+ &subpage_readw,
+ &subpage_readl,
+};
+
+static CPUWriteMemoryFunc * const subpage_write[] = {
+ &subpage_writeb,
+ &subpage_writew,
+ &subpage_writel,
+};
+
+static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
+ ram_addr_t memory, ram_addr_t region_offset)
+{
+ int idx, eidx;
+
+ if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
+ return -1;
+ idx = SUBPAGE_IDX(start);
+ eidx = SUBPAGE_IDX(end);
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
+ mmio, start, end, idx, eidx, memory);
+#endif
+ memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ for (; idx <= eidx; idx++) {
+ mmio->sub_io_index[idx] = memory;
+ mmio->region_offset[idx] = region_offset;
+ }
+
+ return 0;
+}
+
+static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
+ ram_addr_t orig_memory,
+ ram_addr_t region_offset)
+{
+ subpage_t *mmio;
+ int subpage_memory;
+
+ mmio = qemu_mallocz(sizeof(subpage_t));
+
+ mmio->base = base;
+ subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
+ mmio, base, TARGET_PAGE_SIZE, subpage_memory);
+#endif
+ *phys = subpage_memory | IO_MEM_SUBPAGE;
+ subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
+
+ return mmio;
+}
+
+static int get_free_io_mem_idx(void)
+{
+ int i;
+
+ for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
+ if (!io_mem_used[i]) {
+ io_mem_used[i] = 1;
+ return i;
+ }
+ fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
+ return -1;
+}
+
+/* mem_read and mem_write are arrays of functions containing the
+ function to access byte (index 0), word (index 1) and dword (index
+ 2). Functions can be omitted with a NULL function pointer.
+ If io_index is non-zero, the corresponding io zone is
+ modified. If it is zero, a new io zone is allocated. The return
+ value can be used with cpu_register_physical_memory(); -1 is
+ returned on error. */
+static int cpu_register_io_memory_fixed(int io_index,
+ CPUReadMemoryFunc * const *mem_read,
+ CPUWriteMemoryFunc * const *mem_write,
+ void *opaque)
+{
+ int i;
+
+ if (io_index <= 0) {
+ io_index = get_free_io_mem_idx();
+ if (io_index == -1)
+ return io_index;
+ } else {
+ io_index >>= IO_MEM_SHIFT;
+ if (io_index >= IO_MEM_NB_ENTRIES)
+ return -1;
+ }
+
+ for (i = 0; i < 3; ++i) {
+ io_mem_read[io_index][i]
+ = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
+ }
+ for (i = 0; i < 3; ++i) {
+ io_mem_write[io_index][i]
+ = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
+ }
+ io_mem_opaque[io_index] = opaque;
+
+ return (io_index << IO_MEM_SHIFT);
+}
+
+int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
+ CPUWriteMemoryFunc * const *mem_write,
+ void *opaque)
+{
+ return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
+}
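+
+/* Editorial usage sketch: how a hypothetical device would hook MMIO through
+ this API. The mydev_* names are illustrative; per the comment above, NULL
+ entries fall back to the unassigned handlers, and the returned token is
+ passed to cpu_register_physical_memory(). */
+#if 0 /* illustrative only */
+static uint32_t mydev_readl(void *opaque, target_phys_addr_t addr)
+{
+ /* 'addr' is the offset within the registered region (plus region_offset) */
+ return 0;
+}
+
+static void mydev_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ /* handle a 32-bit guest write to the device register at 'addr' */
+}
+
+static CPUReadMemoryFunc * const mydev_read[3] = { NULL, NULL, mydev_readl };
+static CPUWriteMemoryFunc * const mydev_write[3] = { NULL, NULL, mydev_writel };
+
+static void mydev_map(target_phys_addr_t base, void *dev_state)
+{
+ int io = cpu_register_io_memory(mydev_read, mydev_write, dev_state);
+ cpu_register_physical_memory(base, TARGET_PAGE_SIZE, io);
+}
+#endif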
+
+void cpu_unregister_io_memory(int io_table_address)
+{
+ int i;
+ int io_index = io_table_address >> IO_MEM_SHIFT;
+
+ for (i = 0; i < 3; i++) {
+ io_mem_read[io_index][i] = unassigned_mem_read[i];
+ io_mem_write[io_index][i] = unassigned_mem_write[i];
+ }
+ io_mem_opaque[io_index] = NULL;
+ io_mem_used[io_index] = 0;
+}
+
+static void io_mem_init(void)
+{
+ int i;
+
+ cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL);
+ cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL);
+ cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL);
+ for (i = 0; i < 5; i++)
+ io_mem_used[i] = 1;
+
+ io_mem_watch = cpu_register_io_memory(watch_mem_read,
+ watch_mem_write, NULL);
+}
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+/* physical memory access (slow version, mainly for debug) */
+#if defined(CONFIG_USER_ONLY)
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+ uint8_t *buf, int len, int is_write)
+{
+ int l, flags;
+ target_ulong page;
+ void * p;
+
+ while (len > 0) {
+ page = addr & TARGET_PAGE_MASK;
+ l = (page + TARGET_PAGE_SIZE) - addr;
+ if (l > len)
+ l = len;
+ flags = page_get_flags(page);
+ if (!(flags & PAGE_VALID))
+ return -1;
+ if (is_write) {
+ if (!(flags & PAGE_WRITE))
+ return -1;
+ /* XXX: this code should not depend on lock_user */
+ if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
+ return -1;
+ memcpy(p, buf, l);
+ unlock_user(p, addr, l);
+ } else {
+ if (!(flags & PAGE_READ))
+ return -1;
+ /* XXX: this code should not depend on lock_user */
+ if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
+ return -1;
+ memcpy(buf, p, l);
+ unlock_user(p, addr, 0);
+ }
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+ return 0;
+}
+
+#else
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+ int len, int is_write)
+{
+ int l, io_index;
+ uint8_t *ptr;
+ uint32_t val;
+ target_phys_addr_t page;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ while (len > 0) {
+ page = addr & TARGET_PAGE_MASK;
+ l = (page + TARGET_PAGE_SIZE) - addr;
+ if (l > len)
+ l = len;
+ p = phys_page_find(page >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if (is_write) {
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ target_phys_addr_t addr1 = addr;
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ /* XXX: could force cpu_single_env to NULL to avoid
+ potential bugs */
+ if (l >= 4 && ((addr1 & 3) == 0)) {
+ /* 32 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ val = ldl_p(buf);
+#else
+ val = *(const uint32_t *)buf;
+#endif
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
+ l = 4;
+ } else if (l >= 2 && ((addr1 & 1) == 0)) {
+ /* 16 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ val = lduw_p(buf);
+#else
+ val = *(const uint16_t *)buf;
+#endif
+ io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
+ l = 2;
+ } else {
+ /* 8 bit write access */
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ val = ldub_p(buf);
+#else
+ val = *(const uint8_t *)buf;
+#endif
+ io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
+ l = 1;
+ }
+ } else {
+ ram_addr_t addr1;
+ addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ /* RAM case */
+#ifdef VBOX
+ remR3PhysWrite(addr1, buf, l); NOREF(ptr);
+#else
+ ptr = qemu_get_ram_ptr(addr1);
+ memcpy(ptr, buf, l);
+#endif
+ if (!cpu_physical_memory_is_dirty(addr1)) {
+ /* invalidate code */
+ tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
+ /* set dirty bit */
+ cpu_physical_memory_set_dirty_flags(
+ addr1, (0xff & ~CODE_DIRTY_FLAG));
+ }
+ }
+ } else {
+ if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+ !(pd & IO_MEM_ROMD)) {
+ target_phys_addr_t addr1 = addr;
+ /* I/O case */
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ if (l >= 4 && ((addr1 & 3) == 0)) {
+ /* 32 bit read access */
+ val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ stl_p(buf, val);
+#else
+ *(uint32_t *)buf = val;
+#endif
+ l = 4;
+ } else if (l >= 2 && ((addr1 & 1) == 0)) {
+ /* 16 bit read access */
+ val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ stw_p(buf, val);
+#else
+ *(uint16_t *)buf = val;
+#endif
+ l = 2;
+ } else {
+ /* 8 bit read access */
+ val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+ stb_p(buf, val);
+#else
+ *(uint8_t *)buf = val;
+#endif
+ l = 1;
+ }
+ } else {
+ /* RAM case */
+#ifdef VBOX
+ remR3PhysRead((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), buf, l); NOREF(ptr);
+#else
+ ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ (addr & ~TARGET_PAGE_MASK);
+ memcpy(buf, ptr, l);
+#endif
+ }
+ }
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+}
+
+#ifndef VBOX
+
+/* used for ROM loading : can write in RAM and ROM */
+void cpu_physical_memory_write_rom(target_phys_addr_t addr,
+ const uint8_t *buf, int len)
+{
+ int l;
+ uint8_t *ptr;
+ target_phys_addr_t page;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ while (len > 0) {
+ page = addr & TARGET_PAGE_MASK;
+ l = (page + TARGET_PAGE_SIZE) - addr;
+ if (l > len)
+ l = len;
+ p = phys_page_find(page >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
+ (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
+ !(pd & IO_MEM_ROMD)) {
+ /* do nothing */
+ } else {
+ ram_addr_t addr1;
+ addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ /* ROM/RAM case */
+ ptr = qemu_get_ram_ptr(addr1);
+ memcpy(ptr, buf, l);
+ }
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+}
+
+typedef struct {
+ void *buffer;
+ target_phys_addr_t addr;
+ target_phys_addr_t len;
+} BounceBuffer;
+
+static BounceBuffer bounce;
+
+typedef struct MapClient {
+ void *opaque;
+ void (*callback)(void *opaque);
+ QLIST_ENTRY(MapClient) link;
+} MapClient;
+
+static QLIST_HEAD(map_client_list, MapClient) map_client_list
+ = QLIST_HEAD_INITIALIZER(map_client_list);
+
+void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
+{
+ MapClient *client = qemu_malloc(sizeof(*client));
+
+ client->opaque = opaque;
+ client->callback = callback;
+ QLIST_INSERT_HEAD(&map_client_list, client, link);
+ return client;
+}
+
+void cpu_unregister_map_client(void *_client)
+{
+ MapClient *client = (MapClient *)_client;
+
+ QLIST_REMOVE(client, link);
+ qemu_free(client);
+}
+
+static void cpu_notify_map_clients(void)
+{
+ MapClient *client;
+
+ while (!QLIST_EMPTY(&map_client_list)) {
+ client = QLIST_FIRST(&map_client_list);
+ client->callback(client->opaque);
+ cpu_unregister_map_client(client);
+ }
+}
+
+/* Map a physical memory region into a host virtual address.
+ * May map a subset of the requested range, given by and returned in *plen.
+ * May return NULL if resources needed to perform the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ * Use cpu_register_map_client() to know when retrying the map operation is
+ * likely to succeed.
+ */
+void *cpu_physical_memory_map(target_phys_addr_t addr,
+ target_phys_addr_t *plen,
+ int is_write)
+{
+ target_phys_addr_t len = *plen;
+ target_phys_addr_t done = 0;
+ int l;
+ uint8_t *ret = NULL;
+ uint8_t *ptr;
+ target_phys_addr_t page;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+ ram_addr_t addr1;
+
+ while (len > 0) {
+ page = addr & TARGET_PAGE_MASK;
+ l = (page + TARGET_PAGE_SIZE) - addr;
+ if (l > len)
+ l = len;
+ p = phys_page_find(page >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ if (done || bounce.buffer) {
+ break;
+ }
+ bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
+ bounce.addr = addr;
+ bounce.len = l;
+ if (!is_write) {
+ cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
+ }
+ ptr = bounce.buffer;
+ } else {
+ addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr(addr1);
+ }
+ if (!done) {
+ ret = ptr;
+ } else if (ret + done != ptr) {
+ break;
+ }
+
+ len -= l;
+ addr += l;
+ done += l;
+ }
+ *plen = done;
+ return ret;
+}
+
+/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
+ * Will also mark the memory as dirty if is_write == 1. access_len gives
+ * the amount of memory that was actually read or written by the caller.
+ */
+void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
+ int is_write, target_phys_addr_t access_len)
+{
+ if (buffer != bounce.buffer) {
+ if (is_write) {
+ ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
+ while (access_len) {
+ unsigned l;
+ l = TARGET_PAGE_SIZE;
+ if (l > access_len)
+ l = access_len;
+ if (!cpu_physical_memory_is_dirty(addr1)) {
+ /* invalidate code */
+ tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
+ /* set dirty bit */
+ cpu_physical_memory_set_dirty_flags(
+ addr1, (0xff & ~CODE_DIRTY_FLAG));
+ }
+ addr1 += l;
+ access_len -= l;
+ }
+ }
+ return;
+ }
+ if (is_write) {
+ cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
+ }
+ qemu_vfree(bounce.buffer);
+ bounce.buffer = NULL;
+ cpu_notify_map_clients();
+}
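+
+/* Editorial usage sketch: a bounce-aware guest-physical read built on the
+ map/unmap pair above. The function and variable names are illustrative;
+ when the region cannot be mapped in one piece, it falls back to
+ cpu_physical_memory_rw(). */
+#if 0 /* illustrative only */
+static void read_guest_block(target_phys_addr_t gpa, uint8_t *dst,
+ target_phys_addr_t size)
+{
+ target_phys_addr_t plen = size;
+ void *p = cpu_physical_memory_map(gpa, &plen, 0 /* is_write */);
+
+ if (p && plen == size) {
+ memcpy(dst, p, (size_t)size);
+ cpu_physical_memory_unmap(p, plen, 0, plen);
+ } else {
+ if (p)
+ cpu_physical_memory_unmap(p, plen, 0, 0);
+ cpu_physical_memory_rw(gpa, dst, (int)size, 0);
+ }
+}
+#endif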
+
+#endif /* !VBOX */
+
+/* warning: addr must be aligned */
+uint32_t ldl_phys(target_phys_addr_t addr)
+{
+ int io_index;
+ uint8_t *ptr;
+ uint32_t val;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+ !(pd & IO_MEM_ROMD)) {
+ /* I/O case */
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+ } else {
+ /* RAM case */
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ (addr & ~TARGET_PAGE_MASK);
+ val = ldl_p(ptr);
+#else
+ val = remR3PhysReadU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr);
+#endif
+ }
+ return val;
+}
+
+/* warning: addr must be aligned */
+uint64_t ldq_phys(target_phys_addr_t addr)
+{
+ int io_index;
+ uint8_t *ptr;
+ uint64_t val;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+ !(pd & IO_MEM_ROMD)) {
+ /* I/O case */
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
+ val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
+#else
+ val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+ val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
+#endif
+ } else {
+ /* RAM case */
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ (addr & ~TARGET_PAGE_MASK);
+ val = ldq_p(ptr);
+#else
+ val = remR3PhysReadU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr);
+#endif
+ }
+ return val;
+}
+
+/* XXX: optimize */
+uint32_t ldub_phys(target_phys_addr_t addr)
+{
+ uint8_t val;
+ cpu_physical_memory_read(addr, &val, 1);
+ return val;
+}
+
+/* warning: addr must be aligned */
+uint32_t lduw_phys(target_phys_addr_t addr)
+{
+ int io_index;
+#ifndef VBOX
+ uint8_t *ptr;
+#endif
+ uint64_t val;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
+ !(pd & IO_MEM_ROMD)) {
+ /* I/O case */
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
+ } else {
+ /* RAM case */
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ (addr & ~TARGET_PAGE_MASK);
+ val = lduw_p(ptr);
+#else
+ val = remR3PhysReadU16((pd & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK));
+#endif
+ }
+ return val;
+}
+
+/* warning: addr must be aligned. The ram page is not marked as dirty
+ and the code inside is not invalidated. It is useful if the dirty
+ bits are used to track modified PTEs */
+void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
+{
+ int io_index;
+ uint8_t *ptr;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+ } else {
+#ifndef VBOX
+ ram_addr_t addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr(addr1);
+ stl_p(ptr, val);
+#else
+ remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr);
+#endif
+
+#ifndef VBOX
+ if (unlikely(in_migration)) {
+ if (!cpu_physical_memory_is_dirty(addr1)) {
+ /* invalidate code */
+ tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
+ /* set dirty bit */
+ cpu_physical_memory_set_dirty_flags(
+ addr1, (0xff & ~CODE_DIRTY_FLAG));
+ }
+ }
+#endif /* !VBOX */
+ }
+}
+
+void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
+{
+ int io_index;
+ uint8_t *ptr;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+#ifdef TARGET_WORDS_BIGENDIAN
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
+#else
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
+#endif
+ } else {
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ (addr & ~TARGET_PAGE_MASK);
+ stq_p(ptr, val);
+#else
+ remR3PhysWriteU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr);
+#endif
+ }
+}
+
+/* warning: addr must be aligned */
+void stl_phys(target_phys_addr_t addr, uint32_t val)
+{
+ int io_index;
+ uint8_t *ptr;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+ } else {
+ ram_addr_t addr1;
+ addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ /* RAM case */
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(addr1);
+ stl_p(ptr, val);
+#else
+ remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr);
+#endif
+ if (!cpu_physical_memory_is_dirty(addr1)) {
+ /* invalidate code */
+ tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
+ /* set dirty bit */
+ cpu_physical_memory_set_dirty_flags(addr1,
+ (0xff & ~CODE_DIRTY_FLAG));
+ }
+ }
+}
+
+/* XXX: optimize */
+void stb_phys(target_phys_addr_t addr, uint32_t val)
+{
+ uint8_t v = val;
+ cpu_physical_memory_write(addr, &v, 1);
+}
+
+/* warning: addr must be aligned */
+void stw_phys(target_phys_addr_t addr, uint32_t val)
+{
+ int io_index;
+ uint8_t *ptr;
+ ram_addr_t pd;
+ PhysPageDesc *p;
+
+ p = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!p) {
+ pd = IO_MEM_UNASSIGNED;
+ } else {
+ pd = p->phys_offset;
+ }
+
+ if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
+ io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ if (p)
+ addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
+ } else {
+ ram_addr_t addr1;
+ addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ /* RAM case */
+#ifndef VBOX
+ ptr = qemu_get_ram_ptr(addr1);
+ stw_p(ptr, val);
+#else
+ remR3PhysWriteU16(addr1, val); NOREF(ptr);
+#endif
+ if (!cpu_physical_memory_is_dirty(addr1)) {
+ /* invalidate code */
+ tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
+ /* set dirty bit */
+ cpu_physical_memory_set_dirty_flags(addr1,
+ (0xff & ~CODE_DIRTY_FLAG));
+ }
+ }
+}
+
+/* XXX: optimize */
+void stq_phys(target_phys_addr_t addr, uint64_t val)
+{
+ val = tswap64(val);
+ cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
+}
+
+#ifndef VBOX
+/* virtual memory access for debug (includes writing to ROM) */
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+ uint8_t *buf, int len, int is_write)
+{
+ int l;
+ target_phys_addr_t phys_addr;
+ target_ulong page;
+
+ while (len > 0) {
+ page = addr & TARGET_PAGE_MASK;
+ phys_addr = cpu_get_phys_page_debug(env, page);
+ /* if no physical page mapped, return an error */
+ if (phys_addr == -1)
+ return -1;
+ l = (page + TARGET_PAGE_SIZE) - addr;
+ if (l > len)
+ l = len;
+ phys_addr += (addr & ~TARGET_PAGE_MASK);
+ if (is_write)
+ cpu_physical_memory_write_rom(phys_addr, buf, l);
+ else
+ cpu_physical_memory_rw(phys_addr, buf, l, is_write);
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+ return 0;
+}
+#endif /* !VBOX */
+#endif
+
+/* in deterministic execution mode, instructions doing device I/Os
+ must be at the end of the TB */
+void cpu_io_recompile(CPUState *env, void *retaddr)
+{
+ TranslationBlock *tb;
+ uint32_t n, cflags;
+ target_ulong pc, cs_base;
+ uint64_t flags;
+
+ tb = tb_find_pc((uintptr_t)retaddr);
+ if (!tb) {
+ cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
+ retaddr);
+ }
+ n = env->icount_decr.u16.low + tb->icount;
+ cpu_restore_state(tb, env, (uintptr_t)retaddr, NULL);
+ /* Calculate how many instructions had been executed before the fault
+ occurred. */
+ n = n - env->icount_decr.u16.low;
+ /* Generate a new TB ending on the I/O insn. */
+ n++;
+ /* On MIPS and SH, delay slot instructions can only be restarted if
+ they were already the first instruction in the TB. If this is not
+ the first instruction in a TB then re-execute the preceding
+ branch. */
+#if defined(TARGET_MIPS)
+ if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
+ env->active_tc.PC -= 4;
+ env->icount_decr.u16.low++;
+ env->hflags &= ~MIPS_HFLAG_BMASK;
+ }
+#elif defined(TARGET_SH4)
+ if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+ && n > 1) {
+ env->pc -= 2;
+ env->icount_decr.u16.low++;
+ env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+ }
+#endif
+ /* This should never happen. */
+ if (n > CF_COUNT_MASK)
+ cpu_abort(env, "TB too big during recompile");
+
+ cflags = n | CF_LAST_IO;
+ pc = tb->pc;
+ cs_base = tb->cs_base;
+ flags = tb->flags;
+ tb_phys_invalidate(tb, -1);
+ /* FIXME: In theory this could raise an exception. In practice
+ we have already translated the block once so it's probably ok. */
+ tb_gen_code(env, pc, cs_base, flags, cflags);
+ /** @todo If env->pc != tb->pc (i.e. the faulting instruction was not
+ the first in the TB) then we end up generating a whole new TB and
+ repeating the fault, which is horribly inefficient.
+ Better would be to execute just this insn uncached, or generate a
+ second new TB. */
+ cpu_resume_from_signal(env, NULL);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#ifndef VBOX
+void dump_exec_info(FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+ int i, target_code_size, max_target_code_size;
+ int direct_jmp_count, direct_jmp2_count, cross_page;
+ TranslationBlock *tb;
+
+ target_code_size = 0;
+ max_target_code_size = 0;
+ cross_page = 0;
+ direct_jmp_count = 0;
+ direct_jmp2_count = 0;
+ for(i = 0; i < nb_tbs; i++) {
+ tb = &tbs[i];
+ target_code_size += tb->size;
+ if (tb->size > max_target_code_size)
+ max_target_code_size = tb->size;
+ if (tb->page_addr[1] != -1)
+ cross_page++;
+ if (tb->tb_next_offset[0] != 0xffff) {
+ direct_jmp_count++;
+ if (tb->tb_next_offset[1] != 0xffff) {
+ direct_jmp2_count++;
+ }
+ }
+ }
+ /* XXX: avoid using doubles ? */
+ cpu_fprintf(f, "Translation buffer state:\n");
+ cpu_fprintf(f, "gen code size %ld/%ld\n",
+ code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+ cpu_fprintf(f, "TB count %d/%d\n",
+ nb_tbs, code_gen_max_blocks);
+ cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
+ nb_tbs ? target_code_size / nb_tbs : 0,
+ max_target_code_size);
+ cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n",
+ nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
+ target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
+ cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
+ cross_page,
+ nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+ cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
+ direct_jmp_count,
+ nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+ direct_jmp2_count,
+ nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+ cpu_fprintf(f, "\nStatistics:\n");
+ cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
+ cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+ cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
+ tcg_dump_info(f, cpu_fprintf);
+}
+#endif /* !VBOX */
+
+#define MMUSUFFIX _cmmu
+#define GETPC() NULL
+#define env cpu_single_env
+#define SOFTMMU_CODE_ACCESS
+
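+/* Editorial note: the four includes below instantiate the code-fetch
+ (MMUSUFFIX _cmmu) variants of the softmmu load helpers; SHIFT selects the
+ access size, 1 << SHIFT bytes, i.e. 1-, 2-, 4- and 8-byte accesses. */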
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#undef env
+
+#endif
diff --git a/src/recompiler/fpu/Makefile.kup b/src/recompiler/fpu/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/fpu/Makefile.kup
diff --git a/src/recompiler/fpu/softfloat-macros.h b/src/recompiler/fpu/softfloat-macros.h
new file mode 100644
index 00000000..78382282
--- /dev/null
+++ b/src/recompiler/fpu/softfloat-macros.h
@@ -0,0 +1,719 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'. If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1. The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+ bits32 z;
+
+ if ( count == 0 ) {
+ z = a;
+ }
+ else if ( count < 32 ) {
+ z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+ }
+ else {
+ z = ( a != 0 );
+ }
+ *zPtr = z;
+
+}
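+
+/* Example: shift32RightJamming( 0x80000005, 3, &z ) gives z = 0x10000001;
+   the nonzero bits (101) shifted off the bottom are "jammed" into the least
+   significant bit of the result. */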
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'. If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1. The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+ bits64 z;
+
+ if ( count == 0 ) {
+ z = a;
+ }
+ else if ( count < 64 ) {
+ z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+ }
+ else {
+ z = ( a != 0 );
+ }
+ *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'. The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
+| bits shifted off form a second 64-bit result as follows: The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero. This extra result is stored in the location
+| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
+| (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'. This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'. The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift64ExtraRightJamming(
+ bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+ bits64 z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1 != 0 );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z1 = a0 | ( a1 != 0 );
+ }
+ else {
+ z1 = ( ( a0 | a1 ) != 0 );
+ }
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
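+
+/* Example: shift64ExtraRightJamming( 0x12, 0x1, 4, &z0, &z1 ) gives z0 = 0x1
+   and z1 = 0x2000000000000001: the low nibble of `a0' moves to the top of
+   the extra result, and the sticky least significant bit records that the
+   entirely shifted-out `a1' was nonzero. */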
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0. The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128Right(
+ bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+ bits64 z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1>>count );
+ z0 = a0>>count;
+ }
+ else {
+ z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'. If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1. The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero. The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128RightJamming(
+ bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+ bits64 z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z1 = a0 | ( a1 != 0 );
+ }
+ else if ( count < 128 ) {
+ z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+ }
+ else {
+ z1 = ( ( a0 | a1 ) != 0 );
+ }
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'. The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
+| off form a third 64-bit result as follows: The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero. This extra result is stored in the location pointed to by
+| `z2Ptr'. The value of `count' can be arbitrarily large.
+| (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'. This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128ExtraRightJamming(
+ bits64 a0,
+ bits64 a1,
+ bits64 a2,
+ int16 count,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr
+ )
+{
+ bits64 z0, z1, z2;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z2 = a2;
+ z1 = a1;
+ z0 = a0;
+ }
+ else {
+ if ( count < 64 ) {
+ z2 = a1<<negCount;
+ z1 = ( a0<<negCount ) | ( a1>>count );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z2 = a1;
+ z1 = a0;
+ }
+ else {
+ a2 |= a1;
+ if ( count < 128 ) {
+ z2 = a0<<negCount;
+ z1 = a0>>( count & 63 );
+ }
+ else {
+ z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+ z1 = 0;
+ }
+ }
+ z0 = 0;
+ }
+ z2 |= ( a2 != 0 );
+ }
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' must be less than 64. The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift128Left(
+ bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+ *z1Ptr = a1<<count;
+ *z0Ptr =
+ ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
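+
+/* The count == 0 case is treated separately because ( - count ) & 63 would
+   also be 0, and the a1>>( ( - count ) & 63 ) term would then wrongly merge
+   all of `a1' into `z0'. */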
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'. Any bits shifted off are lost.
+| The value of `count' must be less than 64. The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift192Left(
+ bits64 a0,
+ bits64 a1,
+ bits64 a2,
+ int16 count,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr
+ )
+{
+ bits64 z0, z1, z2;
+ int8 negCount;
+
+ z2 = a2<<count;
+ z1 = a1<<count;
+ z0 = a0<<count;
+ if ( 0 < count ) {
+ negCount = ( ( - count ) & 63 );
+ z1 |= a2>>negCount;
+ z0 |= a1>>negCount;
+ }
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
+| any carry out is lost. The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add128(
+ bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+ bits64 z1;
+
+ z1 = a1 + b1;
+ *z1Ptr = z1;
+ *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
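+
+/* The low words are added modulo 2^64; a carry into the high word occurred
+   exactly when the sum wrapped around, i.e. when ( z1 < a1 ). */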
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
+| modulo 2^192, so any carry out is lost. The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add192(
+ bits64 a0,
+ bits64 a1,
+ bits64 a2,
+ bits64 b0,
+ bits64 b1,
+ bits64 b2,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr
+ )
+{
+ bits64 z0, z1, z2;
+ int8 carry0, carry1;
+
+ z2 = a2 + b2;
+ carry1 = ( z2 < a2 );
+ z1 = a1 + b1;
+ carry0 = ( z1 < a1 );
+ z0 = a0 + b0;
+ z1 += carry1;
+ z0 += ( z1 < carry1 );
+ z0 += carry0;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost. The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub128(
+ bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+ *z1Ptr = a1 - b1;
+ *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub192(
+ bits64 a0,
+ bits64 a1,
+ bits64 a2,
+ bits64 b0,
+ bits64 b1,
+ bits64 b2,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr
+ )
+{
+ bits64 z0, z1, z2;
+ int8 borrow0, borrow1;
+
+ z2 = a2 - b2;
+ borrow1 = ( a2 < b2 );
+ z1 = a1 - b1;
+ borrow0 = ( a1 < b1 );
+ z0 = a0 - b0;
+ z0 -= ( z1 < borrow1 );
+ z1 -= borrow1;
+ z0 -= borrow0;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+ bits32 aHigh, aLow, bHigh, bLow;
+ bits64 z0, zMiddleA, zMiddleB, z1;
+
+ aLow = a;
+ aHigh = a>>32;
+ bLow = b;
+ bHigh = b>>32;
+ z1 = ( (bits64) aLow ) * bLow;
+ zMiddleA = ( (bits64) aLow ) * bHigh;
+ zMiddleB = ( (bits64) aHigh ) * bLow;
+ z0 = ( (bits64) aHigh ) * bHigh;
+ zMiddleA += zMiddleB;
+ z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+ zMiddleA <<= 32;
+ z1 += zMiddleA;
+ z0 += ( z1 < zMiddleA );
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
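+
+/* Schoolbook decomposition into 32-bit halves:
+   a * b = ( aHigh*bHigh << 64 ) + ( ( aHigh*bLow + aLow*bHigh ) << 32 )
+         + aLow*bLow,
+   with the carries from combining the two middle products folded into z0. */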
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product. The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128By64To192(
+ bits64 a0,
+ bits64 a1,
+ bits64 b,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr
+ )
+{
+ bits64 z0, z1, z2, more1;
+
+ mul64To128( a1, b, &z1, &z2 );
+ mul64To128( a0, b, &z0, &more1 );
+ add128( z0, more1, 0, z1, &z0, &z1 );
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product. The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128To256(
+ bits64 a0,
+ bits64 a1,
+ bits64 b0,
+ bits64 b1,
+ bits64 *z0Ptr,
+ bits64 *z1Ptr,
+ bits64 *z2Ptr,
+ bits64 *z3Ptr
+ )
+{
+ bits64 z0, z1, z2, z3;
+ bits64 more1, more2;
+
+ mul64To128( a1, b1, &z2, &z3 );
+ mul64To128( a1, b0, &z1, &more2 );
+ add128( z1, more2, 0, z2, &z1, &z2 );
+ mul64To128( a0, b0, &z0, &more1 );
+ add128( z0, more1, 0, z1, &z0, &z1 );
+ mul64To128( a0, b1, &more1, &more2 );
+ add128( more1, more2, 0, z2, &more1, &z2 );
+ add128( z0, z1, 0, more1, &z0, &z1 );
+ *z3Ptr = z3;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
+| divisor `b' must be at least 2^63. If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+ bits64 b0, b1;
+ bits64 rem0, rem1, term0, term1;
+ bits64 z;
+
+ if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+ b0 = b>>32;
+ z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+ mul64To128( b, z, &term0, &term1 );
+ sub128( a0, a1, term0, term1, &rem0, &rem1 );
+ while ( ( (sbits64) rem0 ) < 0 ) {
+ z -= LIT64( 0x100000000 );
+ b1 = b<<32;
+ add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+ }
+ rem0 = ( rem0<<32 ) | ( rem1>>32 );
+ z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+ return z;
+
+}
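+
+/* The quotient is estimated 32 bits at a time: the upper half comes from
+   a0 / ( b>>32 ) and is corrected downward while the partial remainder is
+   negative; the lower half is estimated the same way from the updated
+   remainder without a correction pass, which is where the "q to q + 2"
+   slack in the result comes from. */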
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+ static const bits16 sqrtOddAdjustments[] = {
+ 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+ 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+ };
+ static const bits16 sqrtEvenAdjustments[] = {
+ 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+ 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+ };
+ int8 index;
+ bits32 z;
+
+ index = ( a>>27 ) & 15;
+ if ( aExp & 1 ) {
+ z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
+ z = ( ( a / z )<<14 ) + ( z<<15 );
+ a >>= 1;
+ }
+ else {
+ z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
+ z = a / z + z;
+ z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+ if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+ }
+ return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
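+
+/* A 16-entry table supplies a rough seed; the remaining steps are, in effect,
+   scaled Newton-Raphson refinements of the form ( a/z + z ) / 2, the last of
+   them performed at full precision by the final return statement. */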
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'. If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+ static const int8 countLeadingZerosHigh[] = {
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ int8 shiftCount;
+
+ shiftCount = 0;
+ if ( a < 0x10000 ) {
+ shiftCount += 16;
+ a <<= 16;
+ }
+ if ( a < 0x1000000 ) {
+ shiftCount += 8;
+ a <<= 8;
+ }
+ shiftCount += countLeadingZerosHigh[ a>>24 ];
+ return shiftCount;
+
+}
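+
+/* Example: countLeadingZeros32( 0x00010000 ) adds 8 in the second
+   normalization step and then countLeadingZerosHigh[ 0x01 ] = 7, giving 15. */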
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'. If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+ int8 shiftCount;
+
+ shiftCount = 0;
+ if ( a < ( (bits64) 1 )<<32 ) {
+ shiftCount += 32;
+ }
+ else {
+ a >>= 32;
+ }
+ shiftCount += countLeadingZeros32( a );
+ return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+ return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+ return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+ return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+ return ( a0 != b0 ) || ( a1 != b1 );
+
+}
diff --git a/src/recompiler/fpu/softfloat-native.c b/src/recompiler/fpu/softfloat-native.c
new file mode 100644
index 00000000..7c7820ab
--- /dev/null
+++ b/src/recompiler/fpu/softfloat-native.c
@@ -0,0 +1,521 @@
+/* Native implementation of soft float functions. Only a single status
+ context is supported */
+#include "softfloat.h"
+#include <math.h>
+#if defined(CONFIG_SOLARIS)
+#include <fenv.h>
+#endif
+
+void set_float_rounding_mode(int val STATUS_PARAM)
+{
+ STATUS(float_rounding_mode) = val;
+#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) || \
+ (defined(CONFIG_SOLARIS) && (CONFIG_SOLARIS_VERSION < 10 || CONFIG_SOLARIS_VERSION == 11)) /* VBOX adds sol 11 */
+ fpsetround(val);
+#else
+ fesetround(val);
+#endif
+}
+
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+ STATUS(floatx80_rounding_precision) = val;
+}
+#endif
+
+#if defined(CONFIG_BSD) || \
+ (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10)
+#define lrint(d) ((int32_t)rint(d))
+#define llrint(d) ((int64_t)rint(d))
+#define lrintf(f) ((int32_t)rint(f))
+#define llrintf(f) ((int64_t)rint(f))
+#define sqrtf(f) ((float)sqrt(f))
+#define remainderf(fa, fb) ((float)remainder(fa, fb))
+#define rintf(f) ((float)rint(f))
+# if defined(VBOX) && defined(HOST_BSD) /* Some defines which only apply to *BSD */
+# define lrintl(f) ((int32_t)rint(f))
+# define llrintl(f) ((int64_t)rint(f))
+# define rintl(d) (rint(d))
+# define sqrtl(f) (sqrt(f))
+# define remainderl(fa, fb) (remainder(fa, fb))
+# endif /* VBOX && HOST_BSD */
+#if !defined(__sparc__) && \
+ (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10)
+extern long double rintl(long double);
+extern long double scalbnl(long double, int);
+
+long long
+llrintl(long double x) {
+ return ((long long) rintl(x));
+}
+
+long
+lrintl(long double x) {
+ return ((long) rintl(x));
+}
+
+long double
+ldexpl(long double x, int n) {
+ return (scalbnl(x, n));
+}
+#endif
+#endif
+
+#if defined(_ARCH_PPC)
+
+/* correct (but slow) PowerPC rint() (glibc version is incorrect) */
+static double qemu_rint(double x)
+{
+ double y = 4503599627370496.0;
+ if (fabs(x) >= y)
+ return x;
+ if (x < 0)
+ y = -y;
+ y = (x + y) - y;
+ if (y == 0.0)
+ y = copysign(y, x);
+ return y;
+}
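+
+/* 4503599627370496.0 is 2^52: adding and then subtracting it forces the
+   value to be rounded to an integer in the current rounding mode, because
+   doubles of that magnitude have no fraction bits; the copysign() call
+   preserves the sign of a negative zero result. */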
+
+#define rint qemu_rint
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32(int v STATUS_PARAM)
+{
+ return (float32)v;
+}
+
+float32 uint32_to_float32(unsigned int v STATUS_PARAM)
+{
+ return (float32)v;
+}
+
+float64 int32_to_float64(int v STATUS_PARAM)
+{
+ return (float64)v;
+}
+
+float64 uint32_to_float64(unsigned int v STATUS_PARAM)
+{
+ return (float64)v;
+}
+
+#ifdef FLOATX80
+floatx80 int32_to_floatx80(int v STATUS_PARAM)
+{
+ return (floatx80)v;
+}
+#endif
+float32 int64_to_float32( int64_t v STATUS_PARAM)
+{
+ return (float32)v;
+}
+float32 uint64_to_float32( uint64_t v STATUS_PARAM)
+{
+ return (float32)v;
+}
+float64 int64_to_float64( int64_t v STATUS_PARAM)
+{
+ return (float64)v;
+}
+float64 uint64_to_float64( uint64_t v STATUS_PARAM)
+{
+ return (float64)v;
+}
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t v STATUS_PARAM)
+{
+ return (floatx80)v;
+}
+#endif
+
+/* XXX: this code implements the x86 behaviour, not the IEEE one. */
+#if HOST_LONG_BITS == 32
+static inline int long_to_int32(long a)
+{
+ return a;
+}
+#else
+static inline int long_to_int32(long a)
+{
+ if (a != (int32_t)a)
+ a = 0x80000000;
+ return a;
+}
+#endif
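+
+/* Out-of-range inputs are mapped to 0x80000000, the "integer indefinite"
+   value produced by the x86 conversion instructions (CVTTSS2SI and friends),
+   rather than a saturated result. */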
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32 a STATUS_PARAM)
+{
+ return long_to_int32(lrintf(a));
+}
+int float32_to_int32_round_to_zero( float32 a STATUS_PARAM)
+{
+ return (int)a;
+}
+int64_t float32_to_int64( float32 a STATUS_PARAM)
+{
+ return llrintf(a);
+}
+
+int64_t float32_to_int64_round_to_zero( float32 a STATUS_PARAM)
+{
+ return (int64_t)a;
+}
+
+float64 float32_to_float64( float32 a STATUS_PARAM)
+{
+ return a;
+}
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 a STATUS_PARAM)
+{
+ return a;
+}
+#endif
+
+unsigned int float32_to_uint32( float32 a STATUS_PARAM)
+{
+ int64_t v;
+ unsigned int res;
+
+ v = llrintf(a);
+ if (v < 0) {
+ res = 0;
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ } else {
+ res = v;
+ }
+ return res;
+}
+unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM)
+{
+ int64_t v;
+ unsigned int res;
+
+ v = (int64_t)a;
+ if (v < 0) {
+ res = 0;
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 a STATUS_PARAM)
+{
+ return rintf(a);
+}
+
+float32 float32_rem( float32 a, float32 b STATUS_PARAM)
+{
+ return remainderf(a, b);
+}
+
+float32 float32_sqrt( float32 a STATUS_PARAM)
+{
+ return sqrtf(a);
+}
+int float32_compare( float32 a, float32 b STATUS_PARAM )
+{
+ if (a < b) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (a > b) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int float32_compare_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ if (isless(a, b)) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (isgreater(a, b)) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int float32_is_signaling_nan( float32 a1)
+{
+ float32u u;
+ uint32_t a;
+ u.f = a1;
+ a = u.i;
+ return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+}
+
+int float32_is_nan( float32 a1 )
+{
+ float32u u;
+ uint64_t a;
+ u.f = a1;
+ a = u.i;
+ return ( 0xFF800000 < ( a<<1 ) );
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 a STATUS_PARAM)
+{
+ return long_to_int32(lrint(a));
+}
+int float64_to_int32_round_to_zero( float64 a STATUS_PARAM)
+{
+ return (int)a;
+}
+int64_t float64_to_int64( float64 a STATUS_PARAM)
+{
+ return llrint(a);
+}
+int64_t float64_to_int64_round_to_zero( float64 a STATUS_PARAM)
+{
+ return (int64_t)a;
+}
+float32 float64_to_float32( float64 a STATUS_PARAM)
+{
+ return a;
+}
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 a STATUS_PARAM)
+{
+ return a;
+}
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 a STATUS_PARAM)
+{
+ return a;
+}
+#endif
+
+unsigned int float64_to_uint32( float64 a STATUS_PARAM)
+{
+ int64_t v;
+ unsigned int res;
+
+ v = llrint(a);
+ if (v < 0) {
+ res = 0;
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ } else {
+ res = v;
+ }
+ return res;
+}
+unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM)
+{
+ int64_t v;
+ unsigned int res;
+
+ v = (int64_t)a;
+ if (v < 0) {
+ res = 0;
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ } else {
+ res = v;
+ }
+ return res;
+}
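+
+/* The two conversions below bias the input down by 2^63 (INT64_MIN) so that
+   it fits the signed range handled by llrint() and the cast, then undo the
+   bias on the integer result to recover the unsigned value. */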
+uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = llrint(a + (float64)INT64_MIN);
+
+ return v - INT64_MIN;
+}
+uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = (int64_t)(a + (float64)INT64_MIN);
+
+ return v - INT64_MIN;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+#if defined(__sun__) && \
+ (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10)
+static inline float64 trunc(float64 x)
+{
+ return x < 0 ? -floor(-x) : floor(x);
+}
+#endif
+float64 float64_trunc_to_int( float64 a STATUS_PARAM )
+{
+ return trunc(a);
+}
+
+float64 float64_round_to_int( float64 a STATUS_PARAM )
+{
+ return rint(a);
+}
+
+float64 float64_rem( float64 a, float64 b STATUS_PARAM)
+{
+ return remainder(a, b);
+}
+
+float64 float64_sqrt( float64 a STATUS_PARAM)
+{
+ return sqrt(a);
+}
+int float64_compare( float64 a, float64 b STATUS_PARAM )
+{
+ if (a < b) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (a > b) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int float64_compare_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ if (isless(a, b)) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (isgreater(a, b)) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int float64_is_signaling_nan( float64 a1)
+{
+ float64u u;
+ uint64_t a;
+ u.f = a1;
+ a = u.i;
+ return
+ ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+ && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+int float64_is_nan( float64 a1 )
+{
+ float64u u;
+ uint64_t a;
+ u.f = a1;
+ a = u.i;
+
+ return ( LIT64( 0xFFF0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 a STATUS_PARAM)
+{
+ return long_to_int32(lrintl(a));
+}
+int floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM)
+{
+ return (int)a;
+}
+int64_t floatx80_to_int64( floatx80 a STATUS_PARAM)
+{
+ return llrintl(a);
+}
+int64_t floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM)
+{
+ return (int64_t)a;
+}
+float32 floatx80_to_float32( floatx80 a STATUS_PARAM)
+{
+ return a;
+}
+float64 floatx80_to_float64( floatx80 a STATUS_PARAM)
+{
+ return a;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM)
+{
+ return rintl(a);
+}
+floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return remainderl(a, b);
+}
+floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM)
+{
+ return sqrtl(a);
+}
+int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ if (a < b) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (a > b) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ if (isless(a, b)) {
+ return float_relation_less;
+ } else if (a == b) {
+ return float_relation_equal;
+ } else if (isgreater(a, b)) {
+ return float_relation_greater;
+ } else {
+ return float_relation_unordered;
+ }
+}
+int floatx80_is_signaling_nan( floatx80 a1)
+{
+ floatx80u u;
+ uint64_t aLow;
+ u.f = a1;
+
+ aLow = u.i.low & ~ LIT64( 0x4000000000000000 );
+ return
+ ( ( u.i.high & 0x7FFF ) == 0x7FFF )
+ && (bits64) ( aLow<<1 )
+ && ( u.i.low == aLow );
+}
+
+int floatx80_is_nan( floatx80 a1 )
+{
+ floatx80u u;
+ u.f = a1;
+ return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( u.i.low<<1 );
+}
+
+#endif
diff --git a/src/recompiler/fpu/softfloat-native.h b/src/recompiler/fpu/softfloat-native.h
new file mode 100644
index 00000000..13be732a
--- /dev/null
+++ b/src/recompiler/fpu/softfloat-native.h
@@ -0,0 +1,493 @@
+/* Native implementation of soft float functions */
+#define __C99FEATURES__
+#include <math.h>
+
+#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__) && !defined(__FreeBSD__)) \
+ || defined(CONFIG_SOLARIS) /* VBox: Added __FreeBSD__ */
+#include <ieeefp.h>
+#define fabsf(f) ((float)fabs(f))
+#else
+#include <fenv.h>
+#endif
+
+#if defined(__OpenBSD__) || defined(__NetBSD__)
+#include <sys/param.h>
+#endif
+
+/*
+ * Define some C99-7.12.3 classification macros and
+ * some C99-7.12.14 comparison macros for Solaris systems with an OS
+ * version less than 10, or Solaris 10 systems running GCC 3.x or older.
+ * Solaris 10 with GCC 4 does not need these macros, as they
+ * are defined in <iso/math_c99.h> with a compiler directive
+ */
+#if defined(CONFIG_SOLARIS) && \
+ ((CONFIG_SOLARIS_VERSION <= 9 ) || \
+ ((CONFIG_SOLARIS_VERSION == 10) && (__GNUC__ < 4))) \
+ || (defined(__OpenBSD__) && (OpenBSD < 200811))
+/*
+ * C99 7.12.3 classification macros
+ * and
+ * C99 7.12.14 comparison macros
+ *
+ * ... do not work on Solaris 10 using GNU CC 3.4.x.
+ * Try to work around the missing / broken C99 math macros.
+ */
+#if defined(__OpenBSD__)
+#define unordered(x, y) (isnan(x) || isnan(y))
+#endif
+
+#ifdef __NetBSD__
+#ifndef isgreater
+#define isgreater(x, y) __builtin_isgreater(x, y)
+#endif
+#ifndef isgreaterequal
+#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y)
+#endif
+#ifndef isless
+#define isless(x, y) __builtin_isless(x, y)
+#endif
+#ifndef islessequal
+#define islessequal(x, y) __builtin_islessequal(x, y)
+#endif
+#ifndef isunordered
+#define isunordered(x, y) __builtin_isunordered(x, y)
+#endif
+#endif
+
+
+#define isnormal(x) (fpclass(x) >= FP_NZERO)
+#define isgreater(x, y) ((!unordered(x, y)) && ((x) > (y)))
+#define isgreaterequal(x, y) ((!unordered(x, y)) && ((x) >= (y)))
+#define isless(x, y) ((!unordered(x, y)) && ((x) < (y)))
+#define islessequal(x, y) ((!unordered(x, y)) && ((x) <= (y)))
+#define isunordered(x,y) unordered(x, y)
+#endif
+
+#if defined(__sun__) && !defined(CONFIG_NEEDS_LIBSUNMATH)
+
+#ifndef isnan
+# define isnan(x) \
+ (sizeof (x) == sizeof (long double) ? isnan_ld (x) \
+ : sizeof (x) == sizeof (double) ? isnan_d (x) \
+ : isnan_f (x))
+static inline int isnan_f (float x) { return x != x; }
+static inline int isnan_d (double x) { return x != x; }
+static inline int isnan_ld (long double x) { return x != x; }
+#endif
+
+#ifndef isinf
+# define isinf(x) \
+ (sizeof (x) == sizeof (long double) ? isinf_ld (x) \
+ : sizeof (x) == sizeof (double) ? isinf_d (x) \
+ : isinf_f (x))
+static inline int isinf_f (float x) { return isnan (x - x); }
+static inline int isinf_d (double x) { return isnan (x - x); }
+static inline int isinf_ld (long double x) { return isnan (x - x); }
+#endif
+#endif
+
+typedef float float32;
+typedef double float64;
+#ifdef FLOATX80
+typedef long double floatx80;
+#endif
+
+typedef union {
+ float32 f;
+ uint32_t i;
+} float32u;
+typedef union {
+ float64 f;
+ uint64_t i;
+} float64u;
+#ifdef FLOATX80
+typedef union {
+ floatx80 f;
+ struct {
+ uint64_t low;
+ uint16_t high;
+ } i;
+} floatx80u;
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) \
+ || defined(CONFIG_SOLARIS)
+#if defined(__OpenBSD__)
+#define FE_RM FP_RM
+#define FE_RP FP_RP
+#define FE_RZ FP_RZ
+#endif
+enum {
+ float_round_nearest_even = FP_RN,
+ float_round_down = FP_RM,
+ float_round_up = FP_RP,
+ float_round_to_zero = FP_RZ
+};
+#else
+enum {
+ float_round_nearest_even = FE_TONEAREST,
+ float_round_down = FE_DOWNWARD,
+ float_round_up = FE_UPWARD,
+ float_round_to_zero = FE_TOWARDZERO
+};
+#endif
+
+typedef struct float_status {
+ int float_rounding_mode;
+#ifdef FLOATX80
+ int floatx80_rounding_precision;
+#endif
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int STATUS_PARAM);
+float32 uint32_to_float32( unsigned int STATUS_PARAM);
+float64 int32_to_float64( int STATUS_PARAM);
+float64 uint32_to_float64( unsigned int STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int STATUS_PARAM);
+#endif
+float32 int64_to_float32( int64_t STATUS_PARAM);
+float32 uint64_to_float32( uint64_t STATUS_PARAM);
+float64 int64_to_float64( int64_t STATUS_PARAM);
+float64 uint64_to_float64( uint64_t v STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64_t STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32 STATUS_PARAM);
+int float32_to_int32_round_to_zero( float32 STATUS_PARAM);
+unsigned int float32_to_uint32( float32 a STATUS_PARAM);
+unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM);
+int64_t float32_to_int64( float32 STATUS_PARAM);
+int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM);
+float64 float32_to_float64( float32 STATUS_PARAM);
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 STATUS_PARAM);
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 STATUS_PARAM);
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM);
+INLINE float32 float32_add( float32 a, float32 b STATUS_PARAM)
+{
+ return a + b;
+}
+INLINE float32 float32_sub( float32 a, float32 b STATUS_PARAM)
+{
+ return a - b;
+}
+INLINE float32 float32_mul( float32 a, float32 b STATUS_PARAM)
+{
+ return a * b;
+}
+INLINE float32 float32_div( float32 a, float32 b STATUS_PARAM)
+{
+ return a / b;
+}
+float32 float32_rem( float32, float32 STATUS_PARAM);
+float32 float32_sqrt( float32 STATUS_PARAM);
+INLINE int float32_eq( float32 a, float32 b STATUS_PARAM)
+{
+ return a == b;
+}
+INLINE int float32_le( float32 a, float32 b STATUS_PARAM)
+{
+ return a <= b;
+}
+INLINE int float32_lt( float32 a, float32 b STATUS_PARAM)
+{
+ return a < b;
+}
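+
+/* Written as a <= b && a >= b rather than a == b, presumably so that the
+   comparison signals: the ordered relational operators raise the invalid
+   exception for quiet NaN operands as well, which is the behaviour an
+   "eq_signaling" primitive needs. */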
+INLINE int float32_eq_signaling( float32 a, float32 b STATUS_PARAM)
+{
+ return a <= b && a >= b;
+}
+INLINE int float32_le_quiet( float32 a, float32 b STATUS_PARAM)
+{
+ return islessequal(a, b);
+}
+INLINE int float32_lt_quiet( float32 a, float32 b STATUS_PARAM)
+{
+ return isless(a, b);
+}
+INLINE int float32_unordered( float32 a, float32 b STATUS_PARAM)
+{
+ return isunordered(a, b);
+
+}
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+int float32_is_signaling_nan( float32 );
+int float32_is_nan( float32 );
+
+INLINE float32 float32_abs(float32 a)
+{
+ return fabsf(a);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+ return -a;
+}
+
+INLINE float32 float32_is_infinity(float32 a)
+{
+ return fpclassify(a) == FP_INFINITE;
+}
+
+INLINE float32 float32_is_neg(float32 a)
+{
+ float32u u;
+ u.f = a;
+ return u.i >> 31;
+}
+
+INLINE float32 float32_is_zero(float32 a)
+{
+ return fpclassify(a) == FP_ZERO;
+}
+
+INLINE float32 float32_scalbn(float32 a, int n)
+{
+ return scalbnf(a, n);
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 STATUS_PARAM );
+int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+unsigned int float64_to_uint32( float64 STATUS_PARAM );
+unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64_t float64_to_int64( float64 STATUS_PARAM );
+int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64_t float64_to_uint64( float64 STATUS_PARAM );
+uint64_t float64_to_uint64_round_to_zero( float64 STATUS_PARAM );
+float32 float64_to_float32( float64 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+INLINE float64 float64_add( float64 a, float64 b STATUS_PARAM)
+{
+ return a + b;
+}
+INLINE float64 float64_sub( float64 a, float64 b STATUS_PARAM)
+{
+ return a - b;
+}
+INLINE float64 float64_mul( float64 a, float64 b STATUS_PARAM)
+{
+ return a * b;
+}
+INLINE float64 float64_div( float64 a, float64 b STATUS_PARAM)
+{
+ return a / b;
+}
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_sqrt( float64 STATUS_PARAM );
+INLINE int float64_eq( float64 a, float64 b STATUS_PARAM)
+{
+ return a == b;
+}
+INLINE int float64_le( float64 a, float64 b STATUS_PARAM)
+{
+ return a <= b;
+}
+INLINE int float64_lt( float64 a, float64 b STATUS_PARAM)
+{
+ return a < b;
+}
+INLINE int float64_eq_signaling( float64 a, float64 b STATUS_PARAM)
+{
+ return a <= b && a >= b;
+}
+INLINE int float64_le_quiet( float64 a, float64 b STATUS_PARAM)
+{
+ return islessequal(a, b);
+}
+INLINE int float64_lt_quiet( float64 a, float64 b STATUS_PARAM)
+{
+ return isless(a, b);
+
+}
+INLINE int float64_unordered( float64 a, float64 b STATUS_PARAM)
+{
+ return isunordered(a, b);
+
+}
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+int float64_is_signaling_nan( float64 );
+int float64_is_nan( float64 );
+
+INLINE float64 float64_abs(float64 a)
+{
+ return fabs(a);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+ return -a;
+}
+
+INLINE float64 float64_is_infinity(float64 a)
+{
+ return fpclassify(a) == FP_INFINITE;
+}
+
+INLINE float64 float64_is_neg(float64 a)
+{
+ float64u u;
+ u.f = a;
+ return u.i >> 63;
+}
+
+INLINE float64 float64_is_zero(float64 a)
+{
+ return fpclassify(a) == FP_ZERO;
+}
+
+INLINE float64 float64_scalbn(float64 a, int n)
+{
+ return scalbn(a, n);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 STATUS_PARAM );
+int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64( floatx80 STATUS_PARAM);
+int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM);
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+INLINE floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a + b;
+}
+INLINE floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a - b;
+}
+INLINE floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a * b;
+}
+INLINE floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a / b;
+}
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a == b;
+}
+INLINE int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a <= b;
+}
+INLINE int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a < b;
+}
+INLINE int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return a <= b && a >= b;
+}
+INLINE int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return islessequal(a, b);
+}
+INLINE int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return isless(a, b);
+
+}
+INLINE int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ return isunordered(a, b);
+
+}
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_signaling_nan( floatx80 );
+int floatx80_is_nan( floatx80 );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+ return fabsl(a);
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+ return -a;
+}
+
+INLINE floatx80 floatx80_is_infinity(floatx80 a)
+{
+ return fpclassify(a) == FP_INFINITE;
+}
+
+INLINE floatx80 floatx80_is_neg(floatx80 a)
+{
+ floatx80u u;
+ u.f = a;
+ return u.i.high >> 15;
+}
+
+INLINE floatx80 floatx80_is_zero(floatx80 a)
+{
+ return fpclassify(a) == FP_ZERO;
+}
+
+INLINE floatx80 floatx80_scalbn(floatx80 a, int n)
+{
+ return scalbnl(a, n);
+}
+
+#endif
diff --git a/src/recompiler/fpu/softfloat-specialize.h b/src/recompiler/fpu/softfloat-specialize.h
new file mode 100644
index 00000000..8e6aceb5
--- /dev/null
+++ b/src/recompiler/fpu/softfloat-specialize.h
@@ -0,0 +1,581 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#if defined(TARGET_MIPS) || defined(TARGET_HPPA)
+#define SNAN_BIT_IS_ONE 1
+#else
+#define SNAN_BIT_IS_ONE 0
+#endif
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'. Floating-point traps can be
+| defined here if desired. It is currently not possible for such a trap
+| to substitute a result value. If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags STATUS_PARAM )
+{
+ STATUS(float_exception_flags) |= flags;
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+ flag sign;
+ bits64 high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float32_default_nan make_float32(0x7FFFFFFF)
+#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float32_default_nan make_float32(0x7FC00000)
+#elif defined(TARGET_HPPA)
+#define float32_default_nan make_float32(0x7FA00000)
+#elif SNAN_BIT_IS_ONE
+#define float32_default_nan make_float32(0x7FBFFFFF)
+#else
+#define float32_default_nan make_float32(0xFFC00000)
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_nan( float32 a_ )
+{
+ uint32_t a = float32_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+#else
+ return ( 0xFF800000 <= (bits32) ( a<<1 ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_signaling_nan( float32 a_ )
+{
+ uint32_t a = float32_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( 0xFF800000 <= (bits32) ( a<<1 ) );
+#else
+ return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a STATUS_PARAM )
+{
+ commonNaNT z;
+
+ if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
+ z.sign = float32_val(a)>>31;
+ z.low = 0;
+ z.high = ( (bits64) float32_val(a) )<<41;
+ return z;
+}
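+
+/* The shift by 41 left-justifies the 23-bit single-precision fraction field
+   in the 64-bit `high' member (23 + 41 = 64); commonNaNToFloat32() recovers
+   it with the matching right shift. */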
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+ bits32 mantissa = a.high>>41;
+ if ( mantissa )
+ return make_float32(
+ ( ( (bits32) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) );
+ else
+ return float32_default_nan;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM)
+{
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+ bits32 av, bv, res;
+
+ if ( STATUS(default_nan_mode) )
+ return float32_default_nan;
+
+ aIsNaN = float32_is_nan( a );
+ aIsSignalingNaN = float32_is_signaling_nan( a );
+ bIsNaN = float32_is_nan( b );
+ bIsSignalingNaN = float32_is_signaling_nan( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+#if SNAN_BIT_IS_ONE
+ av &= ~0x00400000;
+ bv &= ~0x00400000;
+#else
+ av |= 0x00400000;
+ bv |= 0x00400000;
+#endif
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+ if ( aIsSignalingNaN ) {
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;
+ res = bIsNaN ? bv : av;
+ }
+ else if ( aIsNaN ) {
+ if ( bIsSignalingNaN || ! bIsNaN )
+ res = av;
+ else {
+ returnLargerSignificand:
+ if ( (bits32) ( av<<1 ) < (bits32) ( bv<<1 ) )
+ res = bv;
+ else if ( (bits32) ( bv<<1 ) < (bits32) ( av<<1 ) )
+ res = av;
+ else
+ res = ( av < bv ) ? av : bv;
+ }
+ }
+ else {
+ res = bv;
+ }
+ return make_float32(res);
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF ))
+#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 ))
+#elif defined(TARGET_HPPA)
+#define float64_default_nan make_float64(LIT64( 0x7FF4000000000000 ))
+#elif SNAN_BIT_IS_ONE
+#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF ))
+#else
+#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 ))
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_nan( float64 a_ )
+{
+ bits64 a = float64_val(a_);
+#if SNAN_BIT_IS_ONE
+ return
+ ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+ && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+#else
+ return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_signaling_nan( float64 a_ )
+{
+ bits64 a = float64_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) );
+#else
+ return
+ ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+ && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+#endif
+}
+
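Under the SNAN_BIT_IS_ONE == 0 convention used here, a double is a quiet NaN when the exponent field is all ones and fraction bit 51 is set, which collapses to a single unsigned compare once the sign bit is shifted out; a signaling NaN has bit 51 clear and some lower fraction bit set. The same tests written against a plain uint64_t pattern, as a self-contained sketch:

#include <stdint.h>

/* Mirrors float64_is_nan / float64_is_signaling_nan in the
   SNAN_BIT_IS_ONE == 0 case, for a raw 64-bit pattern. */
static int is_quiet_nan64(uint64_t a)
{
    return 0xFFF0000000000000ULL <= (uint64_t)(a << 1);   /* exp all ones, fraction bit 51 set */
}

static int is_signaling_nan64(uint64_t a)
{
    return (((a >> 51) & 0xFFF) == 0xFFE)                 /* exp all ones, fraction bit 51 clear */
        && (a & 0x0007FFFFFFFFFFFFULL);                   /* at least one lower fraction bit set */
}
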
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ z.sign = float64_val(a)>>63;
+ z.low = 0;
+ z.high = float64_val(a)<<12;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+ bits64 mantissa = a.high>>12;
+
+ if ( mantissa )
+ return make_float64(
+ ( ( (bits64) a.sign )<<63 )
+ | LIT64( 0x7FF0000000000000 )
+ | ( a.high>>12 ));
+ else
+ return float64_default_nan;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM)
+{
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+ bits64 av, bv, res;
+
+ if ( STATUS(default_nan_mode) )
+ return float64_default_nan;
+
+ aIsNaN = float64_is_nan( a );
+ aIsSignalingNaN = float64_is_signaling_nan( a );
+ bIsNaN = float64_is_nan( b );
+ bIsSignalingNaN = float64_is_signaling_nan( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+#if SNAN_BIT_IS_ONE
+ av &= ~LIT64( 0x0008000000000000 );
+ bv &= ~LIT64( 0x0008000000000000 );
+#else
+ av |= LIT64( 0x0008000000000000 );
+ bv |= LIT64( 0x0008000000000000 );
+#endif
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+ if ( aIsSignalingNaN ) {
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;
+ res = bIsNaN ? bv : av;
+ }
+ else if ( aIsNaN ) {
+ if ( bIsSignalingNaN || ! bIsNaN )
+ res = av;
+ else {
+ returnLargerSignificand:
+ if ( (bits64) ( av<<1 ) < (bits64) ( bv<<1 ) )
+ res = bv;
+ else if ( (bits64) ( bv<<1 ) < (bits64) ( av<<1 ) )
+ res = av;
+ else
+ res = ( av < bv ) ? av : bv;
+ }
+ }
+ else {
+ res = bv;
+ }
+ return make_float64(res);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN. The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define floatx80_default_nan_high 0x7FFF
+#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF )
+#else
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low LIT64( 0xC000000000000000 )
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| quiet NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_nan( floatx80 a )
+{
+#if SNAN_BIT_IS_ONE
+ bits64 aLow;
+
+ aLow = a.low & ~ LIT64( 0x4000000000000000 );
+ return
+ ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (bits64) ( aLow<<1 )
+ && ( a.low == aLow );
+#else
+ return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_signaling_nan( floatx80 a )
+{
+#if SNAN_BIT_IS_ONE
+ return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+#else
+ bits64 aLow;
+
+ aLow = a.low & ~ LIT64( 0x4000000000000000 );
+ return
+ ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (bits64) ( aLow<<1 )
+ && ( a.low == aLow );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ z.sign = a.high>>15;
+ z.low = 0;
+ z.high = a.low;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+ floatx80 z;
+
+ if (a.high)
+ z.low = a.high;
+ else
+ z.low = floatx80_default_nan_low;
+ z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+ if ( STATUS(default_nan_mode) ) {
+ a.low = floatx80_default_nan_low;
+ a.high = floatx80_default_nan_high;
+ return a;
+ }
+
+ aIsNaN = floatx80_is_nan( a );
+ aIsSignalingNaN = floatx80_is_signaling_nan( a );
+ bIsNaN = floatx80_is_nan( b );
+ bIsSignalingNaN = floatx80_is_signaling_nan( b );
+#if SNAN_BIT_IS_ONE
+ a.low &= ~LIT64( 0xC000000000000000 );
+ b.low &= ~LIT64( 0xC000000000000000 );
+#else
+ a.low |= LIT64( 0xC000000000000000 );
+ b.low |= LIT64( 0xC000000000000000 );
+#endif
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+ if ( aIsSignalingNaN ) {
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;
+ return bIsNaN ? b : a;
+ }
+ else if ( aIsNaN ) {
+ if ( bIsSignalingNaN || ! bIsNaN ) return a;
+ returnLargerSignificand:
+ if ( a.low < b.low ) return b;
+ if ( b.low < a.low ) return a;
+ return ( a.high < b.high ) ? a : b;
+ }
+ else {
+ return b;
+ }
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
+#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+#else
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
+#define float128_default_nan_low LIT64( 0x0000000000000000 )
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_nan( float128 a )
+{
+#if SNAN_BIT_IS_ONE
+ return
+ ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+ && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+#else
+ return
+ ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+ && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_signaling_nan( float128 a )
+{
+#if SNAN_BIT_IS_ONE
+ return
+ ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+ && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+#else
+ return
+ ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+ && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ z.sign = a.high>>63;
+ shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+ float128 z;
+
+ shift128Right( a.high, a.low, 16, &z.high, &z.low );
+ z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 );
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM)
+{
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+ if ( STATUS(default_nan_mode) ) {
+ a.low = float128_default_nan_low;
+ a.high = float128_default_nan_high;
+ return a;
+ }
+
+ aIsNaN = float128_is_nan( a );
+ aIsSignalingNaN = float128_is_signaling_nan( a );
+ bIsNaN = float128_is_nan( b );
+ bIsSignalingNaN = float128_is_signaling_nan( b );
+#if SNAN_BIT_IS_ONE
+ a.high &= ~LIT64( 0x0000800000000000 );
+ b.high &= ~LIT64( 0x0000800000000000 );
+#else
+ a.high |= LIT64( 0x0000800000000000 );
+ b.high |= LIT64( 0x0000800000000000 );
+#endif
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+ if ( aIsSignalingNaN ) {
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;
+ return bIsNaN ? b : a;
+ }
+ else if ( aIsNaN ) {
+ if ( bIsSignalingNaN || ! bIsNaN ) return a;
+ returnLargerSignificand:
+ if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
+ if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
+ return ( a.high < b.high ) ? a : b;
+ }
+ else {
+ return b;
+ }
+}
+
+#endif
diff --git a/src/recompiler/fpu/softfloat.c b/src/recompiler/fpu/softfloat.c
new file mode 100644
index 00000000..6806ebc2
--- /dev/null
+++ b/src/recompiler/fpu/softfloat.c
@@ -0,0 +1,5883 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/* FIXME: Flush-To-Zero only affects results. Denormal inputs should also
+ be flushed to zero. */
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine: (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output. These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+void set_float_rounding_mode(int val STATUS_PARAM)
+{
+ STATUS(float_rounding_mode) = val;
+}
+
+void set_float_exception_flags(int val STATUS_PARAM)
+{
+ STATUS(float_exception_flags) = val;
+}
+
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+ STATUS(floatx80_rounding_precision) = val;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input. If `zSign' is 1, the input is negated before being converted to an
+| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer. However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int8 roundIncrement, roundBits;
+ int32 z;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x40;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x7F;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = absZ & 0x7F;
+ absZ = ( absZ + roundIncrement )>>7;
+ absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+ z = absZ;
+ if ( zSign ) z = - z;
+ if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
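The rounding step is ordinary round-to-nearest-even on a value with 7 fraction bits: add half an ulp (0x40), shift the fraction away, then clear the low result bit when the discarded fraction was exactly one half. A self-contained sketch of just that arithmetic, without the overflow check or exception flags:

#include <stdint.h>

/* Round a fixed-point value with 7 fraction bits to the nearest integer,
   ties to even -- the same arithmetic roundAndPackInt32 performs before
   its range check. Illustrative sketch only. */
static uint64_t round_q7_nearest_even(uint64_t absZ)
{
    uint64_t roundBits = absZ & 0x7F;                /* discarded fraction */
    absZ = (absZ + 0x40) >> 7;                       /* add half an ulp, drop the fraction */
    absZ &= ~(uint64_t)((roundBits ^ 0x40) == 0);    /* exact half: force the result even */
    return absZ;
}

/* Example: 2.5 in this format is 2*128 + 64 = 0x140; the tie rounds down
   to the even value 2, while 3.5 (0x1C0) rounds up to 4. */
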
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer. However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment;
+ int64 z;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ increment = ( (sbits64) absZ1 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && absZ1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && absZ1;
+ }
+ }
+ }
+ if ( increment ) {
+ ++absZ0;
+ if ( absZ0 == 0 ) goto overflow;
+ absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+ }
+ z = absZ0;
+ if ( zSign ) z = - z;
+ if ( z && ( ( z < 0 ) ^ zSign ) ) {
+ overflow:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return
+ zSign ? (sbits64) LIT64( 0x8000000000000000 )
+ : LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+
+ return float32_val(a) & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat32Exp( float32 a )
+{
+
+ return ( float32_val(a)>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat32Sign( float32 a )
+{
+
+ return float32_val(a)>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros32( aSig ) - 8;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
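A subnormal has a stored exponent of 0, so normalization shifts the significand left until its leading 1 reaches bit 23 and compensates by setting the exponent to 1 minus the shift count. A standalone sketch of the same computation; clz32 is a hypothetical portable stand-in for countLeadingZeros32:

#include <stdint.h>

/* Count leading zero bits of a nonzero 32-bit value (stand-in helper). */
static int clz32(uint32_t x)
{
    int n = 0;
    while (!(x & 0x80000000u)) { x <<= 1; ++n; }   /* x must be nonzero */
    return n;
}

/* Normalize a nonzero single-precision subnormal significand so its
   leading 1 sits in bit 23, mirroring normalizeFloat32Subnormal. */
static void normalize_subnormal32(uint32_t aSig, int *zExp, uint32_t *zSig)
{
    int shiftCount = clz32(aSig) - 8;   /* leading zeros above the 24-bit significand field */
    *zSig = aSig << shiftCount;
    *zExp = 1 - shiftCount;
}
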
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+
+ return make_float32(
+ ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
+
+}
+
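Because the three fields are added rather than OR'd, a carry out of the significand propagates into the exponent, which is why a fully normalized zSig (integer bit set) pairs with a zExp one below the true exponent. A quick standalone check of both facts against the bit pattern of 1.0f:

#include <assert.h>
#include <stdint.h>

static uint32_t pack_float32_bits(uint32_t sign, uint32_t exp, uint32_t sig)
{
    return (sign << 31) + (exp << 23) + sig;   /* fields are added, as in packFloat32 */
}

int main(void)
{
    /* 1.0f: sign 0, biased exponent 0x7F, fraction 0 -> 0x3F800000 */
    assert(pack_float32_bits(0, 0x7F, 0) == 0x3F800000u);
    /* A normalized significand carries its integer bit into the exponent:
       exponent 0x7E plus significand 0x00800000 gives the same 0x3F800000. */
    assert(pack_float32_bits(0, 0x7E, 0x00800000u) == 0x3F800000u);
    return 0;
}
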
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int8 roundIncrement, roundBits;
+ flag isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x40;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x7F;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig & 0x7F;
+ if ( 0xFD <= (bits16) zExp ) {
+ if ( ( 0xFD < zExp )
+ || ( ( zExp == 0xFD )
+ && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
+ }
+ if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ( zSig + roundIncrement < 0x80000000 );
+ shift32RightJamming( zSig, - zExp, &zSig );
+ zExp = 0;
+ roundBits = zSig & 0x7F;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig = ( zSig + roundIncrement )>>7;
+ zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+ if ( zSig == 0 ) zExp = 0;
+ return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros32( zSig ) - 1;
+ return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+
+ return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat64Exp( float64 a )
+{
+
+ return ( float64_val(a)>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat64Sign( float64 a )
+{
+
+ return float64_val(a)>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( aSig ) - 11;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+
+ return make_float64(
+ ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int16 roundIncrement, roundBits;
+ flag isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x200;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x3FF;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig & 0x3FF;
+ if ( 0x7FD <= (bits16) zExp ) {
+ if ( ( 0x7FD < zExp )
+ || ( ( zExp == 0x7FD )
+ && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
+ }
+ if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+ shift64RightJamming( zSig, - zExp, &zSig );
+ zExp = 0;
+ roundBits = zSig & 0x3FF;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig = ( zSig + roundIncrement )>>10;
+ zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+ if ( zSig == 0 ) zExp = 0;
+ return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( zSig ) - 1;
+ return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+
+ return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+
+ return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+
+ return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'. The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( aSig );
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+ floatx80 z;
+
+ z.low = zSig;
+ z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+ return z;
+
+}
+
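Unlike float32 and float64, the extended format keeps the integer bit explicitly in bit 63 of the 64-bit significand, so 1.0 packs as biased exponent 0x3FFF with significand 0x8000000000000000. A minimal sketch using a local struct as a stand-in for the floatx80 type:

#include <assert.h>
#include <stdint.h>

struct x80 { uint64_t low; uint16_t high; };   /* stand-in: low = significand, high = sign+exponent */

static struct x80 pack_x80(int sign, int exp, uint64_t sig)
{
    struct x80 z;
    z.low  = sig;
    z.high = (uint16_t)(((unsigned)sign << 15) + (unsigned)exp);
    return z;
}

int main(void)
{
    /* 1.0 in extended precision: bias 0x3FFF, explicit integer bit in bit 63 */
    struct x80 one = pack_x80(0, 0x3FFF, 0x8000000000000000ULL);
    assert(one.high == 0x3FFF && one.low == 0x8000000000000000ULL);
    return 0;
}
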
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+| If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively. Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ roundAndPackFloatx80(
+ int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+ int64 roundIncrement, roundMask, roundBits;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ if ( roundingPrecision == 80 ) goto precision80;
+ if ( roundingPrecision == 64 ) {
+ roundIncrement = LIT64( 0x0000000000000400 );
+ roundMask = LIT64( 0x00000000000007FF );
+ }
+ else if ( roundingPrecision == 32 ) {
+ roundIncrement = LIT64( 0x0000008000000000 );
+ roundMask = LIT64( 0x000000FFFFFFFFFF );
+ }
+ else {
+ goto precision80;
+ }
+ zSig0 |= ( zSig1 != 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = roundMask;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig0 & roundMask;
+ if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+ ) {
+ goto overflow;
+ }
+ if ( zExp <= 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ( zSig0 <= zSig0 + roundIncrement );
+ shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+ zExp = 0;
+ roundBits = zSig0 & roundMask;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ if ( (sbits64) zSig0 < 0 ) zExp = 1;
+ roundIncrement = roundMask + 1;
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ if ( zSig0 < roundIncrement ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ roundIncrement = roundMask + 1;
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ if ( zSig0 == 0 ) zExp = 0;
+ return packFloatx80( zSign, zExp, zSig0 );
+ precision80:
+ increment = ( (sbits64) zSig1 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ }
+ if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE )
+ && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+ && increment
+ )
+ ) {
+ roundMask = 0;
+ overflow:
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( zExp <= 0 ) {
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ! increment
+ || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+ shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+ zExp = 0;
+ if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( roundNearestEven ) {
+ increment = ( (sbits64) zSig1 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ if ( increment ) {
+ ++zSig0;
+ zSig0 &=
+ ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ if ( (sbits64) zSig0 < 0 ) zExp = 1;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( increment ) {
+ ++zSig0;
+ if ( zSig0 == 0 ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ else {
+ zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ }
+ }
+ else {
+ if ( zSig0 == 0 ) zExp = 0;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
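When roundingPrecision is 64 or 32, the 64-bit extended significand is rounded at the double- or single-precision boundary: keeping 53 or 24 significant bits leaves 11 or 40 low bits to discard, which is where the roundIncrement/roundMask pairs above come from. A standalone check of that arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Rounding the 64-bit significand to double precision keeps 53 bits. */
    assert(64 - 53 == 11 && ((1ULL << 11) - 1) == 0x7FFULL);
    assert((1ULL << 10) == 0x400ULL);                      /* half an ulp at that position */
    /* Rounding it to single precision keeps 24 bits. */
    assert(64 - 24 == 40 && ((1ULL << 40) - 1) == 0xFFFFFFFFFFULL);
    assert((1ULL << 39) == 0x8000000000ULL);               /* half an ulp at that position */
    return 0;
}
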
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ normalizeRoundAndPackFloatx80(
+ int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ if ( zSig0 == 0 ) {
+ zSig0 = zSig1;
+ zSig1 = 0;
+ zExp -= 64;
+ }
+ shiftCount = countLeadingZeros64( zSig0 );
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ zExp -= shiftCount;
+ return
+ roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac1( float128 a )
+{
+
+ return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac0( float128 a )
+{
+
+ return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloat128Exp( float128 a )
+{
+
+ return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat128Sign( float128 a )
+{
+
+ return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'. The normalized exponent is stored at the location
+| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat128Subnormal(
+ bits64 aSig0,
+ bits64 aSig1,
+ int32 *zExpPtr,
+ bits64 *zSig0Ptr,
+ bits64 *zSig1Ptr
+ )
+{
+ int8 shiftCount;
+
+ if ( aSig0 == 0 ) {
+ shiftCount = countLeadingZeros64( aSig1 ) - 15;
+ if ( shiftCount < 0 ) {
+ *zSig0Ptr = aSig1>>( - shiftCount );
+ *zSig1Ptr = aSig1<<( shiftCount & 63 );
+ }
+ else {
+ *zSig0Ptr = aSig1<<shiftCount;
+ *zSig1Ptr = 0;
+ }
+ *zExpPtr = - shiftCount - 63;
+ }
+ else {
+ shiftCount = countLeadingZeros64( aSig0 ) - 15;
+ shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+ *zExpPtr = 1 - shiftCount;
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result. After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result. This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+ packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+ float128 z;
+
+ z.low = zSig1;
+ z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent. The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128
+ roundAndPackFloat128(
+ flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ increment = ( (sbits64) zSig2 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ if ( 0x7FFD <= (bits32) zExp ) {
+ if ( ( 0x7FFD < zExp )
+ || ( ( zExp == 0x7FFD )
+ && eq128(
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF ),
+ zSig0,
+ zSig1
+ )
+ && increment
+ )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return
+ packFloat128(
+ zSign,
+ 0x7FFE,
+ LIT64( 0x0000FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( zExp < 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ! increment
+ || lt128(
+ zSig0,
+ zSig1,
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+ zExp = 0;
+ if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( roundNearestEven ) {
+ increment = ( (sbits64) zSig2 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ }
+ if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( increment ) {
+ add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+ zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+ }
+ else {
+ if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+ }
+ return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input. This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static float128
+ normalizeRoundAndPackFloat128(
+ flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
+{
+ int8 shiftCount;
+ bits64 zSig2;
+
+ if ( zSig0 == 0 ) {
+ zSig0 = zSig1;
+ zSig1 = 0;
+ zExp -= 64;
+ }
+ shiftCount = countLeadingZeros64( zSig0 ) - 15;
+ if ( 0 <= shiftCount ) {
+ zSig2 = 0;
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ }
+ else {
+ shift128ExtraRightJamming(
+ zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+ }
+ zExp -= shiftCount;
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a STATUS_PARAM )
+{
+ flag zSign;
+
+ if ( a == 0 ) return float32_zero;
+ if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+ zSign = ( a < 0 );
+ return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
+
+}
+
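The special case exists because 0x80000000 cannot be negated in 32 bits; its magnitude 2^31 is packed directly with biased exponent 0x7F + 31 = 0x9E, giving the bit pattern 0xCF000000. A standalone check using the host's own int-to-float conversion rather than this routine:

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    float f = (float)INT32_MIN;        /* -2^31 is exactly representable in single precision */
    uint32_t bits;
    memcpy(&bits, &f, sizeof bits);
    /* sign 1, biased exponent 0x9E, fraction 0 -> 0xCF000000 */
    assert(bits == 0xCF000000u);
    return 0;
}
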
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ bits64 zSig;
+
+ if ( a == 0 ) return float64_zero;
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 21;
+ zSig = absA;
+ return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ bits64 zSig;
+
+ if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 32;
+ zSig = absA;
+ return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ bits64 zSig0;
+
+ if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 17;
+ zSig0 = absA;
+ return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+
+ if ( a == 0 ) return float32_zero;
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA ) - 40;
+ if ( 0 <= shiftCount ) {
+ return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+ }
+ else {
+ shiftCount += 7;
+ if ( shiftCount < 0 ) {
+ shift64RightJamming( absA, - shiftCount, &absA );
+ }
+ else {
+ absA <<= shiftCount;
+ }
+ return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
+ }
+
+}
+
+float32 uint64_to_float32( uint64 a STATUS_PARAM )
+{
+ int8 shiftCount;
+
+ if ( a == 0 ) return float32_zero;
+ shiftCount = countLeadingZeros64( a ) - 40;
+ if ( 0 <= shiftCount ) {
+        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
+ }
+ else {
+ shiftCount += 7;
+ if ( shiftCount < 0 ) {
+ shift64RightJamming( a, - shiftCount, &a );
+ }
+ else {
+ a <<= shiftCount;
+ }
+        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a STATUS_PARAM )
+{
+ flag zSign;
+
+ if ( a == 0 ) return float64_zero;
+ if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+ return packFloat64( 1, 0x43E, 0 );
+ }
+ zSign = ( a < 0 );
+ return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
+
+}
+
+float64 uint64_to_float64( uint64 a STATUS_PARAM )
+{
+    if ( a == 0 ) return float64_zero;
+    if ( (sbits64) a < 0 ) {
+        /* bit 63 set: halve with jamming so bit 63 of the significand passed
+           to normalizeRoundAndPackFloat64 is zero, as that routine requires */
+        shift64RightJamming( a, 1, &a );
+        return normalizeRoundAndPackFloat64( 0, 0x43D, a STATUS_VAR );
+    }
+    return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+
+ if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA );
+ return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+ int32 zExp;
+ bits64 zSig0, zSig1;
+
+ if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA ) + 49;
+ zExp = 0x406E - shiftCount;
+ if ( 64 <= shiftCount ) {
+ zSig1 = 0;
+ zSig0 = absA;
+ shiftCount -= 64;
+ }
+ else {
+ zSig1 = absA;
+ zSig0 = 0;
+ }
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits32 aSig;
+ bits64 aSig64;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+ if ( aExp ) aSig |= 0x00800000;
+ shiftCount = 0xAF - aExp;
+ aSig64 = aSig;
+ aSig64 <<= 32;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+ return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits32 aSig;
+ int32 z;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0x9E;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xCF000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+ }
+ return (sbits32) 0x80000000;
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig = ( aSig | 0x00800000 )<<8;
+ z = aSig>>( - shiftCount );
+ if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
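+
+/* For illustration: float32_to_int32_round_to_zero( -2.75f ) truncates
+ toward zero, returning -2 and raising the inexact flag. An input of 2^31
+ (0x4F000000) does not fit in int32, so invalid is raised and 0x7FFFFFFF is
+ returned; only the exactly representable -2^31 (0xCF000000) converts to
+ 0x80000000 without any exception. */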
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits32 aSig;
+ bits64 aSig64, aSigExtra;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = 0xBE - aExp;
+ if ( shiftCount < 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ if ( aExp ) aSig |= 0x00800000;
+ aSig64 = aSig;
+ aSig64 <<= 40;
+ shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+ return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits32 aSig;
+ bits64 aSig64;
+ int64 z;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0xBE;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xDF000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig64 = aSig | 0x00800000;
+ aSig64 <<= 40;
+ z = aSig64>>( - shiftCount );
+ if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
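+
+/* For illustration: widening is always exact, so float32_to_float64( 1.5f )
+ simply re-biases the exponent and shifts the fraction left by 29 bits:
+ 0x3FC00000 becomes 0x3FF8000000000000, with no flags raised. */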
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ aSig |= 0x00800000;
+ return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
+ return packFloat128( aSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a STATUS_PARAM)
+{
+ flag aSign;
+ int16 aExp;
+ bits32 lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ bits32 z;
+
+ aExp = extractFloat32Exp( a );
+ if ( 0x96 <= aExp ) {
+ if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+ return propagateFloat32NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp <= 0x7E ) {
+ if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat32Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+ return packFloat32( aSign, 0x7F, 0 );
+ }
+ break;
+ case float_round_down:
+ return make_float32(aSign ? 0xBF800000 : 0);
+ case float_round_up:
+ return make_float32(aSign ? 0x80000000 : 0x3F800000);
+ }
+ return packFloat32( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x96 - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = float32_val(a);
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z += lastBitMask>>1;
+ if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
+ z += roundBitsMask;
+ }
+ }
+ z &= ~ roundBitsMask;
+ if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return make_float32(z);
+
+}
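+
+/* For illustration (the result depends on the rounding mode in effect):
+ with float_round_nearest_even, float32_round_to_int( 2.5f ) returns 2.0f
+ (ties go to even) and raises inexact; with float_round_down, an input of
+ -0.4f is handled by the small-exponent branch above and returns -1.0f
+ (0xBF800000). */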
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+ int16 aExp, bExp, zExp;
+ bits32 aSig, bSig, zSig;
+ int16 expDiff;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 6;
+ bSig <<= 6;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= 0x20000000;
+ }
+ shift32RightJamming( bSig, expDiff, &bSig );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= 0x20000000;
+ }
+ shift32RightJamming( aSig, - expDiff, &aSig );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0xFF ) {
+ if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
+ return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+ }
+ zSig = 0x40000000 + aSig + bSig;
+ zExp = aExp;
+ goto roundAndPack;
+ }
+ aSig |= 0x20000000;
+ zSig = ( aSig + bSig )<<1;
+ --zExp;
+ if ( (sbits32) zSig < 0 ) {
+ zSig = aSig + bSig;
+ ++zExp;
+ }
+ roundAndPack:
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+ int16 aExp, bExp, zExp;
+ bits32 aSig, bSig, zSig;
+ int16 expDiff;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 7;
+ bSig <<= 7;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0xFF ) {
+ if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign ^ 1, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= 0x40000000;
+ }
+ shift32RightJamming( aSig, - expDiff, &aSig );
+ bSig |= 0x40000000;
+ bBigger:
+ zSig = bSig - aSig;
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= 0x40000000;
+ }
+ shift32RightJamming( bSig, expDiff, &bSig );
+ aSig |= 0x40000000;
+ aBigger:
+ zSig = aSig - bSig;
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat32Sigs( a, b, aSign STATUS_VAR);
+ }
+ else {
+ return subFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point value
+| `b' from `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int16 aExp, bExp, zExp;
+ bits32 aSig, bSig;
+ bits64 zSig64;
+ bits32 zSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ bSign = extractFloat32Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0xFF ) {
+ if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+ return propagateFloat32NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x7F;
+ aSig = ( aSig | 0x00800000 )<<7;
+ bSig = ( bSig | 0x00800000 )<<8;
+ shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+ zSig = zSig64;
+ if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+ zSig <<= 1;
+ --zExp;
+ }
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int16 aExp, bExp, zExp;
+ bits32 aSig, bSig, zSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ bSign = extractFloat32Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x7D;
+ aSig = ( aSig | 0x00800000 )<<7;
+ bSig = ( bSig | 0x00800000 )<<8;
+ if ( bSig <= ( aSig + aSig ) ) {
+ aSig >>= 1;
+ ++zExp;
+ }
+ zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+ if ( ( zSig & 0x3F ) == 0 ) {
+ zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+ }
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
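+
+/* For illustration: dividing a finite nonzero value by zero, e.g.
+ float32_div( 1.0f, 0.0f ), raises only the divide-by-zero flag and returns a
+ correctly signed infinity, while 0.0f / 0.0f raises invalid and returns the
+ default NaN, exactly as in the special-case ladder above. */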
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp, bExp, expDiff;
+ bits32 aSig, bSig;
+ bits32 q;
+ bits64 aSig64, bSig64, q64;
+ bits32 alternateASig;
+ sbits32 sigMean;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ if ( aExp == 0xFF ) {
+ if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+ return propagateFloat32NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return a;
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ expDiff = aExp - bExp;
+ aSig |= 0x00800000;
+ bSig |= 0x00800000;
+ if ( expDiff < 32 ) {
+ aSig <<= 8;
+ bSig <<= 8;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ aSig >>= 1;
+ }
+ q = ( bSig <= aSig );
+ if ( q ) aSig -= bSig;
+ if ( 0 < expDiff ) {
+ q = ( ( (bits64) aSig )<<32 ) / bSig;
+ q >>= 32 - expDiff;
+ bSig >>= 2;
+ aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ else {
+ aSig >>= 2;
+ bSig >>= 2;
+ }
+ }
+ else {
+ if ( bSig <= aSig ) aSig -= bSig;
+ aSig64 = ( (bits64) aSig )<<40;
+ bSig64 = ( (bits64) bSig )<<40;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ aSig64 = - ( ( bSig * q64 )<<38 );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ q = q64>>( 64 - expDiff );
+ bSig <<= 6;
+ aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ do {
+ alternateASig = aSig;
+ ++q;
+ aSig -= bSig;
+ } while ( 0 <= (sbits32) aSig );
+ sigMean = aSig + alternateASig;
+ if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+ aSig = alternateASig;
+ }
+ zSign = ( (sbits32) aSig < 0 );
+ if ( zSign ) aSig = - aSig;
+ return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, zExp;
+ bits32 aSig, zSig;
+ bits64 rem, term;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ if ( ! aSign ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig ) == 0 ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return float32_zero;
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+ aSig = ( aSig | 0x00800000 )<<8;
+ zSig = estimateSqrt32( aExp, aSig ) + 2;
+ if ( ( zSig & 0x7F ) <= 5 ) {
+ if ( zSig < 2 ) {
+ zSig = 0x7FFFFFFF;
+ goto roundAndPack;
+ }
+ aSig >>= aExp & 1;
+ term = ( (bits64) zSig ) * zSig;
+ rem = ( ( (bits64) aSig )<<32 ) - term;
+ while ( (sbits64) rem < 0 ) {
+ --zSig;
+ rem += ( ( (bits64) zSig )<<1 ) | 1;
+ }
+ zSig |= ( rem != 0 );
+ }
+ shift32RightJamming( zSig, 1, &zSig );
+ roundAndPack:
+ return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the binary exponential of the single-precision floating-point value
+| `a'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+|
+| Uses the following identities:
+|
+| 1.  2^x = e^(x * ln 2)
+|
+| 2.  e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ... + x^n/n! + ...
+*----------------------------------------------------------------------------*/
+
+static const float64 float32_exp2_coefficients[15] =
+{
+ make_float64( 0x3ff0000000000000ll ), /* 1 */
+ make_float64( 0x3fe0000000000000ll ), /* 2 */
+ make_float64( 0x3fc5555555555555ll ), /* 3 */
+ make_float64( 0x3fa5555555555555ll ), /* 4 */
+ make_float64( 0x3f81111111111111ll ), /* 5 */
+ make_float64( 0x3f56c16c16c16c17ll ), /* 6 */
+ make_float64( 0x3f2a01a01a01a01all ), /* 7 */
+ make_float64( 0x3efa01a01a01a01all ), /* 8 */
+ make_float64( 0x3ec71de3a556c734ll ), /* 9 */
+ make_float64( 0x3e927e4fb7789f5cll ), /* 10 */
+ make_float64( 0x3e5ae64567f544e4ll ), /* 11 */
+ make_float64( 0x3e21eed8eff8d898ll ), /* 12 */
+ make_float64( 0x3de6124613a86d09ll ), /* 13 */
+ make_float64( 0x3da93974a8c07c9dll ), /* 14 */
+ make_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
+};
+
+float32 float32_exp2( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+ float64 r, x, xn;
+ int i;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0xFF) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ return (aSign) ? float32_zero : a;
+ }
+ if (aExp == 0) {
+ if (aSig == 0) return float32_one;
+ }
+
+ float_raise( float_flag_inexact STATUS_VAR);
+
+ /* ******************************* */
+ /* using float64 for approximation */
+ /* ******************************* */
+ x = float32_to_float64(a STATUS_VAR);
+ x = float64_mul(x, float64_ln2 STATUS_VAR);
+
+ xn = x;
+ r = float64_one;
+ for (i = 0 ; i < 15 ; i++) {
+ float64 f;
+
+ f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
+ r = float64_add(r, f STATUS_VAR);
+
+ xn = float64_mul(xn, x STATUS_VAR);
+ }
+
+ return float64_to_float32(r, status);
+}
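+
+/* For illustration: coefficient i of float32_exp2_coefficients holds
+ 1/(i+1)! rounded to double, so the loop above sums the Taylor series of e^x
+ at x = a*ln(2).  Assuming round-to-nearest-even, float32_exp2( 1.0f ) comes
+ out as exactly 2.0f (0x40000000), and inexact is raised unconditionally for
+ any finite nonzero input.  A minimal usage sketch, assuming the usual
+ softfloat.h prototypes with a trailing float_status pointer: */
+#if 0
+ float_status st = {0};  /* zero-init: nearest-even rounding, no flags set */
+ float32 two = float32_exp2( make_float32( 0x3F800000 ) /* 1.0f */, &st );
+ /* two == make_float32( 0x40000000 ), st.float_exception_flags has the */
+ /* float_flag_inexact bit set. */
+#endif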
+
+/*----------------------------------------------------------------------------
+| Returns the binary log of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float32 float32_log2( float32 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp;
+ bits32 aSig, zSig, i;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x7F;
+ aSig |= 0x00800000;
+ zSign = aExp < 0;
+ zSig = aExp << 23;
+
+ for (i = 1 << 22; i > 0; i >>= 1) {
+ aSig = ( (bits64)aSig * aSig ) >> 23;
+ if ( aSig & 0x01000000 ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+
+ return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
+}
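+
+/* For illustration: the loop above extracts one fraction bit of the base-2
+ logarithm per iteration by repeatedly squaring the significand.  Exact
+ powers of two produce no fraction bits, so float32_log2( 8.0f ) returns
+ exactly 3.0f (0x40400000); a zero input returns -infinity and a negative
+ input raises invalid and returns the default NaN, per the checks at the
+ top. */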
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq( float32 a, float32 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return ( float32_val(a) == float32_val(b) ) ||
+ ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits32 av, bv;
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits32 av, bv;
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
+{
+ bits32 av, bv;
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ av = float32_val(a);
+ bv = float32_val(b);
+ return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits32 av, bv;
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits32 av, bv;
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits64 aSig;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x42C - aExp;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+ return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits64 aSig, savedASig;
+ int32 z;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( 0x41E < aExp ) {
+ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FF ) {
+ if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits64 aSig, aSigExtra;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( 0x43E < aExp ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FF )
+ && ( aSig != LIT64( 0x0010000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ aSigExtra = 0;
+ aSig <<= - shiftCount;
+ }
+ else {
+ shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+ }
+ return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits64 aSig;
+ int64 z;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = aExp - 0x433;
+ if ( 0 <= shiftCount ) {
+ if ( 0x43E <= aExp ) {
+ if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FF )
+ && ( aSig != LIT64( 0x0010000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ z = aSig<<shiftCount;
+ }
+ else {
+ if ( aExp < 0x3FE ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ z = aSig>>( - shiftCount );
+ if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig;
+ bits32 zSig;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ shift64RightJamming( aSig, 22, &aSig );
+ zSig = aSig;
+ if ( aExp || zSig ) {
+ zSig |= 0x40000000;
+ aExp -= 0x381;
+ }
+ return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| half-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
+{
+ return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig;
+}
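+
+/* A minimal sketch of the field-addition convention described above; 0x3C00
+ is the IEEE half-precision encoding of 1.0: */
+#if 0
+ bits16 h1 = packFloat16( 0, 0x0F, 0 );     /* exponent 15, fraction 0        */
+ bits16 h2 = packFloat16( 0, 0x0E, 0x400 ); /* integer bit carries into exp   */
+ /* h1 == h2 == 0x3C00 */
+#endif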
+
+/* Half precision floats come in two formats: standard IEEE and "ARM" format.
+ The latter gains extra exponent range by omitting the NaN/Inf encodings. */
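+
+/* In the IEEE format the 5-bit exponent value 0x1F is reserved for
+ infinities and NaNs, so the largest finite magnitude is
+ (2 - 2^-10) * 2^15 = 65504.  In the ARM alternative format 0x1F is an
+ ordinary exponent, extending the range to (2 - 2^-10) * 2^16 = 131008 at
+ the cost of having no Inf/NaN encodings; the `ieee' flag below selects
+ between the two. */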
+
+float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+
+ aSign = a >> 15;
+ aExp = (a >> 10) & 0x1f;
+ aSig = a & 0x3ff;
+
+ if (aExp == 0x1f && ieee) {
+ if (aSig) {
+ /* Make sure correct exceptions are raised. */
+ float32ToCommonNaN(a STATUS_VAR);
+ aSig |= 0x200;
+ }
+ return packFloat32(aSign, 0xff, aSig << 13);
+ }
+ if (aExp == 0) {
+ int8 shiftCount;
+
+ if (aSig == 0) {
+ return packFloat32(aSign, 0, 0);
+ }
+
+ shiftCount = countLeadingZeros32( aSig ) - 21;
+ aSig = aSig << shiftCount;
+ aExp = -shiftCount;
+ }
+ return packFloat32( aSign, aExp + 0x70, aSig << 13);
+}
+
+bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+ bits32 mask;
+ bits32 increment;
+ int8 roundingMode;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if (aSig) {
+ /* Make sure correct exceptions are raised. */
+ float32ToCommonNaN(a STATUS_VAR);
+ aSig |= 0x00400000;
+ }
+ return packFloat16(aSign, 0x1f, aSig >> 13);
+ }
+ if (aExp == 0 && aSig == 0) {
+ return packFloat16(aSign, 0, 0);
+ }
+ /* Decimal point between bits 22 and 23. */
+ aSig |= 0x00800000;
+ aExp -= 0x7f;
+ if (aExp < -14) {
+ mask = 0x007fffff;
+ if (aExp < -24) {
+ aExp = -25;
+ } else {
+ mask >>= 24 + aExp;
+ }
+ } else {
+ mask = 0x00001fff;
+ }
+ if (aSig & mask) {
+ float_raise( float_flag_underflow STATUS_VAR );
+ roundingMode = STATUS(float_rounding_mode);
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ increment = (mask + 1) >> 1;
+ if ((aSig & mask) == increment) {
+ increment = aSig & (increment << 1);
+ }
+ break;
+ case float_round_up:
+ increment = aSign ? 0 : mask;
+ break;
+ case float_round_down:
+ increment = aSign ? mask : 0;
+ break;
+ default: /* round_to_zero */
+ increment = 0;
+ break;
+ }
+ aSig += increment;
+ if (aSig >= 0x01000000) {
+ aSig >>= 1;
+ aExp++;
+ }
+ } else if (aExp < -14
+ && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
+ float_raise( float_flag_underflow STATUS_VAR);
+ }
+
+ if (ieee) {
+ if (aExp > 15) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat16(aSign, 0x1f, 0);
+ }
+ } else {
+ if (aExp > 16) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat16(aSign, 0x1f, 0x3ff);
+ }
+ }
+ if (aExp < -24) {
+ return packFloat16(aSign, 0, 0);
+ }
+ if (aExp < -14) {
+ aSig >>= -14 - aExp;
+ aExp = -14;
+ }
+ return packFloat16(aSign, aExp + 14, aSig >> 13);
+}
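+
+/* For illustration: float32_to_float16( 1.0f, 1 ) converts exactly to
+ 0x3C00 with no flags raised.  An input of 65536.0f (0x47800000) exceeds the
+ IEEE half-precision range, so with ieee != 0 this implementation raises
+ overflow and inexact and returns infinity (0x7C00); with ieee == 0 the same
+ bit pattern 0x7C00 is returned, but in the alternative format it denotes the
+ finite value 65536 and no overflow occurs. */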
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ return
+ packFloatx80(
+ aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig, zSig0, zSig1;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
+ return packFloat128( aSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+ return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ bits64 z;
+
+ aExp = extractFloat64Exp( a );
+ if ( 0x433 <= aExp ) {
+ if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+ return propagateFloat64NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp < 0x3FF ) {
+ if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat64Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+ return packFloat64( aSign, 0x3FF, 0 );
+ }
+ break;
+ case float_round_down:
+ return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
+ case float_round_up:
+ return make_float64(
+ aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
+ }
+ return packFloat64( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x433 - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = float64_val(a);
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z += lastBitMask>>1;
+ if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
+ z += roundBitsMask;
+ }
+ }
+ z &= ~ roundBitsMask;
+ if ( z != float64_val(a) )
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ return make_float64(z);
+
+}
+
+float64 float64_trunc_to_int( float64 a STATUS_PARAM)
+{
+ int oldmode;
+ float64 res;
+ oldmode = STATUS(float_rounding_mode);
+ STATUS(float_rounding_mode) = float_round_to_zero;
+ res = float64_round_to_int(a STATUS_VAR);
+ STATUS(float_rounding_mode) = oldmode;
+ return res;
+}
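+
+/* For illustration: float64_trunc_to_int forces round-to-zero for a single
+ call, so float64_trunc_to_int( -1.5 ) returns -1.0 and raises inexact
+ regardless of the rounding mode selected in the caller's status block. */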
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+ int16 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig;
+ int16 expDiff;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 9;
+ bSig <<= 9;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= LIT64( 0x2000000000000000 );
+ }
+ shift64RightJamming( bSig, expDiff, &bSig );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= LIT64( 0x2000000000000000 );
+ }
+ shift64RightJamming( aSig, - expDiff, &aSig );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FF ) {
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
+ return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+ }
+ zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+ zExp = aExp;
+ goto roundAndPack;
+ }
+ aSig |= LIT64( 0x2000000000000000 );
+ zSig = ( aSig + bSig )<<1;
+ --zExp;
+ if ( (sbits64) zSig < 0 ) {
+ zSig = aSig + bSig;
+ ++zExp;
+ }
+ roundAndPack:
+ return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+ int16 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig;
+ int16 expDiff;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 10;
+ bSig <<= 10;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FF ) {
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign ^ 1, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= LIT64( 0x4000000000000000 );
+ }
+ shift64RightJamming( aSig, - expDiff, &aSig );
+ bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+ zSig = bSig - aSig;
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= LIT64( 0x4000000000000000 );
+ }
+ shift64RightJamming( bSig, expDiff, &bSig );
+ aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+ zSig = aSig - bSig;
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point value
+| `b' from `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int16 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ bSign = extractFloat64Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FF ) {
+ if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+ return propagateFloat64NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x3FF;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
+ zSig0 |= ( zSig1 != 0 );
+ if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+ zSig0 <<= 1;
+ --zExp;
+ }
+ return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'. The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int16 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig;
+ bits64 rem0, rem1;
+ bits64 term0, term1;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ bSign = extractFloat64Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x3FD;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ if ( bSig <= ( aSig + aSig ) ) {
+ aSig >>= 1;
+ ++zExp;
+ }
+ zSig = estimateDiv128To64( aSig, 0, bSig );
+ if ( ( zSig & 0x1FF ) <= 2 ) {
+ mul64To128( bSig, zSig, &term0, &term1 );
+ sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig;
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+ }
+ zSig |= ( rem1 != 0 );
+ }
+ return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
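The special cases handled at the top of float64_div mirror what a host FPU does natively: 0/0 is an invalid operation yielding a NaN, while a finite nonzero value divided by zero raises divide-by-zero and yields a signed infinity. A small sketch using C99 <fenv.h>; whether a particular host toolchain and libm report these flags exactly this way is an assumption:

    #include <fenv.h>
    #include <stdio.h>

    #pragma STDC FENV_ACCESS ON            /* tell the compiler we inspect the flags */

    int main(void)
    {
        volatile double zero = 0.0, one = 1.0;
        feclearexcept(FE_ALL_EXCEPT);
        double q1 = zero / zero;           /* invalid operation, result is a NaN     */
        int invalid = fetestexcept(FE_INVALID) != 0;
        feclearexcept(FE_ALL_EXCEPT);
        double q2 = one / zero;            /* division by zero, result is +infinity  */
        int divbyzero = fetestexcept(FE_DIVBYZERO) != 0;
        printf("0/0 = %f (invalid=%d), 1/0 = %f (divbyzero=%d)\n",
               q1, invalid, q2, divbyzero);
        return 0;
    }
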
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp, bExp, expDiff;
+ bits64 aSig, bSig;
+ bits64 q, alternateASig;
+ sbits64 sigMean;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ if ( aExp == 0x7FF ) {
+ if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+ return propagateFloat64NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return a;
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ expDiff = aExp - bExp;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ aSig >>= 1;
+ }
+ q = ( bSig <= aSig );
+ if ( q ) aSig -= bSig;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig, 0, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ aSig = - ( ( bSig>>2 ) * q );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ if ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig, 0, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ q >>= 64 - expDiff;
+ bSig >>= 2;
+ aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ else {
+ aSig >>= 2;
+ bSig >>= 2;
+ }
+ do {
+ alternateASig = aSig;
+ ++q;
+ aSig -= bSig;
+ } while ( 0 <= (sbits64) aSig );
+ sigMean = aSig + alternateASig;
+ if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+ aSig = alternateASig;
+ }
+ zSign = ( (sbits64) aSig < 0 );
+ if ( zSign ) aSig = - aSig;
+ return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
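The remainder computed by float64_rem is the IEEE one: the quotient is rounded to the nearest integer (ties to even, which is what the `q & 1' test tracks), so the result can be negative even for positive operands. Those are the same semantics as C99 remainder(), as opposed to fmod()'s truncated quotient; a one-line comparison:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* 5/3 is about 1.67, nearest integer quotient is 2, so 5 - 2*3 = -1. */
        printf("remainder(5,3) = %g, fmod(5,3) = %g\n",
               remainder(5.0, 3.0), fmod(5.0, 3.0));   /* prints -1 and 2 */
        return 0;
    }
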
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, zExp;
+ bits64 aSig, zSig, doubleZSig;
+ bits64 rem0, rem1, term0, term1;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig ) == 0 ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return float64_zero;
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+ aSig |= LIT64( 0x0010000000000000 );
+ zSig = estimateSqrt32( aExp, aSig>>21 );
+ aSig <<= 9 - ( aExp & 1 );
+ zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+ if ( ( zSig & 0x1FF ) <= 5 ) {
+ doubleZSig = zSig<<1;
+ mul64To128( zSig, zSig, &term0, &term1 );
+ sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig;
+ doubleZSig -= 2;
+ add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+ }
+ zSig |= ( ( rem0 | rem1 ) != 0 );
+ }
+ return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
+
+}
+
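The lines `zExp = ((aExp - 0x3FF)>>1) + 0x3FE' and `aSig <<= 9 - (aExp & 1)' are the usual square-root exponent split: sqrt(m * 2^e) = sqrt(m) * 2^(e/2) when e is even, and for odd e one factor of two is folded into the significand first. A tiny host-double sketch of the identity:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* even exponent: 18 = 1.125 * 2^4, so sqrt(18) = sqrt(1.125) * 2^2 */
        printf("%.17g  %.17g\n", sqrt(18.0), sqrt(1.125) * 4.0);
        /* odd exponent: 8 = 1.0 * 2^3 = 2.0 * 2^2, so sqrt(8) = sqrt(2.0) * 2^1 */
        printf("%.17g  %.17g\n", sqrt(8.0), sqrt(2.0) * 2.0);
        return 0;
    }
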
+/*----------------------------------------------------------------------------
+| Returns the binary log of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float64 float64_log2( float64 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int16 aExp;
+ bits64 aSig, aSig0, aSig1, zSig, i;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x3FF;
+ aSig |= LIT64( 0x0010000000000000 );
+ zSign = aExp < 0;
+ zSig = (bits64)aExp << 52;
+ for (i = 1LL << 51; i > 0; i >>= 1) {
+ mul64To128( aSig, aSig, &aSig0, &aSig1 );
+ aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
+ if ( aSig & LIT64( 0x0020000000000000 ) ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+ return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
+}
+
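The loop in float64_log2 is the classic digit-by-digit base-2 logarithm: with the input normalized into [1,2), repeatedly square it; whenever the square reaches [2,4) the next fraction bit of the logarithm is 1 and the value is halved. A minimal standalone version of the same idea on host doubles (log2_bitwise is a name made up for this sketch):

    #include <math.h>
    #include <stdio.h>

    static double log2_bitwise(double x)        /* assumes 1.0 <= x < 2.0 */
    {
        double result = 0.0, bit = 0.5;
        for (int i = 0; i < 52; i++) {
            x *= x;                             /* square the mantissa          */
            if (x >= 2.0) {                     /* landed in [2,4): bit is 1    */
                x *= 0.5;
                result += bit;
            }
            bit *= 0.5;
        }
        return result;
    }

    int main(void)
    {
        printf("bitwise: %.15f  log2(): %.15f\n", log2_bitwise(1.5), log2(1.5));
        return 0;
    }
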
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq( float64 a, float64 b STATUS_PARAM )
+{
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ av = float64_val(a);
+ bv = float64_val(b);
+ return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
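Two bit tricks carry these comparisons (and float64_eq/float64_le above): `((av | bv)<<1) == 0' is true exactly when both operands are zeros, which makes +0.0 and -0.0 compare equal, and for two operands of the same sign the raw bit patterns order like the values, with the order reversed for negatives (hence `aSign ^ (av < bv)'). A small host-double sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t bits_of(double d) { uint64_t u; memcpy(&u, &d, sizeof u); return u; }

    int main(void)
    {
        uint64_t pz = bits_of(+0.0), nz = bits_of(-0.0);
        printf("+0.0 bits %016llx, -0.0 bits %016llx, ((pz|nz)<<1)==0: %d\n",
               (unsigned long long)pz, (unsigned long long)nz,
               ((pz | nz) << 1) == 0);
        uint64_t m2 = bits_of(-2.0), m3 = bits_of(-3.0);
        printf("-3.0 < -2.0 but bits(-3.0) > bits(-2.0): %d\n", m3 > m2);
        return 0;
    }
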
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise. The invalid exception is raised
+| if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
+{
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ av = float64_val(a);
+ bv = float64_val(b);
+ return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ bits64 av, bv;
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN, the
+| largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+ shiftCount = 0x4037 - aExp;
+ if ( shiftCount <= 0 ) shiftCount = 1;
+ shift64RightJamming( aSig, shiftCount, &aSig );
+ return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig, savedASig;
+ int32 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( 0x401E < aExp ) {
+ if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ shiftCount = 0x403E - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
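The inexact test used here is worth noting: after truncating with a right shift, shifting back and comparing against the saved significand reveals whether any nonzero bits were discarded. The same test on plain integers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t sig = 0x123456789ABCDEFULL;
        int shiftCount = 12;
        uint64_t saved = sig;
        sig >>= shiftCount;                          /* truncate toward zero */
        int inexact = (sig << shiftCount) != saved;  /* were any bits lost?  */
        printf("truncated=%#llx inexact=%d\n", (unsigned long long)sig, inexact);
        return 0;
    }
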
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN,
+| the largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig, aSigExtra;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ shiftCount = 0x403E - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( shiftCount ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FFF )
+ && ( aSig != LIT64( 0x8000000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ aSigExtra = 0;
+ }
+ else {
+ shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+ }
+ return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig;
+ int64 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ shiftCount = aExp - 0x403E;
+ if ( 0 <= shiftCount ) {
+ aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+ if ( ( a.high != 0xC03E ) || aSig ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ z = aSig>>( - shiftCount );
+ if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) {
+ return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
+ }
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ shift64RightJamming( aSig, 33, &aSig );
+ if ( aExp || aSig ) aExp -= 0x3F81;
+ return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig, zSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) {
+ return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
+ }
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ shift64RightJamming( aSig, 1, &zSig );
+ if ( aExp || aSig ) aExp -= 0x3C01;
+ return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig, zSig0, zSig1;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+ return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
+ }
+ shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+ return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ floatx80 z;
+
+ aExp = extractFloatx80Exp( a );
+ if ( 0x403E <= aExp ) {
+ if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+ return propagateFloatx80NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp < 0x3FFF ) {
+ if ( ( aExp == 0 )
+ && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+ return a;
+ }
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloatx80Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+ ) {
+ return
+ packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ }
+ break;
+ case float_round_down:
+ return
+ aSign ?
+ packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+ : packFloatx80( 0, 0, 0 );
+ case float_round_up:
+ return
+ aSign ? packFloatx80( 1, 0, 0 )
+ : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ }
+ return packFloatx80( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x403E - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = a;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z.low += lastBitMask>>1;
+ if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+ z.low += roundBitsMask;
+ }
+ }
+ z.low &= ~ roundBitsMask;
+ if ( z.low == 0 ) {
+ ++z.high;
+ z.low = LIT64( 0x8000000000000000 );
+ }
+ if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
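The mask arithmetic in the second half of floatx80_round_to_int is standard round-to-nearest-even on an integer grid: add half of lastBitMask, and if the discarded bits come out as exactly half (now zero after the add), clear the last kept bit so the tie lands on the even multiple. A standalone sketch of the same masks applied to rounding an integer to a multiple of 2^k (round_to_multiple_even is a name invented for this sketch):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t round_to_multiple_even(uint64_t v, int k)
    {
        uint64_t lastBitMask = (uint64_t)1 << k;
        uint64_t roundBitsMask = lastBitMask - 1;
        uint64_t z = v + (lastBitMask >> 1);
        if ((z & roundBitsMask) == 0)       /* discarded bits were exactly half   */
            z &= ~lastBitMask;              /* so round to the even multiple      */
        return z & ~roundBitsMask;
    }

    int main(void)
    {
        /* 20 and 28 are halfway between multiples of 8: ties go to 16 and 32. */
        printf("%llu %llu %llu\n",
               (unsigned long long)round_to_multiple_even(20, 3),   /* 16 */
               (unsigned long long)round_to_multiple_even(28, 3),   /* 32 */
               (unsigned long long)round_to_multiple_even(21, 3));  /* 24 */
        return 0;
    }
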
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
+| negated before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ int32 expDiff;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) --expDiff;
+ shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) ++expDiff;
+ shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ return a;
+ }
+ zSig1 = 0;
+ zSig0 = aSig + bSig;
+ if ( aExp == 0 ) {
+ normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+ goto roundAndPack;
+ }
+ zExp = aExp;
+ goto shiftRight1;
+ }
+ zSig0 = aSig + bSig;
+ if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+ shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+ zSig0 |= LIT64( 0x8000000000000000 );
+ ++zExp;
+ roundAndPack:
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
+{
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ int32 expDiff;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ zSig1 = 0;
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) ++expDiff;
+ shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+ sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) --expDiff;
+ shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+ sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ return
+ normalizeRoundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign == bSign ) {
+ return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign == bSign ) {
+ return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ bSign = extractFloatx80Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 )
+ || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) goto invalid;
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x3FFE;
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
+ if ( 0 < (sbits64) zSig0 ) {
+ shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+ --zExp;
+ }
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ bits64 rem0, rem1, rem2, term0, term1, term2;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ bSign = extractFloatx80Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ goto invalid;
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x3FFE;
+ rem1 = 0;
+ if ( bSig <= aSig ) {
+ shift128Right( aSig, 0, 1, &aSig, &rem1 );
+ ++zExp;
+ }
+ zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+ mul64To128( bSig, zSig0, &term0, &term1 );
+ sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, bSig );
+ if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+ mul64To128( bSig, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+ }
+ zSig1 |= ( ( rem1 | rem2 ) != 0 );
+ }
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int32 aExp, bExp, expDiff;
+ bits64 aSig0, aSig1, bSig;
+ bits64 q, term0, term1, alternateASig0, alternateASig1;
+ floatx80 z;
+
+ aSig0 = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig0<<1 )
+ || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ goto invalid;
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+ }
+ bSig |= LIT64( 0x8000000000000000 );
+ zSign = aSign;
+ expDiff = aExp - bExp;
+ aSig1 = 0;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+ expDiff = 0;
+ }
+ q = ( bSig <= aSig0 );
+ if ( q ) aSig0 -= bSig;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ mul64To128( bSig, q, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ if ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ q >>= 64 - expDiff;
+ mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+ while ( le128( term0, term1, aSig0, aSig1 ) ) {
+ ++q;
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ }
+ }
+ else {
+ term1 = 0;
+ term0 = bSig;
+ }
+ sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+ if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+ || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+ && ( q & 1 ) )
+ ) {
+ aSig0 = alternateASig0;
+ aSig1 = alternateASig1;
+ zSign = ! zSign;
+ }
+ return
+ normalizeRoundAndPackFloatx80(
+ 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, zExp;
+ bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ floatx80 z;
+
+ aSig0 = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ goto invalid;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+ }
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+ zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+ shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+ if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+ zSig0 |= doubleZSig0;
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise. The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise. The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise. The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
+| do not cause an exception. Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
+| an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= ( aSig1 != 0 );
+ shiftCount = 0x4028 - aExp;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+ return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1, savedASig;
+ int32 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ aSig0 |= ( aSig1 != 0 );
+ if ( 0x401E < aExp ) {
+ if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = 0x402F - aExp;
+ savedASig = aSig0;
+ aSig0 >>= shiftCount;
+ z = aSig0;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig0<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = 0x402F - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( 0x403E < aExp ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FFF )
+ && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+ )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+ }
+ else {
+ shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+ }
+ return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1;
+ int64 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = aExp - 0x402F;
+ if ( 0 < shiftCount ) {
+ if ( 0x403E <= aExp ) {
+ aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+ if ( ( a.high == LIT64( 0xC03E000000000000 ) )
+ && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+ if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ else {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (sbits64) LIT64( 0x8000000000000000 );
+ }
+ z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+ if ( (bits64) ( aSig1<<shiftCount ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ else {
+ if ( aExp < 0x3FFF ) {
+ if ( aExp | aSig0 | aSig1 ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ z = aSig0>>( - shiftCount );
+ if ( aSig1
+ || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig0, aSig1;
+ bits32 zSig;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
+ }
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ aSig0 |= ( aSig1 != 0 );
+ shift64RightJamming( aSig0, 18, &aSig0 );
+ zSig = aSig0;
+ if ( aExp || zSig ) {
+ zSig |= 0x40000000;
+ aExp -= 0x3F81;
+ }
+ return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
+ }
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+ aSig0 |= ( aSig1 != 0 );
+ if ( aExp || aSig0 ) {
+ aSig0 |= LIT64( 0x4000000000000000 );
+ aExp -= 0x3C01;
+ }
+ return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
+ }
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+ return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ float128 z;
+
+ aExp = extractFloat128Exp( a );
+ if ( 0x402F <= aExp ) {
+ if ( 0x406F <= aExp ) {
+ if ( ( aExp == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+ ) {
+ return propagateFloat128NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ lastBitMask = 1;
+ lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+ roundBitsMask = lastBitMask - 1;
+ z = a;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ if ( lastBitMask ) {
+ add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+ if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+ }
+ else {
+ if ( (sbits64) z.low < 0 ) {
+ ++z.high;
+ if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+ }
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) {
+ add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+ }
+ }
+ z.low &= ~ roundBitsMask;
+ }
+ else {
+ if ( aExp < 0x3FFF ) {
+ if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat128Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FFE )
+ && ( extractFloat128Frac0( a )
+ | extractFloat128Frac1( a ) )
+ ) {
+ return packFloat128( aSign, 0x3FFF, 0, 0 );
+ }
+ break;
+ case float_round_down:
+ return
+ aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+ : packFloat128( 0, 0, 0, 0 );
+ case float_round_up:
+ return
+ aSign ? packFloat128( 1, 0, 0, 0 )
+ : packFloat128( 0, 0x3FFF, 0, 0 );
+ }
+ return packFloat128( aSign, 0, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x402F - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z.low = 0;
+ z.high = a.high;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z.high += lastBitMask>>1;
+ if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+ z.high &= ~ lastBitMask;
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) {
+ z.high |= ( a.low != 0 );
+ z.high += roundBitsMask;
+ }
+ }
+ z.high &= ~ roundBitsMask;
+ }
+ if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ int32 expDiff;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shift128ExtraRightJamming(
+ bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shift128ExtraRightJamming(
+ aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ return a;
+ }
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ if ( aExp == 0 ) {
+ if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
+ return packFloat128( zSign, 0, zSig0, zSig1 );
+ }
+ zSig2 = 0;
+ zSig0 |= LIT64( 0x0002000000000000 );
+ zExp = aExp;
+ goto shiftRight1;
+ }
+ aSig0 |= LIT64( 0x0001000000000000 );
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ --zExp;
+ if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+ ++zExp;
+ shiftRight1:
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+ int32 expDiff;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+ shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig0 < aSig0 ) goto aBigger;
+ if ( aSig0 < bSig0 ) goto bBigger;
+ if ( bSig1 < aSig1 ) goto aBigger;
+ if ( aSig1 < bSig1 ) goto bBigger;
+ return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig0 |= LIT64( 0x4000000000000000 );
+ }
+ shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ bSig0 |= LIT64( 0x4000000000000000 );
+ bBigger:
+ sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig0 |= LIT64( 0x4000000000000000 );
+ }
+ shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+ aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+ sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
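The two entry points above only dispatch on the operand signs: equal signs add the magnitudes, differing signs subtract them. A small sketch of the resulting identity, assuming `st' is a float_status and float128_chs() (declared in softfloat.h below) flips only the sign bit; NaN sign propagation is ignored:

    float128 d1 = float128_sub(a, b, &st);
    float128 d2 = float128_add(a, float128_chs(b), &st);
    /* d1 and d2 encode the same value, because chs() changes neither
       the exponent nor the fraction that the magnitude paths inspect */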
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ zExp = aExp + bExp - 0x4000;
+ aSig0 |= LIT64( 0x0001000000000000 );
+ shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+ mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+ add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+ zSig2 |= ( zSig3 != 0 );
+ if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ ++zExp;
+ }
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'. The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ goto invalid;
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign, 0, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) {
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = aExp - bExp + 0x3FFD;
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+ shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+ ++zExp;
+ }
+ zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+ sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+ }
+ zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+ if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+ mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+ sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int32 aExp, bExp, expDiff;
+ bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+ bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+ sbits64 sigMean0;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ if ( aExp == 0x7FFF ) {
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ goto invalid;
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return a;
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ expDiff = aExp - bExp;
+ if ( expDiff < -1 ) return a;
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ),
+ aSig1,
+ 15 - ( expDiff < 0 ),
+ &aSig0,
+ &aSig1
+ );
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ q = le128( bSig0, bSig1, aSig0, aSig1 );
+ if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0;
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+ shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+ sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+ expDiff -= 61;
+ }
+ if ( -64 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0;
+ q >>= - expDiff;
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ expDiff += 52;
+ if ( expDiff < 0 ) {
+ shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ }
+ else {
+ shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+ }
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+ }
+ else {
+ shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ }
+ do {
+ alternateASig0 = aSig0;
+ alternateASig1 = aSig1;
+ ++q;
+ sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ } while ( 0 <= (sbits64) aSig0 );
+ add128(
+ aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
+ if ( ( sigMean0 < 0 )
+ || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+ aSig0 = alternateASig0;
+ aSig1 = alternateASig1;
+ }
+ zSign = ( (sbits64) aSig0 < 0 );
+ if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+ return
+ normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, zExp;
+ bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ goto invalid;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+ aSig0 |= LIT64( 0x0001000000000000 );
+ zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+ shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+ if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+ return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq( float128 a, float128 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+/* misc functions */
+float32 uint32_to_float32( unsigned int a STATUS_PARAM )
+{
+ return int64_to_float32(a STATUS_VAR);
+}
+
+float64 uint32_to_float64( unsigned int a STATUS_PARAM )
+{
+ return int64_to_float64(a STATUS_VAR);
+}
+
+unsigned int float32_to_uint32( float32 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float32_to_int64(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float32_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+unsigned int float64_to_uint32( float64 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float64_to_int64(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
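All four helpers above clamp out-of-range results and raise the invalid flag rather than wrapping. A minimal sketch, assuming `st' is a zero-initialized float_status and 0xBF800000 is the float32 encoding of -1.0f:

    unsigned int u = float32_to_uint32(make_float32(0xBF800000), &st);
    /* u == 0 and (get_float_exception_flags(&st) & float_flag_invalid) is set,
       because -1.0f is below the unsigned range */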
+/* FIXME: This looks broken. */
+uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
+ v += float64_val(a);
+ v = float64_to_int64(make_float64(v) STATUS_VAR);
+
+ return v - INT64_MIN;
+}
+
+uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
+ v += float64_val(a);
+ v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
+
+ return v - INT64_MIN;
+}
+
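The FIXME above flags that the INT64_MIN bias is applied to the raw float64_val() bit patterns, not to the floating-point values, so the intermediate sum is not a meaningful double. A hedged sketch of the conventional two-range approach (an assumption about the intent, not the upstream fix; negative and NaN inputs would still need the clamp-and-raise treatment used by the uint32 helpers above):

    uint64_t float64_to_uint64_sketch(float64 a, float_status *status)
    {
        /* 0x43E0000000000000 is assumed to be the float64 encoding of 2^63 */
        float64 two_pow_63 = make_float64(LIT64(0x43E0000000000000));
        if (float64_lt(a, two_pow_63, status)) {
            return (uint64_t)float64_to_int64(a, status);
        }
        /* shift into int64 range, convert, then add 2^63 back in */
        return (uint64_t)float64_to_int64(float64_sub(a, two_pow_63, status), status)
               + ((uint64_t)1 << 63);
    }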
+#define COMPARE(s, nan_exp) \
+INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
+ int is_quiet STATUS_PARAM ) \
+{ \
+ flag aSign, bSign; \
+ bits ## s av, bv; \
+ \
+ if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
+ extractFloat ## s ## Frac( a ) ) || \
+ ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
+ extractFloat ## s ## Frac( b ) )) { \
+ if (!is_quiet || \
+ float ## s ## _is_signaling_nan( a ) || \
+ float ## s ## _is_signaling_nan( b ) ) { \
+ float_raise( float_flag_invalid STATUS_VAR); \
+ } \
+ return float_relation_unordered; \
+ } \
+ aSign = extractFloat ## s ## Sign( a ); \
+ bSign = extractFloat ## s ## Sign( b ); \
+ av = float ## s ## _val(a); \
+ bv = float ## s ## _val(b); \
+ if ( aSign != bSign ) { \
+ if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) { \
+ /* zero case */ \
+ return float_relation_equal; \
+ } else { \
+ return 1 - (2 * aSign); \
+ } \
+ } else { \
+ if (av == bv) { \
+ return float_relation_equal; \
+ } else { \
+ return 1 - 2 * (aSign ^ ( av < bv )); \
+ } \
+ } \
+} \
+ \
+int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
+{ \
+ return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
+} \
+ \
+int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
+{ \
+ return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
+}
+
+COMPARE(32, 0xff)
+COMPARE(64, 0x7ff)
+
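Each COMPARE(s, nan_exp) instantiation expands into one internal worker plus two public wrappers; COMPARE(32, 0xff) above therefore yields, in sketch form:

    static inline int float32_compare_internal(float32 a, float32 b, int is_quiet, float_status *status);
    int float32_compare(float32 a, float32 b, float_status *status);        /* is_quiet = 0, any NaN raises invalid */
    int float32_compare_quiet(float32 a, float32 b, float_status *status);  /* is_quiet = 1, only signaling NaNs do */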
+INLINE int float128_compare_internal( float128 a, float128 b,
+ int is_quiet STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
+ ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
+ ( ( extractFloat128Exp( b ) == 0x7fff ) &&
+ ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
+ if (!is_quiet ||
+ float128_is_signaling_nan( a ) ||
+ float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return float_relation_unordered;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
+ /* zero case */
+ return float_relation_equal;
+ } else {
+ return 1 - (2 * aSign);
+ }
+ } else {
+ if (a.low == b.low && a.high == b.high) {
+ return float_relation_equal;
+ } else {
+ return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
+ }
+ }
+}
+
+int float128_compare( float128 a, float128 b STATUS_PARAM )
+{
+ return float128_compare_internal(a, b, 0 STATUS_VAR);
+}
+
+int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ return float128_compare_internal(a, b, 1 STATUS_VAR);
+}
+
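A small usage sketch of the ordering entry points, assuming `x' and `y' are float128 values and `st' is a zero-initialized float_status; the float_relation_* values are the enum declared in softfloat.h:

    switch (float128_compare_quiet(x, y, &st)) {
    case float_relation_less:      /* x <  y */ break;
    case float_relation_equal:     /* x == y */ break;
    case float_relation_greater:   /* x >  y */ break;
    case float_relation_unordered: /* at least one NaN; quiet NaNs raise no flag here */ break;
    }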
+/* Multiply A by 2 raised to the power N. */
+float32 float32_scalbn( float32 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits32 aSig;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0xFF ) {
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig |= 0x00800000;
+ else if ( aSig == 0 )
+ return a;
+
+ aExp += n - 1;
+ aSig <<= 7;
+ return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+}
+
+float64 float64_scalbn( float64 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+
+ if ( aExp == 0x7FF ) {
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig |= LIT64( 0x0010000000000000 );
+ else if ( aSig == 0 )
+ return a;
+
+ aExp += n - 1;
+ aSig <<= 10;
+ return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
+}
+
+#ifdef FLOATX80
+floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp;
+ bits64 aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+
+    if ( aExp == 0x7FFF ) {
+ return a;
+ }
+ if (aExp == 0 && aSig == 0)
+ return a;
+
+ aExp += n;
+ return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
+ aSign, aExp, aSig, 0 STATUS_VAR );
+}
+#endif
+
+#ifdef FLOAT128
+float128 float128_scalbn( float128 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig0 |= LIT64( 0x0001000000000000 );
+ else if ( aSig0 == 0 && aSig1 == 0 )
+ return a;
+
+ aExp += n - 1;
+ return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
+ STATUS_VAR );
+
+}
+#endif
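The scalbn family multiplies by 2^n with a single rounding. A worked value, assuming `st' is a zero-initialized float_status (float64_one and float64_val come from softfloat.h below):

    float64 r = float64_scalbn(float64_one, 10, &st);      /* 1.0 * 2^10 */
    /* float64_val(r) == LIT64(0x4090000000000000), the encoding of 1024.0,
       and no exception flags are raised because the result is exact */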
diff --git a/src/recompiler/fpu/softfloat.h b/src/recompiler/fpu/softfloat.h
new file mode 100644
index 00000000..c209cd97
--- /dev/null
+++ b/src/recompiler/fpu/softfloat.h
@@ -0,0 +1,537 @@
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#ifdef VBOX
+#include <VBox/types.h>
+#endif
+
+#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
+#include <sunmath.h>
+#endif
+
+#include <inttypes.h>
+#include "config.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified. For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+#ifndef _AIX
+typedef int uint16;
+typedef int int16;
+#endif
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified. For instance, for most
+| implementations of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8_t bits8;
+typedef int8_t sbits8;
+typedef uint16_t bits16;
+typedef int16_t sbits16;
+typedef uint32_t bits32;
+typedef int32_t sbits32;
+typedef uint64_t bits64;
+typedef int64_t sbits64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'. If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined. The same applies to
+| the `FLOAT128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#ifdef CONFIG_SOFTFLOAT
+/* bit exact soft float support */
+#define FLOATX80
+#define FLOAT128
+#else
+/* native float support */
+#if (defined(__i386__) || defined(__x86_64__)) && (!defined(CONFIG_BSD) || defined(VBOX)) /** @todo VBOX: not correct on windows */
+#define FLOATX80
+#endif
+#endif /* !CONFIG_SOFTFLOAT */
+
+#if defined(VBOX) && (!defined(FLOATX80) || defined(CONFIG_SOFTFLOAT))
+# error misconfigured
+#endif
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
+
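These three macros thread a float_status pointer through the whole API. For instance, a prototype written as

    float32 float32_add( float32, float32 STATUS_PARAM );

preprocesses to

    float32 float32_add( float32, float32 , float_status *status );

while STATUS_VAR forwards the same pointer at call sites and STATUS(field) reads status->field inside the implementations.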
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+ float_relation_less = -1,
+ float_relation_equal = 0,
+ float_relation_greater = 1,
+ float_relation_unordered = 2
+};
+
+#ifdef CONFIG_SOFTFLOAT
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+/* Use structures for soft-float types. This prevents accidentally mixing
+ them with native int/float types. A sufficiently clever compiler and
+   sane ABI should be able to see through these structs.  However
+ x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
+//#define USE_SOFTFLOAT_STRUCT_TYPES
+#ifdef USE_SOFTFLOAT_STRUCT_TYPES
+typedef struct {
+ uint32_t v;
+} float32;
+/* The cast ensures an error if the wrong type is passed. */
+#define float32_val(x) (((float32)(x)).v)
+#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+typedef struct {
+ uint64_t v;
+} float64;
+#define float64_val(x) (((float64)(x)).v)
+#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#else
+typedef uint32_t float32;
+typedef uint64_t float64;
+#define float32_val(x) (x)
+#define float64_val(x) (x)
+#define make_float32(x) (x)
+#define make_float64(x) (x)
+#endif
+#ifdef FLOATX80
+typedef struct {
+ uint64_t low;
+ uint16_t high;
+} floatx80;
+#endif
+#ifdef FLOAT128
+typedef struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint64_t high, low;
+#else
+ uint64_t low, high;
+#endif
+} float128;
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_tininess_after_rounding = 0,
+ float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_round_nearest_even = 0,
+ float_round_down = 1,
+ float_round_up = 2,
+ float_round_to_zero = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+ float_flag_invalid = 1,
+ float_flag_divbyzero = 4,
+ float_flag_overflow = 8,
+ float_flag_underflow = 16,
+ float_flag_inexact = 32
+};
+
+typedef struct float_status {
+ signed char float_detect_tininess;
+ signed char float_rounding_mode;
+ signed char float_exception_flags;
+#ifdef FLOATX80
+ signed char floatx80_rounding_precision;
+#endif
+ flag flush_to_zero;
+ flag default_nan_mode;
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_flush_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_to_zero) = val;
+}
+INLINE void set_default_nan_mode(flag val STATUS_PARAM)
+{
+ STATUS(default_nan_mode) = val;
+}
+INLINE int get_float_exception_flags(float_status *status)
+{
+ return STATUS(float_exception_flags);
+}
+#ifdef FLOATX80
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+#endif
+
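A minimal end-to-end sketch of the configuration path, assuming a CONFIG_SOFTFLOAT build and a zero-initialized status (which gives round-to-nearest-even and all flags clear):

    float_status st = {0};
    set_float_rounding_mode(float_round_to_zero, &st);
    float32 r = float32_div(float32_one, make_float32(0x40400000), &st);   /* 1.0f / 3.0f */
    if (get_float_exception_flags(&st) & float_flag_inexact) {
        /* raised here: 1/3 has no exact binary representation */
    }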
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int STATUS_PARAM );
+float64 int32_to_float64( int STATUS_PARAM );
+float32 uint32_to_float32( unsigned int STATUS_PARAM );
+float64 uint32_to_float64( unsigned int STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( int STATUS_PARAM );
+#endif
+float32 int64_to_float32( int64_t STATUS_PARAM );
+float32 uint64_to_float32( uint64_t STATUS_PARAM );
+float64 int64_to_float64( int64_t STATUS_PARAM );
+float64 uint64_to_float64( uint64_t STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64_t STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 int64_to_float128( int64_t STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software half-precision conversion routines.
+*----------------------------------------------------------------------------*/
+bits16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( bits16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float32_to_int32( float32 STATUS_PARAM );
+int float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+unsigned int float32_to_uint32( float32 STATUS_PARAM );
+unsigned int float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64_t float32_to_int64( float32 STATUS_PARAM );
+int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_sqrt( float32 STATUS_PARAM );
+float32 float32_exp2( float32 STATUS_PARAM );
+float32 float32_log2( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_eq_signaling( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+int float32_is_nan( float32 );
+int float32_is_signaling_nan( float32 );
+float32 float32_scalbn( float32, int STATUS_PARAM );
+
+INLINE float32 float32_abs(float32 a)
+{
+ return make_float32(float32_val(a) & 0x7fffffff);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+ return make_float32(float32_val(a) ^ 0x80000000);
+}
+
+INLINE int float32_is_infinity(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0x7f800000;
+}
+
+INLINE int float32_is_neg(float32 a)
+{
+ return float32_val(a) >> 31;
+}
+
+INLINE int float32_is_zero(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0;
+}
+
+#define float32_zero make_float32(0)
+#define float32_one make_float32(0x3f800000)
+#define float32_ln2 make_float32(0x3f317218)
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float64_to_int32( float64 STATUS_PARAM );
+int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+unsigned int float64_to_uint32( float64 STATUS_PARAM );
+unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64_t float64_to_int64( float64 STATUS_PARAM );
+int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64_t float64_to_uint64 (float64 a STATUS_PARAM);
+uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+float32 float64_to_float32( float64 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_sqrt( float64 STATUS_PARAM );
+float64 float64_log2( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_eq_signaling( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+int float64_is_nan( float64 a );
+int float64_is_signaling_nan( float64 );
+float64 float64_scalbn( float64, int STATUS_PARAM );
+
+INLINE float64 float64_abs(float64 a)
+{
+ return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+ return make_float64(float64_val(a) ^ 0x8000000000000000LL);
+}
+
+INLINE int float64_is_infinity(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
+}
+
+INLINE int float64_is_neg(float64 a)
+{
+ return float64_val(a) >> 63;
+}
+
+INLINE int float64_is_zero(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
+}
+
+#define float64_zero make_float64(0)
+#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int floatx80_to_int32( floatx80 STATUS_PARAM );
+int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64( floatx80 STATUS_PARAM );
+int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_signaling( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_nan( floatx80 );
+int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+ a.high &= 0x7fff;
+ return a;
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+ a.high ^= 0x8000;
+ return a;
+}
+
+INLINE int floatx80_is_infinity(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0x7fff && a.low == 0;
+}
+
+INLINE int floatx80_is_neg(floatx80 a)
+{
+ return a.high >> 15;
+}
+
+INLINE int floatx80_is_zero(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0 && a.low == 0;
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int float128_to_int32( float128 STATUS_PARAM );
+int float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64_t float128_to_int64( float128 STATUS_PARAM );
+int64_t float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_eq_signaling( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_compare( float128, float128 STATUS_PARAM );
+int float128_compare_quiet( float128, float128 STATUS_PARAM );
+int float128_is_nan( float128 );
+int float128_is_signaling_nan( float128 );
+float128 float128_scalbn( float128, int STATUS_PARAM );
+
+INLINE float128 float128_abs(float128 a)
+{
+ a.high &= 0x7fffffffffffffffLL;
+ return a;
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+ a.high ^= 0x8000000000000000LL;
+ return a;
+}
+
+INLINE int float128_is_infinity(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
+}
+
+INLINE int float128_is_neg(float128 a)
+{
+ return a.high >> 63;
+}
+
+INLINE int float128_is_zero(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
+}
+
+#endif
+
+#else /* CONFIG_SOFTFLOAT */
+
+#include "softfloat-native.h"
+
+#endif /* !CONFIG_SOFTFLOAT */
+
+#endif /* !SOFTFLOAT_H */
diff --git a/src/recompiler/gen-icount.h b/src/recompiler/gen-icount.h
new file mode 100644
index 00000000..13512960
--- /dev/null
+++ b/src/recompiler/gen-icount.h
@@ -0,0 +1,48 @@
+#include "qemu-timer.h"
+
+/* Helpers for instruction counting code generation. */
+
+static TCGArg *icount_arg;
+static int icount_label;
+
+static inline void gen_icount_start(void)
+{
+ TCGv_i32 count;
+
+ if (!use_icount)
+ return;
+
+ icount_label = gen_new_label();
+ count = tcg_temp_local_new_i32();
+ tcg_gen_ld_i32(count, cpu_env, offsetof(CPUState, icount_decr.u32));
+ /* This is a horrid hack to allow fixing up the value later. */
+ icount_arg = gen_opparam_ptr + 1;
+ tcg_gen_subi_i32(count, count, 0xdeadbeef);
+
+ tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
+ tcg_gen_st16_i32(count, cpu_env, offsetof(CPUState, icount_decr.u16.low));
+ tcg_temp_free_i32(count);
+}
+
+static void gen_icount_end(TranslationBlock *tb, int num_insns)
+{
+ if (use_icount) {
+ *icount_arg = num_insns;
+ gen_set_label(icount_label);
+ tcg_gen_exit_tb((uintptr_t)(tb + 2));
+ }
+}
+
+static inline void gen_io_start(void)
+{
+ TCGv_i32 tmp = tcg_const_i32(1);
+ tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, can_do_io));
+ tcg_temp_free_i32(tmp);
+}
+
+static inline void gen_io_end(void)
+{
+ TCGv_i32 tmp = tcg_const_i32(0);
+ tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, can_do_io));
+ tcg_temp_free_i32(tmp);
+}
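A hedged sketch of how a target translator is expected to use these helpers (the loop shape and the does_io and end_of_block flags are illustrative assumptions, not code from this tree):

    gen_icount_start();
    for (;;) {
        if (use_icount && does_io)      /* does_io: hypothetical per-insn property */
            gen_io_start();
        /* ... emit TCG ops for one guest instruction, ++num_insns ... */
        if (use_icount && does_io)
            gen_io_end();
        if (end_of_block)
            break;
    }
    gen_icount_end(tb, num_insns);      /* patches the 0xdeadbeef placeholder */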
diff --git a/src/recompiler/host-utils.c b/src/recompiler/host-utils.c
new file mode 100644
index 00000000..4afee79e
--- /dev/null
+++ b/src/recompiler/host-utils.c
@@ -0,0 +1,109 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2007 Aurelien Jarno
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#ifndef VBOX
+#include <stdint.h>
+#else
+# include <iprt/types.h>
+#endif
+#include "host-utils.h"
+
+//#define DEBUG_MULDIV
+
+/* Long integer helpers */
+#if !defined(__x86_64__)
+static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ *plow += a;
+ /* carry test */
+ if (*plow < a)
+ (*phigh)++;
+ *phigh += b;
+}
+
+static void neg128 (uint64_t *plow, uint64_t *phigh)
+{
+ *plow = ~*plow;
+ *phigh = ~*phigh;
+ add128(plow, phigh, 1, 0);
+}
+
+static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ uint32_t a0, a1, b0, b1;
+ uint64_t v;
+
+ a0 = a;
+ a1 = a >> 32;
+
+ b0 = b;
+ b1 = b >> 32;
+
+ v = (uint64_t)a0 * (uint64_t)b0;
+ *plow = v;
+ *phigh = 0;
+
+ v = (uint64_t)a0 * (uint64_t)b1;
+ add128(plow, phigh, v << 32, v >> 32);
+
+ v = (uint64_t)a1 * (uint64_t)b0;
+ add128(plow, phigh, v << 32, v >> 32);
+
+ v = (uint64_t)a1 * (uint64_t)b1;
+ *phigh += v;
+}
+
+/* Unsigned 64x64 -> 128 multiplication */
+void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ mul64(plow, phigh, a, b);
+#if defined(DEBUG_MULDIV)
+ printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
+ a, b, *phigh, *plow);
+#endif
+}
+
+/* Signed 64x64 -> 128 multiplication */
+void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
+{
+ int sa, sb;
+
+ sa = (a < 0);
+ if (sa)
+ a = -a;
+ sb = (b < 0);
+ if (sb)
+ b = -b;
+ mul64(plow, phigh, a, b);
+ if (sa ^ sb) {
+ neg128(plow, phigh);
+ }
+#if defined(DEBUG_MULDIV)
+ printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n",
+ a, b, *phigh, *plow);
+#endif
+}
+#endif /* !defined(__x86_64__) */
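A quick sanity check for the 128-bit product helper, with operands chosen so the result is easy to verify by hand:

    uint64_t lo, hi;
    mulu64(&lo, &hi, 0xFFFFFFFFFFFFFFFFULL, 2);    /* (2^64 - 1) * 2 = 2^65 - 2 */
    /* hi == 1, lo == 0xFFFFFFFFFFFFFFFE */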
diff --git a/src/recompiler/host-utils.h b/src/recompiler/host-utils.h
new file mode 100644
index 00000000..0ddc1765
--- /dev/null
+++ b/src/recompiler/host-utils.h
@@ -0,0 +1,236 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2007 Thiemo Seufer
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "osdep.h"
+
+#if defined(__x86_64__)
+#define __HAVE_FAST_MULU64__
+static inline void mulu64(uint64_t *plow, uint64_t *phigh,
+ uint64_t a, uint64_t b)
+{
+ __asm__ ("mul %0\n\t"
+ : "=d" (*phigh), "=a" (*plow)
+ : "a" (a), "0" (b));
+}
+#define __HAVE_FAST_MULS64__
+static inline void muls64(uint64_t *plow, uint64_t *phigh,
+ int64_t a, int64_t b)
+{
+ __asm__ ("imul %0\n\t"
+ : "=d" (*phigh), "=a" (*plow)
+ : "a" (a), "0" (b));
+}
+#else
+void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
+void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
+#endif
+
+/* Binary search for leading zeros. */
+
+static inline int clz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ if (val)
+ return __builtin_clz(val);
+ else
+ return 32;
+#else
+ int cnt = 0;
+
+ if (!(val & 0xFFFF0000U)) {
+ cnt += 16;
+ val <<= 16;
+ }
+ if (!(val & 0xFF000000U)) {
+ cnt += 8;
+ val <<= 8;
+ }
+ if (!(val & 0xF0000000U)) {
+ cnt += 4;
+ val <<= 4;
+ }
+ if (!(val & 0xC0000000U)) {
+ cnt += 2;
+ val <<= 2;
+ }
+ if (!(val & 0x80000000U)) {
+ cnt++;
+ val <<= 1;
+ }
+ if (!(val & 0x80000000U)) {
+ cnt++;
+ }
+ return cnt;
+#endif
+}
+
+static inline int clo32(uint32_t val)
+{
+ return clz32(~val);
+}
+
+static inline int clz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ if (val)
+ return __builtin_clzll(val);
+ else
+ return 64;
+#else
+ int cnt = 0;
+
+ if (!(val >> 32)) {
+ cnt += 32;
+ } else {
+ val >>= 32;
+ }
+
+ return cnt + clz32(val);
+#endif
+}
+
+static inline int clo64(uint64_t val)
+{
+ return clz64(~val);
+}
+
+static inline int ctz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ if (val)
+ return __builtin_ctz(val);
+ else
+ return 32;
+#else
+ int cnt;
+
+ cnt = 0;
+ if (!(val & 0x0000FFFFUL)) {
+ cnt += 16;
+ val >>= 16;
+ }
+ if (!(val & 0x000000FFUL)) {
+ cnt += 8;
+ val >>= 8;
+ }
+ if (!(val & 0x0000000FUL)) {
+ cnt += 4;
+ val >>= 4;
+ }
+ if (!(val & 0x00000003UL)) {
+ cnt += 2;
+ val >>= 2;
+ }
+ if (!(val & 0x00000001UL)) {
+ cnt++;
+ val >>= 1;
+ }
+ if (!(val & 0x00000001UL)) {
+ cnt++;
+ }
+
+ return cnt;
+#endif
+}
+
+static inline int cto32(uint32_t val)
+{
+ return ctz32(~val);
+}
+
+static inline int ctz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ if (val)
+ return __builtin_ctzll(val);
+ else
+ return 64;
+#else
+ int cnt;
+
+ cnt = 0;
+ if (!((uint32_t)val)) {
+ cnt += 32;
+ val >>= 32;
+ }
+
+ return cnt + ctz32(val);
+#endif
+}
+
+static inline int cto64(uint64_t val)
+{
+ return ctz64(~val);
+}
+
+static inline int ctpop8(uint8_t val)
+{
+ val = (val & 0x55) + ((val >> 1) & 0x55);
+ val = (val & 0x33) + ((val >> 2) & 0x33);
+ val = (val & 0x0f) + ((val >> 4) & 0x0f);
+
+ return val;
+}
+
+static inline int ctpop16(uint16_t val)
+{
+ val = (val & 0x5555) + ((val >> 1) & 0x5555);
+ val = (val & 0x3333) + ((val >> 2) & 0x3333);
+ val = (val & 0x0f0f) + ((val >> 4) & 0x0f0f);
+ val = (val & 0x00ff) + ((val >> 8) & 0x00ff);
+
+ return val;
+}
+
+static inline int ctpop32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcount(val);
+#else
+ val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
+ val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
+ val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
+ val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
+ val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
+
+ return val;
+#endif
+}
+
+static inline int ctpop64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcountll(val);
+#else
+ val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
+ val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
+ val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL);
+ val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 0x00ff00ff00ff00ffULL);
+ val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 0x0000ffff0000ffffULL);
+ val = (val & 0x00000000ffffffffULL) + ((val >> 32) & 0x00000000ffffffffULL);
+
+ return val;
+#endif
+}
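
The clz/ctz/ctpop helpers either map to the GCC builtins or fall back to the shift-and-test versions above. A few hand-checkable values, as a hedged sketch:

    #include <assert.h>
    #include "host-utils.h"

    int main(void)
    {
        assert(clz32(0x00010000U) == 15);    /* bits 31..17 clear      */
        assert(ctz32(0x00010000U) == 16);    /* bits 15..0 clear       */
        assert(clz64(1ULL) == 63);
        assert(ctpop32(0xF0F0F0F0U) == 16);  /* four set bits per byte */
        return 0;
    }
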
diff --git a/src/recompiler/hostregs_helper.h b/src/recompiler/hostregs_helper.h
new file mode 100644
index 00000000..c6c1ddb9
--- /dev/null
+++ b/src/recompiler/hostregs_helper.h
@@ -0,0 +1,70 @@
+/*
+ * Save/restore host registers.
+ *
+ * Copyright (c) 2007 CodeSourcery
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/* The GCC global register variable extension is used to reserve some
+ host registers for use by generated code. However only the core parts of
+ the translation engine are compiled with these settings. We must manually
+ save/restore these registers when called from regular code.
+ It is not sufficient to save/restore T0 et al. as these may be declared
+ with a datatype smaller than the actual register. */
+
+#if defined(DECLARE_HOST_REGS)
+
+#define DO_REG(REG) \
+ register host_reg_t reg_AREG##REG asm(AREG##REG); \
+ volatile host_reg_t saved_AREG##REG;
+
+#elif defined(SAVE_HOST_REGS)
+
+#define DO_REG(REG) \
+ __asm__ __volatile__ ("" : "=r" (reg_AREG##REG)); \
+ saved_AREG##REG = reg_AREG##REG;
+
+#else
+
+#define DO_REG(REG) \
+ reg_AREG##REG = saved_AREG##REG; \
+ __asm__ __volatile__ ("" : : "r" (reg_AREG##REG));
+
+#endif
+
+#ifdef AREG0
+DO_REG(0)
+#endif
+
+#ifdef AREG1
+DO_REG(1)
+#endif
+
+#ifdef AREG2
+DO_REG(2)
+#endif
+
+#undef SAVE_HOST_REGS
+#undef DECLARE_HOST_REGS
+#undef DO_REG
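
The header is meant to be included up to three times from the same function — first with DECLARE_HOST_REGS, then with SAVE_HOST_REGS, then with neither — selecting a different DO_REG expansion each time. A hedged sketch of that inclusion pattern (the surrounding function is illustrative; QEMU's cpu-exec.c uses the same scheme):

    /* in the function that calls into generated code */
    #define DECLARE_HOST_REGS 1
    #include "hostregs_helper.h"     /* declares reg_AREGn and saved_AREGn */

    #define SAVE_HOST_REGS 1
    #include "hostregs_helper.h"     /* copies the live registers aside    */

    /* ... run translated code ... */

    #include "hostregs_helper.h"     /* third include restores them        */
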
diff --git a/src/recompiler/ioport.h b/src/recompiler/ioport.h
new file mode 100644
index 00000000..8df11ce0
--- /dev/null
+++ b/src/recompiler/ioport.h
@@ -0,0 +1,71 @@
+/*
+ * defines ioport related functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/**************************************************************************
+ * IO ports API
+ */
+
+#ifndef IOPORT_H
+#define IOPORT_H
+
+#include "qemu-common.h"
+
+typedef uint32_t pio_addr_t;
+#define FMT_pioaddr PRIx32
+
+#define MAX_IOPORTS (64 * 1024)
+#define IOPORTS_MASK (MAX_IOPORTS - 1)
+
+/* These should really be in isa.h, but are here to make pc.h happy. */
+typedef void (IOPortWriteFunc)(void *opaque, uint32_t address, uint32_t data);
+typedef uint32_t (IOPortReadFunc)(void *opaque, uint32_t address);
+
+int register_ioport_read(pio_addr_t start, int length, int size,
+ IOPortReadFunc *func, void *opaque);
+int register_ioport_write(pio_addr_t start, int length, int size,
+ IOPortWriteFunc *func, void *opaque);
+void isa_unassign_ioport(pio_addr_t start, int length);
+
+
+#ifndef VBOX
+void cpu_outb(pio_addr_t addr, uint8_t val);
+void cpu_outw(pio_addr_t addr, uint16_t val);
+void cpu_outl(pio_addr_t addr, uint32_t val);
+uint8_t cpu_inb(pio_addr_t addr);
+uint16_t cpu_inw(pio_addr_t addr);
+uint32_t cpu_inl(pio_addr_t addr);
+#else
+void cpu_outb(CPUX86State *env, pio_addr_t addr, uint8_t val);
+void cpu_outw(CPUX86State *env, pio_addr_t addr, uint16_t val);
+void cpu_outl(CPUX86State *env, pio_addr_t addr, uint32_t val);
+uint8_t cpu_inb(CPUX86State *env, pio_addr_t addr);
+uint16_t cpu_inw(CPUX86State *env, pio_addr_t addr);
+uint32_t cpu_inl(CPUX86State *env, pio_addr_t addr);
+#endif
+
+#endif /* IOPORT_H */
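
register_ioport_read()/register_ioport_write() attach a callback to a range of ports for a given access size. A hedged sketch registering a one-byte read handler; the port number and names are made up for illustration:

    #include "ioport.h"

    static uint32_t dummy_port_read(void *opaque, uint32_t addr)
    {
        (void)opaque; (void)addr;
        return 0xff;                               /* placeholder value */
    }

    static void dummy_device_init(void *opaque)
    {
        /* one port starting at 0x92, byte-sized accesses */
        register_ioport_read(0x92, 1, 1, dummy_port_read, opaque);
    }
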
diff --git a/src/recompiler/osdep.h b/src/recompiler/osdep.h
new file mode 100644
index 00000000..b69d157f
--- /dev/null
+++ b/src/recompiler/osdep.h
@@ -0,0 +1,166 @@
+#ifndef QEMU_OSDEP_H
+#define QEMU_OSDEP_H
+
+#ifdef VBOX /** @todo clean up this, it's not fully synched. */
+
+#include <iprt/alloc.h>
+#ifndef RT_OS_WINDOWS
+# include <iprt/alloca.h>
+#endif
+#include <iprt/stdarg.h>
+#include <iprt/string.h>
+
+#include "config.h"
+
+#define VBOX_ONLY(x) x
+
+#define qemu_snprintf(pszBuf, cbBuf, ...) RTStrPrintf((pszBuf), (cbBuf), __VA_ARGS__)
+#define qemu_vsnprintf(pszBuf, cbBuf, pszFormat, args) \
+ RTStrPrintfV((pszBuf), (cbBuf), (pszFormat), (args))
+#define qemu_vprintf(pszFormat, args) \
+ RTLogPrintfV((pszFormat), (args))
+
+/**@todo the following macros belong elsewhere */
+#define qemu_malloc(cb) RTMemAlloc(cb)
+#define qemu_mallocz(cb) RTMemAllocZ(cb)
+#define qemu_realloc(ptr, cb) RTMemRealloc(ptr, cb)
+#define qemu_free(pv) RTMemFree(pv)
+#define qemu_strdup(psz) RTStrDup(psz)
+
+/* Misc wrappers */
+#define fflush(file) RTLogFlush(NULL)
+#define printf(...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+/* If DEBUG_TMP_LOGGING - goes to QEMU log file */
+#ifndef DEBUG_TMP_LOGGING
+# define fprintf(logfile, ...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+#endif
+
+#define assert(cond) Assert(cond)
+
+#else /* !VBOX */
+
+#include <stdarg.h>
+#include <stddef.h>
+
+#define VBOX_ONLY(x) /* nike */
+#define qemu_snprintf snprintf /* bird */
+#define qemu_vsnprintf vsnprintf /* bird */
+#define qemu_vprintf vprintf /* bird */
+
+#endif /* !VBOX */
+
+#ifdef __OpenBSD__
+#include <sys/types.h>
+#include <sys/signal.h>
+#endif
+
+#ifndef VBOX
+#ifndef _WIN32
+#include <sys/time.h>
+#endif
+#endif /* !VBOX */
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+#endif
+
+#ifndef likely
+#if __GNUC__ < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifdef CONFIG_NEED_OFFSETOF
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *) 0)->MEMBER)
+#endif
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *) 0)->member) *__mptr = (ptr); \
+ (type *) ((char *) __mptr - offsetof(type, member));})
+#endif
+
+/* Convert from a base type to a parent type, with compile time checking. */
+#ifdef __GNUC__
+#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
+ char __attribute__((unused)) offset_must_be_zero[ \
+ -offsetof(type, field)]; \
+ container_of(dev, type, field);}))
+#else
+#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
+#endif
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef always_inline
+#if !((__GNUC__ < 3) || defined(__APPLE__))
+#ifdef __OPTIMIZE__
+#define inline __attribute__ (( always_inline )) __inline__
+#endif
+#endif
+#else
+#define inline always_inline
+#endif
+
+#ifdef __i386__
+#define REGPARM __attribute((regparm(3)))
+#else
+#define REGPARM
+#endif
+
+#ifndef VBOX
+#define qemu_printf printf
+#else /*VBOX*/
+#define qemu_printf RTLogPrintf
+#endif /*VBOX*/
+
+#if defined (__GNUC__) && defined (__GNUC_MINOR__)
+# define QEMU_GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define QEMU_GNUC_PREREQ(maj, min) 0
+#endif
+
+#ifndef VBOX
+void *qemu_memalign(size_t alignment, size_t size);
+void *qemu_vmalloc(size_t size);
+void qemu_vfree(void *ptr);
+
+int qemu_create_pidfile(const char *filename);
+
+#ifdef _WIN32
+int ffs(int i);
+
+typedef struct {
+ long tv_sec;
+ long tv_usec;
+} qemu_timeval;
+int qemu_gettimeofday(qemu_timeval *tp);
+#else
+typedef struct timeval qemu_timeval;
+#define qemu_gettimeofday(tp) gettimeofday(tp, NULL);
+#endif /* !_WIN32 */
+#else /* VBOX */
+# define qemu_memalign(alignment, size) ( (alignment) <= PAGE_SIZE ? RTMemPageAlloc((size)) : NULL )
+# define qemu_vfree(pv) RTMemPageFree(pv, missing_size_parameter)
+# define qemu_vmalloc(cb) RTMemPageAlloc(cb)
+#endif /* VBOX */
+
+#endif
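
container_of() recovers the enclosing structure from a pointer to one of its members, and DO_UPCAST() adds a compile-time check that the member sits at offset zero. A hedged sketch with made-up device types, assuming the macros above are in scope:

    #include "osdep.h"

    typedef struct { int irq; } BaseState;
    typedef struct { BaseState base; int level; } MyDeviceState;

    static MyDeviceState *to_mydev(BaseState *b)
    {
        /* equivalent, with the extra offset-zero check:
           return DO_UPCAST(MyDeviceState, base, b);    */
        return container_of(b, MyDeviceState, base);
    }
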
diff --git a/src/recompiler/qemu-barrier.h b/src/recompiler/qemu-barrier.h
new file mode 100644
index 00000000..b77fce23
--- /dev/null
+++ b/src/recompiler/qemu-barrier.h
@@ -0,0 +1,10 @@
+#ifndef __QEMU_BARRIER_H
+#define __QEMU_BARRIER_H 1
+
+/* FIXME: arch dependent, x86 version */
+#define smp_wmb() asm volatile("" ::: "memory")
+
+/* Compiler barrier */
+#define barrier() asm volatile("" ::: "memory")
+
+#endif
diff --git a/src/recompiler/qemu-common.h b/src/recompiler/qemu-common.h
new file mode 100644
index 00000000..ca6e3ce2
--- /dev/null
+++ b/src/recompiler/qemu-common.h
@@ -0,0 +1,348 @@
+/* Common header file that is included by all of qemu. */
+#ifndef QEMU_COMMON_H
+#define QEMU_COMMON_H
+
+#include "config-host.h"
+
+#ifdef VBOX
+
+# include <iprt/string.h>
+# include <iprt/types.h>
+# include <iprt/ctype.h>
+
+void pstrcpy(char *buf, int buf_size, const char *str);
+char *pstrcat(char *buf, int buf_size, const char *s);
+# define snprintf RTStrPrintf
+
+# define qemu_isalnum(c) RT_C_IS_ALNUM((unsigned char)(c))
+# define qemu_isalpha(c) RT_C_IS_ALPHA((unsigned char)(c))
+# define qemu_iscntrl(c) RT_C_IS_CNTRL((unsigned char)(c))
+# define qemu_isdigit(c) RT_C_IS_DIGIT((unsigned char)(c))
+# define qemu_isgraph(c) RT_C_IS_GRAPH((unsigned char)(c))
+# define qemu_islower(c) RT_C_IS_LOWER((unsigned char)(c))
+# define qemu_isprint(c) RT_C_IS_PRINT((unsigned char)(c))
+# define qemu_ispunct(c) RT_C_IS_PUNCT((unsigned char)(c))
+# define qemu_isspace(c) RT_C_IS_SPACE((unsigned char)(c))
+# define qemu_isupper(c) RT_C_IS_UPPER((unsigned char)(c))
+# define qemu_isxdigit(c) RT_C_IS_XDIGIT((unsigned char)(c))
+# define qemu_tolower(c) RT_C_TO_LOWER((unsigned char)(c))
+# define qemu_toupper(c) RT_C_TO_UPPER((unsigned char)(c))
+# define qemu_isascii(c) RT_C_IS_ASCII((unsigned char)(c))
+# define qemu_toascii(c) RT_C_TO_ASCII((unsigned char)(c))
+
+# define qemu_init_vcpu(env) do { } while (0) /* we don't need this :-) */
+
+# define QEMU_NORETURN __attribute__((__noreturn__))
+# ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT
+# define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+# else
+# define QEMU_WARN_UNUSED_RESULT
+# endif
+#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1];
+
+#include <stdio.h>
+#include "cpu.h"
+
+
+#else /* !VBOX */
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#define WINVER 0x0501 /* needed for ipv6 bits */
+#include <windows.h>
+#endif
+
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+#ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define QEMU_WARN_UNUSED_RESULT
+#endif
+
+#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1];
+
+typedef struct QEMUTimer QEMUTimer;
+typedef struct QEMUFile QEMUFile;
+typedef struct QEMUBH QEMUBH;
+typedef struct DeviceState DeviceState;
+
+
+/* Hack around the mess dyngen-exec.h causes: We need QEMU_NORETURN in files that
+ cannot include the following headers without conflicts. This condition has
+ to be removed once dyngen is gone. */
+#ifndef __DYNGEN_EXEC_H__
+
+/* we put basic includes here to avoid repeating them in device drivers */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <assert.h>
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#ifndef ENOMEDIUM
+#define ENOMEDIUM ENODEV
+#endif
+#if !defined(ENOTSUP)
+#define ENOTSUP 4096
+#endif
+
+#ifndef CONFIG_IOVEC
+#define CONFIG_IOVEC
+struct iovec {
+ void *iov_base;
+ size_t iov_len;
+};
+/*
+ * Use the same value as Linux for now.
+ */
+#define IOV_MAX 1024
+#else
+#include <sys/uio.h>
+#endif
+
+#ifdef _WIN32
+#define fsync _commit
+#define lseek _lseeki64
+extern int qemu_ftruncate64(int, int64_t);
+#define ftruncate qemu_ftruncate64
+
+static inline char *realpath(const char *path, char *resolved_path)
+{
+ _fullpath(resolved_path, path, _MAX_PATH);
+ return resolved_path;
+}
+
+#define PRId64 "I64d"
+#define PRIx64 "I64x"
+#define PRIu64 "I64u"
+#define PRIo64 "I64o"
+#endif
+
+/* FIXME: Remove NEED_CPU_H. */
+#ifndef NEED_CPU_H
+
+#include <setjmp.h>
+#include "osdep.h"
+#include "bswap.h"
+
+#else
+
+#include "cpu.h"
+
+#endif /* !defined(NEED_CPU_H) */
+
+/* bottom halves */
+typedef void QEMUBHFunc(void *opaque);
+
+void async_context_push(void);
+void async_context_pop(void);
+int get_async_context_id(void);
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+void qemu_bh_schedule(QEMUBH *bh);
+/* Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself. qemu_bh_schedule_idle() avoids this infinite loop by
+ * ensuring that the bottom half isn't executed until the next main loop
+ * iteration.
+ */
+void qemu_bh_schedule_idle(QEMUBH *bh);
+void qemu_bh_cancel(QEMUBH *bh);
+void qemu_bh_delete(QEMUBH *bh);
+int qemu_bh_poll(void);
+void qemu_bh_update_timeout(int *timeout);
+
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c);
+
+void qemu_get_timedate(struct tm *tm, int offset);
+int qemu_timedate_diff(struct tm *tm);
+
+/* cutils.c */
+void pstrcpy(char *buf, int buf_size, const char *str);
+char *pstrcat(char *buf, int buf_size, const char *s);
+int strstart(const char *str, const char *val, const char **ptr);
+int stristart(const char *str, const char *val, const char **ptr);
+int qemu_strnlen(const char *s, int max_len);
+time_t mktimegm(struct tm *tm);
+int qemu_fls(int i);
+int qemu_fdatasync(int fd);
+int fcntl_setfl(int fd, int flag);
+
+/* path.c */
+void init_paths(const char *prefix);
+const char *path(const char *pathname);
+
+#define qemu_isalnum(c) isalnum((unsigned char)(c))
+#define qemu_isalpha(c) isalpha((unsigned char)(c))
+#define qemu_iscntrl(c) iscntrl((unsigned char)(c))
+#define qemu_isdigit(c) isdigit((unsigned char)(c))
+#define qemu_isgraph(c) isgraph((unsigned char)(c))
+#define qemu_islower(c) islower((unsigned char)(c))
+#define qemu_isprint(c) isprint((unsigned char)(c))
+#define qemu_ispunct(c) ispunct((unsigned char)(c))
+#define qemu_isspace(c) isspace((unsigned char)(c))
+#define qemu_isupper(c) isupper((unsigned char)(c))
+#define qemu_isxdigit(c) isxdigit((unsigned char)(c))
+#define qemu_tolower(c) tolower((unsigned char)(c))
+#define qemu_toupper(c) toupper((unsigned char)(c))
+#define qemu_isascii(c) isascii((unsigned char)(c))
+#define qemu_toascii(c) toascii((unsigned char)(c))
+
+void *qemu_malloc(size_t size);
+void *qemu_realloc(void *ptr, size_t size);
+void *qemu_mallocz(size_t size);
+void qemu_free(void *ptr);
+char *qemu_strdup(const char *str);
+char *qemu_strndup(const char *str, size_t size);
+
+void qemu_mutex_lock_iothread(void);
+void qemu_mutex_unlock_iothread(void);
+
+int qemu_open(const char *name, int flags, ...);
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+ QEMU_WARN_UNUSED_RESULT;
+void qemu_set_cloexec(int fd);
+
+#ifndef _WIN32
+int qemu_eventfd(int pipefd[2]);
+int qemu_pipe(int pipefd[2]);
+#endif
+
+/* Error handling. */
+
+void QEMU_NORETURN hw_error(const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 1, 2)));
+
+/* IO callbacks. */
+typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef int IOCanReadHandler(void *opaque);
+typedef void IOHandler(void *opaque);
+
+struct ParallelIOArg {
+ void *buffer;
+ int count;
+};
+
+typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size);
+
+/* A load of opaque types so that device init declarations don't have to
+ pull in all the real definitions. */
+typedef struct NICInfo NICInfo;
+typedef struct HCIInfo HCIInfo;
+typedef struct AudioState AudioState;
+typedef struct BlockDriverState BlockDriverState;
+typedef struct DisplayState DisplayState;
+typedef struct DisplayChangeListener DisplayChangeListener;
+typedef struct DisplaySurface DisplaySurface;
+typedef struct DisplayAllocator DisplayAllocator;
+typedef struct PixelFormat PixelFormat;
+typedef struct TextConsole TextConsole;
+typedef TextConsole QEMUConsole;
+typedef struct CharDriverState CharDriverState;
+typedef struct MACAddr MACAddr;
+typedef struct VLANState VLANState;
+typedef struct VLANClientState VLANClientState;
+typedef struct i2c_bus i2c_bus;
+typedef struct i2c_slave i2c_slave;
+typedef struct SMBusDevice SMBusDevice;
+typedef struct PCIHostState PCIHostState;
+typedef struct PCIExpressHost PCIExpressHost;
+typedef struct PCIBus PCIBus;
+typedef struct PCIDevice PCIDevice;
+typedef struct SerialState SerialState;
+typedef struct IRQState *qemu_irq;
+typedef struct PCMCIACardState PCMCIACardState;
+typedef struct MouseTransformInfo MouseTransformInfo;
+typedef struct uWireSlave uWireSlave;
+typedef struct I2SCodec I2SCodec;
+typedef struct SSIBus SSIBus;
+typedef struct EventNotifier EventNotifier;
+typedef struct VirtIODevice VirtIODevice;
+
+typedef uint64_t pcibus_t;
+
+void cpu_exec_init_all(unsigned long tb_size);
+
+/* CPU save/load. */
+void cpu_save(QEMUFile *f, void *opaque);
+int cpu_load(QEMUFile *f, void *opaque, int version_id);
+
+/* Force QEMU to stop what it's doing and service IO */
+void qemu_service_io(void);
+
+/* Force QEMU to process pending events */
+void qemu_notify_event(void);
+
+/* Unblock cpu */
+void qemu_cpu_kick(void *env);
+int qemu_cpu_self(void *env);
+
+/* work queue */
+struct qemu_work_item {
+ struct qemu_work_item *next;
+ void (*func)(void *data);
+ void *data;
+ int done;
+};
+
+#ifdef CONFIG_USER_ONLY
+#define qemu_init_vcpu(env) do { } while (0)
+#else
+void qemu_init_vcpu(void *env);
+#endif
+
+typedef struct QEMUIOVector {
+ struct iovec *iov;
+ int niov;
+ int nalloc;
+ size_t size;
+} QEMUIOVector;
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
+void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
+void qemu_iovec_destroy(QEMUIOVector *qiov);
+void qemu_iovec_reset(QEMUIOVector *qiov);
+void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
+void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
+
+struct Monitor;
+typedef struct Monitor Monitor;
+
+/* Convert a byte between binary and BCD. */
+static inline uint8_t to_bcd(uint8_t val)
+{
+ return ((val / 10) << 4) | (val % 10);
+}
+
+static inline uint8_t from_bcd(uint8_t val)
+{
+ return ((val >> 4) * 10) + (val & 0x0f);
+}
+
+#include "module.h"
+
+#endif /* dyngen-exec.h hack */
+
+#endif /* !VBOX */
+
+#endif
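
to_bcd()/from_bcd() pack and unpack a 0..99 value as two 4-bit decimal digits, the encoding RTC-style devices expect. A quick worked example:

    /* to_bcd(42)     -> 0x42 : (42 / 10) << 4  |  42 % 10         */
    /* from_bcd(0x59) -> 59   : (0x59 >> 4) * 10 + (0x59 & 0x0f)   */
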
diff --git a/src/recompiler/qemu-lock.h b/src/recompiler/qemu-lock.h
new file mode 100644
index 00000000..1234a5b6
--- /dev/null
+++ b/src/recompiler/qemu-lock.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+/* Locking primitives. Most of this code should be redundant -
+ system emulation doesn't need/use locking, NPTL userspace uses
+ pthread mutexes, and non-NPTL userspace isn't thread-safe anyway.
+ In either case a spinlock is probably the wrong kind of lock.
+ Spinlocks are only good if you know another CPU has the lock and is
+ likely to release it soon. In environments where you have more threads
+ than physical CPUs (the extreme case being a single CPU host) a spinlock
+ simply wastes CPU until the OS decides to preempt it. */
+#if defined(CONFIG_USE_NPTL)
+
+#include <pthread.h>
+#define spin_lock pthread_mutex_lock
+#define spin_unlock pthread_mutex_unlock
+#define spinlock_t pthread_mutex_t
+#define SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+#else
+
+#if defined(__hppa__)
+
+typedef int spinlock_t[4];
+
+#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 }
+
+static inline void resetlock (spinlock_t *p)
+{
+ (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1;
+}
+
+#else
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+static inline void resetlock (spinlock_t *p)
+{
+ *p = SPIN_LOCK_UNLOCKED;
+}
+
+#endif
+
+#ifdef VBOX
+DECLINLINE(int) testandset (int *p)
+{
+ return ASMAtomicCmpXchgU32((volatile uint32_t *)p, 1, 0) ? 0 : 1;
+}
+#elif defined(_ARCH_PPC)
+static inline int testandset (int *p)
+{
+ int ret;
+ __asm__ __volatile__ (
+ " lwarx %0,0,%1\n"
+ " xor. %0,%3,%0\n"
+ " bne $+12\n"
+ " stwcx. %2,0,%1\n"
+ " bne- $-16\n"
+ : "=&r" (ret)
+ : "r" (p), "r" (1), "r" (0)
+ : "cr0", "memory");
+ return ret;
+}
+#elif defined(__i386__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__x86_64__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__s390__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
+ " jl 0b"
+ : "=&d" (ret)
+ : "r" (1), "a" (p), "0" (*p)
+ : "cc", "memory" );
+ return ret;
+}
+#elif defined(__alpha__)
+static inline int testandset (int *p)
+{
+ int ret;
+ unsigned long one;
+
+ __asm__ __volatile__ ("0: mov 1,%2\n"
+ " ldl_l %0,%1\n"
+ " stl_c %2,%1\n"
+ " beq %2,1f\n"
+ ".subsection 2\n"
+ "1: br 0b\n"
+ ".previous"
+ : "=r" (ret), "=m" (*p), "=r" (one)
+ : "m" (*p));
+ return ret;
+}
+#elif defined(__sparc__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__("ldstub [%1], %0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory");
+
+ return (ret ? 1 : 0);
+}
+#elif defined(__arm__)
+static inline int testandset (int *spinlock)
+{
+ register unsigned int ret;
+ __asm__ __volatile__("swp %0, %1, [%2]"
+ : "=r"(ret)
+ : "0"(1), "r"(spinlock));
+
+ return ret;
+}
+#elif defined(__mc68000)
+static inline int testandset (int *p)
+{
+ char ret;
+ __asm__ __volatile__("tas %1; sne %0"
+ : "=r" (ret)
+ : "m" (p)
+ : "cc","memory");
+ return ret;
+}
+#elif defined(__hppa__)
+
+/* Because malloc only guarantees 8-byte alignment for malloc'd data,
+ and GCC only guarantees 8-byte alignment for stack locals, we can't
+ be assured of 16-byte alignment for atomic lock data even if we
+ specify "__attribute ((aligned(16)))" in the type declaration. So,
+ we use a struct containing an array of four ints for the atomic lock
+ type and dynamically select the 16-byte aligned int from the array
+ for the semaphore. */
+#define __PA_LDCW_ALIGNMENT 16
+static inline void *ldcw_align (void *p) {
+ unsigned long a = (unsigned long)p;
+ a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1);
+ return (void *)a;
+}
+
+static inline int testandset (spinlock_t *p)
+{
+ unsigned int ret;
+ p = ldcw_align(p);
+ __asm__ __volatile__("ldcw 0(%1),%0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory" );
+ return !ret;
+}
+
+#elif defined(__ia64)
+
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+ return __sync_lock_test_and_set (p, 1);
+}
+#elif defined(__mips__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ " .set push \n"
+ " .set noat \n"
+ " .set mips2 \n"
+ "1: li $1, 1 \n"
+ " ll %0, %1 \n"
+ " sc $1, %1 \n"
+ " beqz $1, 1b \n"
+ " .set pop "
+ : "=r" (ret), "+R" (*p)
+ :
+ : "memory");
+
+ return ret;
+}
+#else
+#error unimplemented CPU support
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+ while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ resetlock(lock);
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return 1;
+}
+#endif
+
+#endif
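
In CONFIG_USER_ONLY builds spin_lock() simply spins on testandset() until the word is claimed; system emulation compiles the lock away entirely. A hedged sketch guarding a critical section with a statically initialized lock (names are illustrative):

    #include "qemu-lock.h"

    static spinlock_t demo_lock = SPIN_LOCK_UNLOCKED;

    static void touch_shared_state(void)
    {
        spin_lock(&demo_lock);
        /* ... critical section ... */
        spin_unlock(&demo_lock);
    }
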
diff --git a/src/recompiler/qemu-log.h b/src/recompiler/qemu-log.h
new file mode 100644
index 00000000..64d375b4
--- /dev/null
+++ b/src/recompiler/qemu-log.h
@@ -0,0 +1,135 @@
+#ifndef QEMU_LOG_H
+#define QEMU_LOG_H
+
+/* The deprecated global variables: */
+extern FILE *logfile;
+extern int loglevel;
+
+
+/*
+ * The new API:
+ *
+ */
+
+/* Log settings checking macros: */
+
+/* Returns true if qemu_log() will really write somewhere
+ */
+#ifndef VBOX
+#define qemu_log_enabled() (logfile != NULL)
+#else
+# define qemu_log_enabled() LogIsEnabled()
+#endif
+
+/* Returns true if a bit is set in the current loglevel mask
+ */
+#define qemu_loglevel_mask(b) ((loglevel & (b)) != 0)
+
+
+/* Logging functions: */
+
+/* main logging function
+ */
+#ifndef VBOX
+#define qemu_log(...) do { \
+ if (logfile) \
+ fprintf(logfile, ## __VA_ARGS__); \
+ } while (0)
+#else
+# define qemu_log(...) Log((__VA_ARGS__))
+#endif
+
+/* vfprintf-like logging function
+ */
+#ifndef VBOX
+#define qemu_log_vprintf(fmt, va) do { \
+ if (logfile) \
+ vfprintf(logfile, fmt, va); \
+ } while (0)
+#else
+# define qemu_log_vprintf(fmt, va) do { \
+ if (LogIsEnabled()) \
+ RTLogLoggerExV(RTLOGGRPFLAGS_LEVEL_1, LOG_GROUP, fmt, va); \
+ } while (0)
+#endif
+
+/* log only if a bit is set on the current loglevel mask
+ */
+#ifndef VBOX
+#define qemu_log_mask(b, ...) do { \
+ if (loglevel & (b)) \
+ fprintf(logfile, ## __VA_ARGS__); \
+ } while (0)
+#else
+# define qemu_log_mask(b, ...) do { \
+ if (loglevel & (b)) \
+ Log((__VA_ARGS__)); \
+ } while (0)
+#endif
+
+
+
+
+/* Special cases: */
+
+/* cpu_dump_state() logging functions: */
+#ifndef VBOX
+#define log_cpu_state(env, f) cpu_dump_state((env), logfile, fprintf, (f));
+#else
+#define log_cpu_state(env, f) cpu_dump_state((env), NULL, NULL, (f));
+#endif
+#define log_cpu_state_mask(b, env, f) do { \
+ if (loglevel & (b)) log_cpu_state((env), (f)); \
+ } while (0)
+
+/* disas() and target_disas() to logfile: */
+#define log_target_disas(start, len, flags) \
+ target_disas(logfile, (start), (len), (flags))
+#define log_disas(start, len) \
+ disas(logfile, (start), (len))
+
+/* page_dump() output to the log file: */
+#define log_page_dump() page_dump(logfile)
+
+
+
+/* Maintenance: */
+
+/* fflush() the log file */
+#ifndef VBOX
+#define qemu_log_flush() fflush(logfile)
+#else
+# define qemu_log_flush() RTLogFlush(NULL)
+#endif
+
+/* Close the log file */
+#ifndef VBOX
+#define qemu_log_close() do { \
+ fclose(logfile); \
+ logfile = NULL; \
+ } while (0)
+#else
+# define qemu_log_close() do { } while (0)
+#endif
+
+/* Set up a new log file */
+#ifndef VBOX
+#define qemu_log_set_file(f) do { \
+ logfile = (f); \
+ } while (0)
+#else
+# define qemu_log_set_file(f) do { } while (0)
+#endif
+
+/* Set up a new log file, only if none is set */
+#ifndef VBOX
+#define qemu_log_try_set_file(f) do { \
+ if (!logfile) \
+ logfile = (f); \
+ } while (0)
+#else
+#define qemu_log_try_set_file(f) do { } while (0)
+#endif
+
+
+#endif
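
qemu_log_mask() only emits output when the given bit is set in the loglevel mask (and, in the VBox build, routes it to the Log facility). A one-line hedged example; CPU_LOG_INT is defined elsewhere in the tree and assumed here:

    qemu_log_mask(CPU_LOG_INT, "servicing hardware INT=0x%02x\n", intno);
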
diff --git a/src/recompiler/qemu-queue.h b/src/recompiler/qemu-queue.h
new file mode 100644
index 00000000..1d077458
--- /dev/null
+++ b/src/recompiler/qemu-queue.h
@@ -0,0 +1,449 @@
+/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */
+
+/*
+ * Qemu version: Copy from netbsd, removed debug code, removed some of
+ * the implementations. Left in lists, simple queues, tail queues and
+ * circular queues.
+ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef QEMU_SYS_QUEUE_H_
+#define QEMU_SYS_QUEUE_H_
+
+/*
+ * This file defines four types of data structures:
+ * lists, simple queues, tail queues, and circular queues.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A simple queue is headed by a pair of pointers, one the head of the
+ * list and the other to the tail of the list. The elements are singly
+ * linked to save space, so elements can only be removed from the
+ * head of the list. New elements can be added to the list after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A simple queue may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * List definitions.
+ */
+#define QLIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define QLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define QLIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define QLIST_INIT(head) do { \
+ (head)->lh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_FOREACH(var, head, field) \
+ for ((var) = ((head)->lh_first); \
+ (var); \
+ (var) = ((var)->field.le_next))
+
+#define QLIST_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->lh_first); \
+ (var) && ((next_var) = ((var)->field.le_next), 1); \
+ (var) = (next_var))
+
+/*
+ * List access methods.
+ */
+#define QLIST_EMPTY(head) ((head)->lh_first == NULL)
+#define QLIST_FIRST(head) ((head)->lh_first)
+#define QLIST_NEXT(elm, field) ((elm)->field.le_next)
+
+
+/*
+ * Simple queue definitions.
+ */
+#define QSIMPLEQ_HEAD(name, type) \
+struct name { \
+ struct type *sqh_first; /* first element */ \
+ struct type **sqh_last; /* addr of last next element */ \
+}
+
+#define QSIMPLEQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).sqh_first }
+
+#define QSIMPLEQ_ENTRY(type) \
+struct { \
+ struct type *sqe_next; /* next element */ \
+}
+
+/*
+ * Simple queue functions.
+ */
+#define QSIMPLEQ_INIT(head) do { \
+ (head)->sqh_first = NULL; \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (head)->sqh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.sqe_next = NULL; \
+ *(head)->sqh_last = (elm); \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (listelm)->field.sqe_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \
+ if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE(head, elm, type, field) do { \
+ if ((head)->sqh_first == (elm)) { \
+ QSIMPLEQ_REMOVE_HEAD((head), field); \
+ } else { \
+ struct type *curelm = (head)->sqh_first; \
+ while (curelm->field.sqe_next != (elm)) \
+ curelm = curelm->field.sqe_next; \
+ if ((curelm->field.sqe_next = \
+ curelm->field.sqe_next->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(curelm)->field.sqe_next; \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->sqh_first); \
+ (var); \
+ (var) = ((var)->field.sqe_next))
+
+#define QSIMPLEQ_FOREACH_SAFE(var, head, field, next) \
+ for ((var) = ((head)->sqh_first); \
+ (var) && ((next = ((var)->field.sqe_next)), 1); \
+ (var) = (next))
+
+#define QSIMPLEQ_CONCAT(head1, head2) do { \
+ if (!QSIMPLEQ_EMPTY((head2))) { \
+ *(head1)->sqh_last = (head2)->sqh_first; \
+ (head1)->sqh_last = (head2)->sqh_last; \
+ QSIMPLEQ_INIT((head2)); \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_LAST(head, type, field) \
+ (QSIMPLEQ_EMPTY((head)) ? \
+ NULL : \
+ ((struct type *)(void *) \
+ ((char *)((head)->sqh_last) - offsetof(struct type, field))))
+
+/*
+ * Simple queue access methods.
+ */
+#define QSIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL)
+#define QSIMPLEQ_FIRST(head) ((head)->sqh_first)
+#define QSIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
+
+
+/*
+ * Tail queue definitions.
+ */
+#define Q_TAILQ_HEAD(name, type, qual) \
+struct name { \
+ qual type *tqh_first; /* first element */ \
+ qual type *qual *tqh_last; /* addr of last next element */ \
+}
+#define QTAILQ_HEAD(name, type) Q_TAILQ_HEAD(name, struct type,)
+
+#define QTAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+#define Q_TAILQ_ENTRY(type, qual) \
+struct { \
+ qual type *tqe_next; /* next element */ \
+ qual type *qual *tqe_prev; /* address of previous next element */\
+}
+#define QTAILQ_ENTRY(type) Q_TAILQ_ENTRY(struct type,)
+
+/*
+ * Tail queue functions.
+ */
+#define QTAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->tqh_first); \
+ (var); \
+ (var) = ((var)->field.tqe_next))
+
+#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->tqh_first); \
+ (var) && ((next_var) = ((var)->field.tqe_next), 1); \
+ (var) = (next_var))
+
+#define QTAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \
+ (var); \
+ (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
+
+/*
+ * Tail queue access methods.
+ */
+#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#define QTAILQ_FIRST(head) ((head)->tqh_first)
+#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define QTAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define QTAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+
+/*
+ * Circular queue definitions.
+ */
+#define QCIRCLEQ_HEAD(name, type) \
+struct name { \
+ struct type *cqh_first; /* first element */ \
+ struct type *cqh_last; /* last element */ \
+}
+
+#define QCIRCLEQ_HEAD_INITIALIZER(head) \
+ { (void *)&head, (void *)&head }
+
+#define QCIRCLEQ_ENTRY(type) \
+struct { \
+ struct type *cqe_next; /* next element */ \
+ struct type *cqe_prev; /* previous element */ \
+}
+
+/*
+ * Circular queue functions.
+ */
+#define QCIRCLEQ_INIT(head) do { \
+ (head)->cqh_first = (void *)(head); \
+ (head)->cqh_last = (void *)(head); \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm)->field.cqe_next; \
+ (elm)->field.cqe_prev = (listelm); \
+ if ((listelm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (listelm)->field.cqe_next->field.cqe_prev = (elm); \
+ (listelm)->field.cqe_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm); \
+ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+ if ((listelm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (listelm)->field.cqe_prev->field.cqe_next = (elm); \
+ (listelm)->field.cqe_prev = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.cqe_next = (head)->cqh_first; \
+ (elm)->field.cqe_prev = (void *)(head); \
+ if ((head)->cqh_last == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (head)->cqh_first->field.cqe_prev = (elm); \
+ (head)->cqh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.cqe_next = (void *)(head); \
+ (elm)->field.cqe_prev = (head)->cqh_last; \
+ if ((head)->cqh_first == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (head)->cqh_last->field.cqe_next = (elm); \
+ (head)->cqh_last = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_REMOVE(head, elm, field) do { \
+ if ((elm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm)->field.cqe_prev; \
+ else \
+ (elm)->field.cqe_next->field.cqe_prev = \
+ (elm)->field.cqe_prev; \
+ if ((elm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm)->field.cqe_next; \
+ else \
+ (elm)->field.cqe_prev->field.cqe_next = \
+ (elm)->field.cqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QCIRCLEQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->cqh_first); \
+ (var) != (const void *)(head); \
+ (var) = ((var)->field.cqe_next))
+
+#define QCIRCLEQ_FOREACH_REVERSE(var, head, field) \
+ for ((var) = ((head)->cqh_last); \
+ (var) != (const void *)(head); \
+ (var) = ((var)->field.cqe_prev))
+
+/*
+ * Circular queue access methods.
+ */
+#define QCIRCLEQ_EMPTY(head) ((head)->cqh_first == (void *)(head))
+#define QCIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define QCIRCLEQ_LAST(head) ((head)->cqh_last)
+#define QCIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define QCIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+
+#define QCIRCLEQ_LOOP_NEXT(head, elm, field) \
+ (((elm)->field.cqe_next == (void *)(head)) \
+ ? ((head)->cqh_first) \
+ : (elm->field.cqe_next))
+#define QCIRCLEQ_LOOP_PREV(head, elm, field) \
+ (((elm)->field.cqe_prev == (void *)(head)) \
+ ? ((head)->cqh_last) \
+ : (elm->field.cqe_prev))
+
+#endif /* !QEMU_SYS_QUEUE_H_ */
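
The QLIST_* macros embed the links inside the element type itself, so no separate node allocation is needed. A hedged sketch of declaring, populating, and walking a list (type and field names are made up):

    #include "qemu-queue.h"

    typedef struct Item {
        int value;
        QLIST_ENTRY(Item) link;          /* embedded next/prev-next links */
    } Item;

    static QLIST_HEAD(, Item) items = QLIST_HEAD_INITIALIZER(items);

    static void demo(Item *a, Item *b)
    {
        Item *it;
        QLIST_INSERT_HEAD(&items, a, link);
        QLIST_INSERT_AFTER(a, b, link);
        QLIST_FOREACH(it, &items, link) {
            /* visits a, then b */
        }
        QLIST_REMOVE(b, link);
    }
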
diff --git a/src/recompiler/qemu-timer.h b/src/recompiler/qemu-timer.h
new file mode 100644
index 00000000..209c4d2c
--- /dev/null
+++ b/src/recompiler/qemu-timer.h
@@ -0,0 +1,272 @@
+#ifndef QEMU_TIMER_H
+#define QEMU_TIMER_H
+
+#include "qemu-common.h"
+
+/* timers */
+#ifndef VBOX
+
+typedef struct QEMUClock QEMUClock;
+typedef void QEMUTimerCB(void *opaque);
+
+/* The real time clock should be used only for stuff which does not
+ change the virtual machine state, as it is run even if the virtual
+ machine is stopped. The real time clock has a frequency of 1000
+ Hz. */
+extern QEMUClock *rt_clock;
+
+/* The virtual clock is only run during the emulation. It is stopped
+ when the virtual machine is stopped. Virtual timers use a high
+ precision clock, usually cpu cycles (use ticks_per_sec). */
+extern QEMUClock *vm_clock;
+
+/* The host clock should be used for device models that emulate accurate
+ real time sources. It will continue to run when the virtual machine
+ is suspended, and it will reflect system time changes the host may
+ undergo (e.g. due to NTP). The host clock has the same precision as
+ the virtual clock. */
+extern QEMUClock *host_clock;
+
+int64_t qemu_get_clock(QEMUClock *clock);
+int64_t qemu_get_clock_ns(QEMUClock *clock);
+void qemu_clock_enable(QEMUClock *clock, int enabled);
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque);
+void qemu_free_timer(QEMUTimer *ts);
+void qemu_del_timer(QEMUTimer *ts);
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+int qemu_timer_pending(QEMUTimer *ts);
+int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time);
+
+void qemu_run_all_timers(void);
+int qemu_alarm_pending(void);
+int64_t qemu_next_deadline(void);
+void configure_alarms(char const *opt);
+void configure_icount(const char *option);
+int qemu_calculate_timeout(void);
+void init_clocks(void);
+int init_timer_alarm(void);
+void quit_timers(void);
+
+static inline int64_t get_ticks_per_sec(void)
+{
+ return 1000000000LL;
+}
+
+
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
+
+/* ptimer.c */
+typedef struct ptimer_state ptimer_state;
+typedef void (*ptimer_cb)(void *opaque);
+
+ptimer_state *ptimer_init(QEMUBH *bh);
+void ptimer_set_period(ptimer_state *s, int64_t period);
+void ptimer_set_freq(ptimer_state *s, uint32_t freq);
+void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload);
+uint64_t ptimer_get_count(ptimer_state *s);
+void ptimer_set_count(ptimer_state *s, uint64_t count);
+void ptimer_run(ptimer_state *s, int oneshot);
+void ptimer_stop(ptimer_state *s);
+void qemu_put_ptimer(QEMUFile *f, ptimer_state *s);
+void qemu_get_ptimer(QEMUFile *f, ptimer_state *s);
+
+/* icount */
+int64_t qemu_icount_round(int64_t count);
+extern int64_t qemu_icount;
+#endif /* !VBOX */
+extern int use_icount;
+#ifndef VBOX
+extern int icount_time_shift;
+extern int64_t qemu_icount_bias;
+int64_t cpu_get_icount(void);
+
+/*******************************************/
+/* host CPU ticks (if available) */
+
+#if defined(_ARCH_PPC)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t retval;
+#ifdef _ARCH_PPC64
+ /* This reads timebase in one 64bit go and includes Cell workaround from:
+ http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html
+ */
+ __asm__ __volatile__ ("mftb %0\n\t"
+ "cmpwi %0,0\n\t"
+ "beq- $-8"
+ : "=r" (retval));
+#else
+ /* http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html */
+ unsigned long junk;
+ __asm__ __volatile__ ("mfspr %1,269\n\t" /* mftbu */
+ "mfspr %L0,268\n\t" /* mftb */
+ "mfspr %0,269\n\t" /* mftbu */
+ "cmpw %0,%1\n\t"
+ "bne $-16"
+ : "=r" (retval), "=r" (junk));
+#endif
+ return retval;
+}
+
+#elif defined(__i386__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+
+#elif defined(__x86_64__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint32_t low,high;
+ int64_t val;
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ val = high;
+ val <<= 32;
+ val |= low;
+ return val;
+}
+
+#elif defined(__hppa__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int val;
+ asm volatile ("mfctl %%cr16, %0" : "=r"(val));
+ return val;
+}
+
+#elif defined(__ia64)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory");
+ return val;
+}
+
+#elif defined(__s390__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc");
+ return val;
+}
+
+#elif defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
+
+static inline int64_t cpu_get_real_ticks (void)
+{
+#if defined(_LP64)
+ uint64_t rval;
+ asm volatile("rd %%tick,%0" : "=r"(rval));
+ return rval;
+#else
+ union {
+ uint64_t i64;
+ struct {
+ uint32_t high;
+ uint32_t low;
+ } i32;
+ } rval;
+ asm volatile("rd %%tick,%1; srlx %1,32,%0"
+ : "=r"(rval.i32.high), "=r"(rval.i32.low));
+ return rval.i64;
+#endif
+}
+
+#elif defined(__mips__) && \
+ ((defined(__mips_isa_rev) && __mips_isa_rev >= 2) || defined(__linux__))
+/*
+ * binutils wants to use rdhwr only on mips32r2,
+ * but since the Linux kernel emulates it, it's fine
+ * to use it.
+ *
+ */
+#define MIPS_RDHWR(rd, value) { \
+ __asm__ __volatile__ (".set push\n\t" \
+ ".set mips32r2\n\t" \
+ "rdhwr %0, "rd"\n\t" \
+ ".set pop" \
+ : "=r" (value)); \
+ }
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ /* On kernels >= 2.6.25 rdhwr <reg>, $2 and $3 are emulated */
+ uint32_t count;
+ static uint32_t cyc_per_count = 0;
+
+ if (!cyc_per_count) {
+ MIPS_RDHWR("$3", cyc_per_count);
+ }
+
+ MIPS_RDHWR("$2", count);
+ return (int64_t)(count * cyc_per_count);
+}
+
+#elif defined(__alpha__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint64_t cc;
+ uint32_t cur, ofs;
+
+ asm volatile("rpcc %0" : "=r"(cc));
+ cur = cc;
+ ofs = cc >> 32;
+ return cur - ofs;
+}
+
+#else
+/* The host CPU doesn't have an easily accessible cycle counter.
+ Just return a monotonically increasing value. This will be
+ totally wrong, but hopefully better than nothing. */
+static inline int64_t cpu_get_real_ticks (void)
+{
+ static int64_t ticks = 0;
+ return ticks++;
+}
+#endif
+
+#endif /* !VBOX */
+
+#ifdef NEED_CPU_H
+/* Deterministic execution requires that IO only be performed on the last
+ instruction of a TB so that interrupts take effect immediately. */
+static inline int can_do_io(CPUState *env)
+{
+ if (!use_icount)
+ return 1;
+
+ /* If not executing code then assume we are ok. */
+ if (!env->current_tb)
+ return 1;
+
+ return env->can_do_io != 0;
+}
+#endif
+
+#ifndef VBOX
+
+#ifdef CONFIG_PROFILER
+static inline int64_t profile_getclock(void)
+{
+ return cpu_get_real_ticks();
+}
+
+extern int64_t qemu_time, qemu_time_start;
+extern int64_t tlb_flush_time;
+extern int64_t dev_time;
+#endif
+
+#endif /* !VBOX */
+
+#endif
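The CONFIG_PROFILER counters above (qemu_time, tlb_flush_time, dev_time) are meant to be fed with cpu_get_real_ticks() deltas via profile_getclock(). A minimal sketch of that pattern for CONFIG_PROFILER builds follows; the wrapper function name and the measured work are illustrative, not part of the patch:

/* Sketch only: accumulate host-tick deltas into a profiler counter. */
static void profile_region_example(void)
{
#ifdef CONFIG_PROFILER
    int64_t ti = profile_getclock();              /* raw host ticks, arbitrary unit */
    /* ... the work being measured, e.g. a TLB flush ... */
    tlb_flush_time += profile_getclock() - ti;    /* elapsed ticks for this region */
#endif
}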
diff --git a/src/recompiler/softmmu_defs.h b/src/recompiler/softmmu_defs.h
new file mode 100644
index 00000000..e23c3498
--- /dev/null
+++ b/src/recompiler/softmmu_defs.h
@@ -0,0 +1,57 @@
+#ifndef SOFTMMU_DEFS_H
+#define SOFTMMU_DEFS_H
+
+#ifndef VBOX
+uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM __ldl_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
+
+uint8_t REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+#else /* VBOX */
+RTCCUINTREG REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
+RTCCUINTREG REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
+RTCCUINTREG REGPARM __ldl_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx);
+void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
+
+RTCCUINTREG REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
+RTCCUINTREG REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
+RTCCUINTREG REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
+void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+
+# ifdef REM_PHYS_ADDR_IN_TLB
+RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr);
+RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr);
+void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val);
+RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr);
+RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr);
+void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val);
+RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr);
+RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr);
+void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val);
+uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr);
+void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val);
+# endif
+
+#endif /* VBOX */
+
+#endif
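The prototypes above are the out-of-line slow-path accessors that both TCG-generated code and the inline wrappers in softmmu_header.h fall back to on a TLB miss. A minimal sketch of a direct call follows; the helper name is hypothetical, and the sketch assumes the global 'env' these routines operate on is already set up:

/* Sketch only: read a 32-bit guest value through the softmmu helpers. */
static uint32_t read_guest_dword_example(CPUState *env1, target_ulong addr)
{
    int mmu_idx = cpu_mmu_index(env1);   /* 0 = kernel, 1 = user on i386 */
    return __ldl_mmu(addr, mmu_idx);     /* may call tlb_fill() and raise a guest fault */
}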
diff --git a/src/recompiler/softmmu_exec.h b/src/recompiler/softmmu_exec.h
new file mode 100644
index 00000000..28d1d53d
--- /dev/null
+++ b/src/recompiler/softmmu_exec.h
@@ -0,0 +1,153 @@
+/* Common softmmu definitions and inline routines. */
+
+/* XXX: find something cleaner.
+ * Furthermore, this is false for 64-bit targets.
+ */
+#define ldul_user ldl_user
+#define ldul_kernel ldl_kernel
+#define ldul_hypv ldl_hypv
+#define ldul_executive ldl_executive
+#define ldul_supervisor ldl_supervisor
+
+#include "softmmu_defs.h"
+
+#define ACCESS_TYPE 0
+#define MEMSUFFIX MMU_MODE0_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#define ACCESS_TYPE 1
+#define MEMSUFFIX MMU_MODE1_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#if (NB_MMU_MODES >= 3)
+
+#define ACCESS_TYPE 2
+#define MEMSUFFIX MMU_MODE2_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 3) */
+
+#if (NB_MMU_MODES >= 4)
+
+#define ACCESS_TYPE 3
+#define MEMSUFFIX MMU_MODE3_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 4) */
+
+#if (NB_MMU_MODES >= 5)
+
+#define ACCESS_TYPE 4
+#define MEMSUFFIX MMU_MODE4_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 5) */
+
+#if (NB_MMU_MODES >= 6)
+
+#define ACCESS_TYPE 5
+#define MEMSUFFIX MMU_MODE5_SUFFIX
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 6) */
+
+#if (NB_MMU_MODES > 6)
+#error "NB_MMU_MODES > 6 is not supported for now"
+#endif /* (NB_MMU_MODES > 6) */
+
+/* these accesses are slower; they must be as rare as possible */
+#define ACCESS_TYPE (NB_MMU_MODES)
+#define MEMSUFFIX _data
+#define DATA_SIZE 1
+#include "softmmu_header.h"
+
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#define ldub(p) ldub_data(p)
+#define ldsb(p) ldsb_data(p)
+#define lduw(p) lduw_data(p)
+#define ldsw(p) ldsw_data(p)
+#define ldl(p) ldl_data(p)
+#define ldq(p) ldq_data(p)
+
+#define stb(p, v) stb_data(p, v)
+#define stw(p, v) stw_data(p, v)
+#define stl(p, v) stl_data(p, v)
+#define stq(p, v) stq_data(p, v)
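Each pass through softmmu_header.h above generates one family of inline accessors named after MEMSUFFIX (for i386, MMU_MODE0_SUFFIX is _kernel and MMU_MODE1_SUFFIX is _user), plus the generic _data family that the plain ldl()/stl() style macros alias. A rough sketch of what that yields, illustrative only:

/* Sketch only: names produced by the includes above (i386 settings).
 *   ACCESS_TYPE 0, MEMSUFFIX _kernel          : ldub_kernel .. ldq_kernel, stb_kernel .. stq_kernel
 *   ACCESS_TYPE 1, MEMSUFFIX _user            : ldub_user   .. ldq_user,   stb_user   .. stq_user
 *   ACCESS_TYPE NB_MMU_MODES, MEMSUFFIX _data : ldub_data   .. ldq_data,   stb_data   .. stq_data
 * so target helper code can simply do, with the MMU mode resolved at run time: */
static inline uint32_t fetch_guest_dword_example(target_ulong addr)
{
    return ldl_data(addr);   /* same as ldl(addr); picks the TLB via cpu_mmu_index(env) */
}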
diff --git a/src/recompiler/softmmu_header.h b/src/recompiler/softmmu_header.h
new file mode 100644
index 00000000..06e2466d
--- /dev/null
+++ b/src/recompiler/softmmu_header.h
@@ -0,0 +1,208 @@
+/*
+ * Software MMU support
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define USUFFIX q
+#define DATA_TYPE uint64_t
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define USUFFIX l
+#define DATA_TYPE uint32_t
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define USUFFIX uw
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define USUFFIX ub
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#else
+#error unsupported data size
+#endif
+
+#if ACCESS_TYPE < (NB_MMU_MODES)
+
+#define CPU_MMU_INDEX ACCESS_TYPE
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == (NB_MMU_MODES)
+
+#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == (NB_MMU_MODES + 1)
+
+#define CPU_MMU_INDEX (cpu_mmu_index(env))
+#define MMUSUFFIX _cmmu
+
+#else
+#error invalid ACCESS_TYPE
+#endif
+
+#if DATA_SIZE == 8
+#define RES_TYPE uint64_t
+#else
+#define RES_TYPE uint32_t
+#endif
+
+#if ACCESS_TYPE == (NB_MMU_MODES + 1)
+#define ADDR_READ addr_code
+#else
+#define ADDR_READ addr_read
+#endif
+
+/* generic load/store macros */
+
+static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+ int page_index;
+ RES_TYPE res;
+ target_ulong addr;
+ uintptr_t physaddr;
+ int mmu_idx;
+
+ addr = ptr;
+ page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ mmu_idx = CPU_MMU_INDEX;
+ if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
+ (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+ res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+ } else {
+ physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+ res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
+ }
+ return res;
+}
+
+#if DATA_SIZE <= 2
+static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr)
+{
+ int res, page_index;
+ target_ulong addr;
+ uintptr_t physaddr;
+ int mmu_idx;
+
+ addr = ptr;
+ page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ mmu_idx = CPU_MMU_INDEX;
+ if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
+ (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+ res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx);
+ } else {
+ physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+ res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
+ }
+ return res;
+}
+#endif
+
+#if ACCESS_TYPE != (NB_MMU_MODES + 1)
+
+/* generic store macro */
+
+static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v)
+{
+ int page_index;
+ target_ulong addr;
+ uintptr_t physaddr;
+ int mmu_idx;
+
+ addr = ptr;
+ page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ mmu_idx = CPU_MMU_INDEX;
+ if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
+ (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
+ glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx);
+ } else {
+ physaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
+ glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
+ }
+}
+
+#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
+
+#if ACCESS_TYPE != (NB_MMU_MODES + 1)
+
+#if DATA_SIZE == 8
+static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr)
+{
+ union {
+ float64 d;
+ uint64_t i;
+ } u;
+ u.i = glue(ldq, MEMSUFFIX)(ptr);
+ return u.d;
+}
+
+static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v)
+{
+ union {
+ float64 d;
+ uint64_t i;
+ } u;
+ u.d = v;
+ glue(stq, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 8 */
+
+#if DATA_SIZE == 4
+static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.i = glue(ldl, MEMSUFFIX)(ptr);
+ return u.f;
+}
+
+static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.f = v;
+ glue(stl, MEMSUFFIX)(ptr, u.i);
+}
+#endif /* DATA_SIZE == 4 */
+
+#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */
+
+#undef RES_TYPE
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef SUFFIX
+#undef USUFFIX
+#undef DATA_SIZE
+#undef CPU_MMU_INDEX
+#undef MMUSUFFIX
+#undef ADDR_READ
diff --git a/src/recompiler/softmmu_template.h b/src/recompiler/softmmu_template.h
new file mode 100644
index 00000000..a31411ba
--- /dev/null
+++ b/src/recompiler/softmmu_template.h
@@ -0,0 +1,366 @@
+/*
+ * Software MMU support
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "qemu-timer.h"
+
+#define DATA_SIZE (1 << SHIFT)
+
+#if DATA_SIZE == 8
+#define SUFFIX q
+#define USUFFIX q
+#define DATA_TYPE uint64_t
+#ifdef VBOX
+# define DATA_TYPE_PROMOTED uint64_t
+#endif
+#elif DATA_SIZE == 4
+#define SUFFIX l
+#define USUFFIX l
+#define DATA_TYPE uint32_t
+#ifdef VBOX
+# define DATA_TYPE_PROMOTED RTCCUINTREG
+#endif
+#elif DATA_SIZE == 2
+#define SUFFIX w
+#define USUFFIX uw
+#define DATA_TYPE uint16_t
+#ifdef VBOX
+# define DATA_TYPE_PROMOTED RTCCUINTREG
+#endif
+#elif DATA_SIZE == 1
+#define SUFFIX b
+#define USUFFIX ub
+#define DATA_TYPE uint8_t
+#ifdef VBOX
+# define DATA_TYPE_PROMOTED RTCCUINTREG
+#endif
+#else
+#error unsupported data size
+#endif
+
+#ifdef SOFTMMU_CODE_ACCESS
+#define READ_ACCESS_TYPE 2
+#define ADDR_READ addr_code
+#else
+#define READ_ACCESS_TYPE 0
+#define ADDR_READ addr_read
+#endif
+
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ int mmu_idx,
+ void *retaddr);
+static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
+ target_ulong addr,
+ void *retaddr)
+{
+ DATA_TYPE res;
+ int index;
+ index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+ env->mem_io_pc = (uintptr_t)retaddr;
+ if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
+ && !can_do_io(env)) {
+ cpu_io_recompile(env, retaddr);
+ }
+
+ env->mem_io_vaddr = addr;
+#if SHIFT <= 2
+ res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
+#else
+#ifdef TARGET_WORDS_BIGENDIAN
+ res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32;
+ res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4);
+#else
+ res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
+ res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
+#endif
+#endif /* SHIFT > 2 */
+ return res;
+}
+
+/* handle all cases except unaligned accesses which span two pages */
+#ifndef VBOX
+DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ int mmu_idx)
+#else
+/* Load helpers invoked from generated code.  TCG assumes that a valid
+   value occupies the whole register, while gcc after 4.3 may use only the
+   lower part of the register for smaller types, so force promotion. */
+DATA_TYPE_PROMOTED REGPARM
+glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ int mmu_idx)
+#endif
+{
+ DATA_TYPE res;
+ int index;
+ target_ulong tlb_addr;
+ target_phys_addr_t ioaddr;
+ uintptr_t addend;
+ void *retaddr;
+
+ /* test if there is match for unaligned or IO access */
+    /* XXX: could be done more efficiently with a memory macro in a non-portable way */
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ retaddr = GETPC();
+ ioaddr = env->iotlb[mmu_idx][index];
+ res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ /* slow unaligned access (it spans two pages or IO) */
+ do_unaligned_access:
+ retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+ res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+ mmu_idx, retaddr);
+ } else {
+ /* unaligned/aligned access in the same page */
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ retaddr = GETPC();
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ }
+#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend));
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+ tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ goto redo;
+ }
+ return res;
+}
+
+/* handle all unaligned cases */
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ int mmu_idx,
+ void *retaddr)
+{
+ DATA_TYPE res, res1, res2;
+ int index, shift;
+ target_phys_addr_t ioaddr;
+ uintptr_t addend;
+ target_ulong tlb_addr, addr1, addr2;
+
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ ioaddr = env->iotlb[mmu_idx][index];
+ res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ do_unaligned_access:
+ /* slow unaligned access (it spans two pages) */
+ addr1 = addr & ~(DATA_SIZE - 1);
+ addr2 = addr1 + DATA_SIZE;
+ res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1,
+ mmu_idx, retaddr);
+ res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2,
+ mmu_idx, retaddr);
+ shift = (addr & (DATA_SIZE - 1)) * 8;
+#ifdef TARGET_WORDS_BIGENDIAN
+ res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
+#else
+ res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+#endif
+ res = (DATA_TYPE)res;
+ } else {
+ /* unaligned/aligned access in the same page */
+ addend = env->tlb_table[mmu_idx][index].addend;
+ res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend));
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ goto redo;
+ }
+ return res;
+}
+
+#ifndef SOFTMMU_CODE_ACCESS
+
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ DATA_TYPE val,
+ int mmu_idx,
+ void *retaddr);
+
+static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
+ DATA_TYPE val,
+ target_ulong addr,
+ void *retaddr)
+{
+ int index;
+ index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+ physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+ if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
+ && !can_do_io(env)) {
+ cpu_io_recompile(env, retaddr);
+ }
+
+ env->mem_io_vaddr = addr;
+ env->mem_io_pc = (uintptr_t)retaddr;
+#if SHIFT <= 2
+ io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
+#else
+#ifdef TARGET_WORDS_BIGENDIAN
+ io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32);
+ io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val);
+#else
+ io_mem_write[index][2](io_mem_opaque[index], physaddr, val);
+ io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
+#endif
+#endif /* SHIFT > 2 */
+}
+
+void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ DATA_TYPE val,
+ int mmu_idx)
+{
+ target_phys_addr_t ioaddr;
+ uintptr_t addend;
+ target_ulong tlb_addr;
+ void *retaddr;
+ int index;
+
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ retaddr = GETPC();
+ ioaddr = env->iotlb[mmu_idx][index];
+ glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ do_unaligned_access:
+ retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+#endif
+ glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+ mmu_idx, retaddr);
+ } else {
+ /* aligned/unaligned access in the same page */
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ retaddr = GETPC();
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+ }
+#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ glue(glue(st, SUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend), val);
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ retaddr = GETPC();
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+#endif
+ tlb_fill(addr, 1, mmu_idx, retaddr);
+ goto redo;
+ }
+}
+
+/* handles all unaligned cases */
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ DATA_TYPE val,
+ int mmu_idx,
+ void *retaddr)
+{
+ target_phys_addr_t ioaddr;
+ uintptr_t addend;
+ target_ulong tlb_addr;
+ int index, i;
+
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ ioaddr = env->iotlb[mmu_idx][index];
+ glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ do_unaligned_access:
+ /* XXX: not efficient, but simple */
+ /* Note: relies on the fact that tlb_fill() does not remove the
+ * previous page from the TLB cache. */
+ for(i = DATA_SIZE - 1; i >= 0; i--) {
+#ifdef TARGET_WORDS_BIGENDIAN
+ glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
+ mmu_idx, retaddr);
+#else
+ glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8),
+ mmu_idx, retaddr);
+#endif
+ }
+ } else {
+ /* aligned/unaligned access in the same page */
+ addend = env->tlb_table[mmu_idx][index].addend;
+ glue(glue(st, SUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend), val);
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ tlb_fill(addr, 1, mmu_idx, retaddr);
+ goto redo;
+ }
+}
+
+#endif /* !defined(SOFTMMU_CODE_ACCESS) */
+
+#ifdef VBOX
+# undef DATA_TYPE_PROMOTED
+#endif
+#undef READ_ACCESS_TYPE
+#undef SHIFT
+#undef DATA_TYPE
+#undef SUFFIX
+#undef USUFFIX
+#undef DATA_SIZE
+#undef ADDR_READ
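softmmu_template.h is a multiple-inclusion template: SHIFT (log2 of the access size) selects the SUFFIX/DATA_TYPE pair and MMUSUFFIX names the helper family, so each inclusion emits one __ld*/__st* pair. A sketch of how an including translation unit typically drives it (illustrative only; GETPC() and the global 'env' are assumed to be provided by that file):

/* Sketch only: one inclusion per access size. */
#define MMUSUFFIX _mmu
#define SHIFT 0                  /* 1 byte:  __ldb_mmu / __stb_mmu */
#include "softmmu_template.h"
#define SHIFT 1                  /* 2 bytes: __ldw_mmu / __stw_mmu */
#include "softmmu_template.h"
#define SHIFT 2                  /* 4 bytes: __ldl_mmu / __stl_mmu */
#include "softmmu_template.h"
#define SHIFT 3                  /* 8 bytes: __ldq_mmu / __stq_mmu */
#include "softmmu_template.h"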
diff --git a/src/recompiler/target-i386/Makefile.kup b/src/recompiler/target-i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/target-i386/Makefile.kup
diff --git a/src/recompiler/target-i386/TODO b/src/recompiler/target-i386/TODO
new file mode 100644
index 00000000..8dfd4633
--- /dev/null
+++ b/src/recompiler/target-i386/TODO
@@ -0,0 +1,32 @@
+Correctness issues:
+
+- some eflags manipulations incorrectly reset bit 0x2.
+- SVM: test, cpu save/restore, SMM save/restore.
+- x86_64: lcall/ljmp intel/amd differences ?
+- better code fetch (different exception handling + CS.limit support)
+- user/kernel PUSHL/POPL in helper.c
+- add missing cpuid tests
+- return UD exception if LOCK prefix incorrectly used
+- test ldt limit < 7 ?
+- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret)
+- full support of segment limit/rights
+- full x87 exception support
+- improve x87 bit exactness (use bochs code ?)
+- DRx register support
+- CR0.AC emulation
+- SSE alignment checks
+- fix SSE min/max with nans
+
+Optimizations/Features:
+
+- add SVM nested paging support
+- add VMX support
+- add AVX support
+- add SSE5 support
+- fxsave/fxrstor AMD extensions
+- improve monitor/mwait support
+- faster EFLAGS update: consider SZAP, C, O can be updated separately
+ with a bit field in CC_OP and more state variables.
+- evaluate x87 stack pointer statically
+- find a way to avoid translating the same TB several times depending on
+  whether CR0.TS is set or not.
diff --git a/src/recompiler/target-i386/cpu.h b/src/recompiler/target-i386/cpu.h
new file mode 100644
index 00000000..c643db8d
--- /dev/null
+++ b/src/recompiler/target-i386/cpu.h
@@ -0,0 +1,1215 @@
+/*
+ * i386 virtual CPU header
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#include "config.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+/* target supports implicit self modifying code */
+#define TARGET_HAS_SMC
+/* support for self modifying code even if the modified instruction is
+ close to the modifying instruction */
+#define TARGET_HAS_PRECISE_SMC
+
+#define TARGET_HAS_ICE 1
+
+#ifdef TARGET_X86_64
+#define ELF_MACHINE EM_X86_64
+#else
+#define ELF_MACHINE EM_386
+#endif
+
+#define CPUState struct CPUX86State
+
+#include "cpu-defs.h"
+
+#include "softfloat.h"
+
+#ifdef VBOX
+# include <iprt/critsect.h>
+# include <iprt/thread.h>
+# include <iprt/assert.h>
+# include <iprt/asm.h>
+# include <VBox/vmm/vmm.h>
+# include <VBox/vmm/stam.h>
+# include <VBox/vmm/cpumctx.h>
+# undef MSR_IA32_APICBASE_BSP
+#endif /* VBOX */
+
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL 0
+#define R_CL 1
+#define R_DL 2
+#define R_BL 3
+#define R_AH 4
+#define R_CH 5
+#define R_DH 6
+#define R_BH 7
+
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+/* segment descriptor fields */
+#define DESC_G_MASK (1 << 23)
+#define DESC_B_SHIFT 22
+#define DESC_B_MASK (1 << DESC_B_SHIFT)
+#define DESC_L_SHIFT 21 /* x86_64 only : 64 bit code segment */
+#define DESC_L_MASK (1 << DESC_L_SHIFT)
+#define DESC_AVL_MASK (1 << 20)
+#define DESC_P_MASK (1 << 15)
+#define DESC_DPL_SHIFT 13
+#define DESC_DPL_MASK (3 << DESC_DPL_SHIFT)
+#define DESC_S_MASK (1 << 12)
+#define DESC_TYPE_SHIFT 8
+#define DESC_TYPE_MASK (15 << DESC_TYPE_SHIFT)
+#define DESC_A_MASK (1 << 8)
+
+#define DESC_CS_MASK (1 << 11) /* 1=code segment 0=data segment */
+#define DESC_C_MASK (1 << 10) /* code: conforming */
+#define DESC_R_MASK (1 << 9) /* code: readable */
+
+#define DESC_E_MASK (1 << 10) /* data: expansion direction */
+#define DESC_W_MASK (1 << 9) /* data: writable */
+
+#define DESC_TSS_BUSY_MASK (1 << 9)
+#ifdef VBOX
+# define DESC_INTEL_UNUSABLE RT_BIT_32(16+8) /**< Internal VT-x bit for NULL sectors. */
+# define DESC_RAW_FLAG_BITS UINT32_C(0x00ffffff) /**< Flag bits we load from the descriptor. */
+#endif
+
+/* eflags masks */
+#define CC_C 0x0001
+#define CC_P 0x0004
+#define CC_A 0x0010
+#define CC_Z 0x0040
+#define CC_S 0x0080
+#define CC_O 0x0800
+
+#define TF_SHIFT 8
+#define IOPL_SHIFT 12
+#define VM_SHIFT 17
+
+#define TF_MASK 0x00000100
+#define IF_MASK 0x00000200
+#define DF_MASK 0x00000400
+#define IOPL_MASK 0x00003000
+#define NT_MASK 0x00004000
+#define RF_MASK 0x00010000
+#define VM_MASK 0x00020000
+#define AC_MASK 0x00040000
+#define VIF_MASK 0x00080000
+#define VIP_MASK 0x00100000
+#define ID_MASK 0x00200000
+
+/* hidden flags - used internally by qemu to represent additional cpu
+ states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
+ redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
+   positions to ease ORing with eflags. */
+/* current cpl */
+#define HF_CPL_SHIFT 0
+/* true if soft mmu is being used */
+#define HF_SOFTMMU_SHIFT 2
+/* true if hardware interrupts must be disabled for next instruction */
+#define HF_INHIBIT_IRQ_SHIFT 3
+/* 16 or 32 segments */
+#define HF_CS32_SHIFT 4
+#define HF_SS32_SHIFT 5
+/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */
+#define HF_ADDSEG_SHIFT 6
+/* copy of CR0.PE (protected mode) */
+#define HF_PE_SHIFT 7
+#define HF_TF_SHIFT 8 /* must be same as eflags */
+#define HF_MP_SHIFT 9 /* the order must be MP, EM, TS */
+#define HF_EM_SHIFT 10
+#define HF_TS_SHIFT 11
+#define HF_IOPL_SHIFT 12 /* must be same as eflags */
+#define HF_LMA_SHIFT 14 /* only used on x86_64: long mode active */
+#define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */
+#define HF_RF_SHIFT 16 /* must be same as eflags */
+#define HF_VM_SHIFT 17 /* must be same as eflags */
+#define HF_SMM_SHIFT 19 /* CPU in SMM mode */
+#define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */
+#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */
+#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */
+
+#define HF_CPL_MASK (3 << HF_CPL_SHIFT)
+#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT)
+#define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT)
+#define HF_CS32_MASK (1 << HF_CS32_SHIFT)
+#define HF_SS32_MASK (1 << HF_SS32_SHIFT)
+#define HF_ADDSEG_MASK (1 << HF_ADDSEG_SHIFT)
+#define HF_PE_MASK (1 << HF_PE_SHIFT)
+#define HF_TF_MASK (1 << HF_TF_SHIFT)
+#define HF_MP_MASK (1 << HF_MP_SHIFT)
+#define HF_EM_MASK (1 << HF_EM_SHIFT)
+#define HF_TS_MASK (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT)
+#define HF_LMA_MASK (1 << HF_LMA_SHIFT)
+#define HF_CS64_MASK (1 << HF_CS64_SHIFT)
+#define HF_RF_MASK (1 << HF_RF_SHIFT)
+#define HF_VM_MASK (1 << HF_VM_SHIFT)
+#define HF_SMM_MASK (1 << HF_SMM_SHIFT)
+#define HF_SVME_MASK (1 << HF_SVME_SHIFT)
+#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT)
+#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT)
+
+/* hflags2 */
+
+#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */
+#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */
+#define HF2_NMI_SHIFT 2 /* CPU serving NMI */
+#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */
+
+#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT)
+#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT)
+#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT)
+#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT)
+
+#define CR0_PE_SHIFT 0
+#define CR0_MP_SHIFT 1
+
+#define CR0_PE_MASK (1 << 0)
+#define CR0_MP_MASK (1 << 1)
+#define CR0_EM_MASK (1 << 2)
+#define CR0_TS_MASK (1 << 3)
+#define CR0_ET_MASK (1 << 4)
+#define CR0_NE_MASK (1 << 5)
+#define CR0_WP_MASK (1 << 16)
+#define CR0_AM_MASK (1 << 18)
+#define CR0_PG_MASK (1U << 31)
+
+#define CR4_VME_MASK (1 << 0)
+#define CR4_PVI_MASK (1 << 1)
+#define CR4_TSD_MASK (1 << 2)
+#define CR4_DE_MASK (1 << 3)
+#define CR4_PSE_MASK (1 << 4)
+#define CR4_PAE_MASK (1 << 5)
+#define CR4_MCE_MASK (1 << 6)
+#define CR4_PGE_MASK (1 << 7)
+#define CR4_PCE_MASK (1 << 8)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
+#define CR4_OSXMMEXCPT_MASK (1 << 10)
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_BT (1 << 15)
+#define DR6_FIXED_1 0xffff0ff0
+
+#define DR7_GD (1 << 13)
+#define DR7_TYPE_SHIFT 16
+#define DR7_LEN_SHIFT 18
+#define DR7_FIXED_1 0x00000400
+
+#define PG_PRESENT_BIT 0
+#define PG_RW_BIT 1
+#define PG_USER_BIT 2
+#define PG_PWT_BIT 3
+#define PG_PCD_BIT 4
+#define PG_ACCESSED_BIT 5
+#define PG_DIRTY_BIT 6
+#define PG_PSE_BIT 7
+#define PG_GLOBAL_BIT 8
+#define PG_NX_BIT 63
+
+#define PG_PRESENT_MASK (1 << PG_PRESENT_BIT)
+#define PG_RW_MASK (1 << PG_RW_BIT)
+#define PG_USER_MASK (1 << PG_USER_BIT)
+#define PG_PWT_MASK (1 << PG_PWT_BIT)
+#define PG_PCD_MASK (1 << PG_PCD_BIT)
+#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT)
+#define PG_DIRTY_MASK (1 << PG_DIRTY_BIT)
+#define PG_PSE_MASK (1 << PG_PSE_BIT)
+#define PG_GLOBAL_MASK (1 << PG_GLOBAL_BIT)
+#define PG_NX_MASK (1LL << PG_NX_BIT)
+
+#define PG_ERROR_W_BIT 1
+
+#define PG_ERROR_P_MASK 0x01
+#define PG_ERROR_W_MASK (1 << PG_ERROR_W_BIT)
+#define PG_ERROR_U_MASK 0x04
+#define PG_ERROR_RSVD_MASK 0x08
+#define PG_ERROR_I_D_MASK 0x10
+
+#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
+
+#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_BANKS_DEF 10
+
+#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+
+#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
+#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+
+#define MSR_IA32_TSC 0x10
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_MTRRcap 0xfe
+#define MSR_MTRRcap_VCNT 8
+#define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8)
+#define MSR_MTRRcap_WC_SUPPORTED (1 << 10)
+
+#define MSR_IA32_SYSENTER_CS 0x174
+#define MSR_IA32_SYSENTER_ESP 0x175
+#define MSR_IA32_SYSENTER_EIP 0x176
+
+#define MSR_MCG_CAP 0x179
+#define MSR_MCG_STATUS 0x17a
+#define MSR_MCG_CTL 0x17b
+
+#define MSR_IA32_PERF_STATUS 0x198
+
+#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
+
+#define MSR_MTRRfix64K_00000 0x250
+#define MSR_MTRRfix16K_80000 0x258
+#define MSR_MTRRfix16K_A0000 0x259
+#define MSR_MTRRfix4K_C0000 0x268
+#define MSR_MTRRfix4K_C8000 0x269
+#define MSR_MTRRfix4K_D0000 0x26a
+#define MSR_MTRRfix4K_D8000 0x26b
+#define MSR_MTRRfix4K_E0000 0x26c
+#define MSR_MTRRfix4K_E8000 0x26d
+#define MSR_MTRRfix4K_F0000 0x26e
+#define MSR_MTRRfix4K_F8000 0x26f
+
+#define MSR_PAT 0x277
+
+#define MSR_MTRRdefType 0x2ff
+
+#define MSR_MC0_CTL 0x400
+#define MSR_MC0_STATUS 0x401
+#define MSR_MC0_ADDR 0x402
+#define MSR_MC0_MISC 0x403
+
+#define MSR_EFER 0xc0000080
+
+#define MSR_EFER_SCE (1 << 0)
+#define MSR_EFER_LME (1 << 8)
+#define MSR_EFER_LMA (1 << 10)
+#define MSR_EFER_NXE (1 << 11)
+#define MSR_EFER_SVME (1 << 12)
+#define MSR_EFER_FFXSR (1 << 14)
+
+#ifdef VBOX
+# define MSR_APIC_RANGE_START 0x800
+# define MSR_APIC_RANGE_END 0x900
+#endif
+
+#define MSR_STAR 0xc0000081
+#define MSR_LSTAR 0xc0000082
+#define MSR_CSTAR 0xc0000083
+#define MSR_FMASK 0xc0000084
+#define MSR_FSBASE 0xc0000100
+#define MSR_GSBASE 0xc0000101
+#define MSR_KERNELGSBASE 0xc0000102
+#define MSR_TSC_AUX 0xc0000103
+
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+/* cpuid_features bits */
+#define CPUID_FP87 (1 << 0)
+#define CPUID_VME (1 << 1)
+#define CPUID_DE (1 << 2)
+#define CPUID_PSE (1 << 3)
+#define CPUID_TSC (1 << 4)
+#define CPUID_MSR (1 << 5)
+#define CPUID_PAE (1 << 6)
+#define CPUID_MCE (1 << 7)
+#define CPUID_CX8 (1 << 8)
+#define CPUID_APIC (1 << 9)
+#define CPUID_SEP (1 << 11) /* sysenter/sysexit */
+#define CPUID_MTRR (1 << 12)
+#define CPUID_PGE (1 << 13)
+#define CPUID_MCA (1 << 14)
+#define CPUID_CMOV (1 << 15)
+#define CPUID_PAT (1 << 16)
+#define CPUID_PSE36 (1 << 17)
+#define CPUID_PN (1 << 18)
+#define CPUID_CLFLUSH (1 << 19)
+#define CPUID_DTS (1 << 21)
+#define CPUID_ACPI (1 << 22)
+#define CPUID_MMX (1 << 23)
+#define CPUID_FXSR (1 << 24)
+#define CPUID_SSE (1 << 25)
+#define CPUID_SSE2 (1 << 26)
+#define CPUID_SS (1 << 27)
+#define CPUID_HT (1 << 28)
+#define CPUID_TM (1 << 29)
+#define CPUID_IA64 (1 << 30)
+#define CPUID_PBE (1 << 31)
+
+#define CPUID_EXT_SSE3 (1 << 0)
+#define CPUID_EXT_DTES64 (1 << 2)
+#define CPUID_EXT_MONITOR (1 << 3)
+#define CPUID_EXT_DSCPL (1 << 4)
+#define CPUID_EXT_VMX (1 << 5)
+#define CPUID_EXT_SMX (1 << 6)
+#define CPUID_EXT_EST (1 << 7)
+#define CPUID_EXT_TM2 (1 << 8)
+#define CPUID_EXT_SSSE3 (1 << 9)
+#define CPUID_EXT_CID (1 << 10)
+#define CPUID_EXT_CX16 (1 << 13)
+#define CPUID_EXT_XTPR (1 << 14)
+#define CPUID_EXT_PDCM (1 << 15)
+#define CPUID_EXT_DCA (1 << 18)
+#define CPUID_EXT_SSE41 (1 << 19)
+#define CPUID_EXT_SSE42 (1 << 20)
+#define CPUID_EXT_X2APIC (1 << 21)
+#define CPUID_EXT_MOVBE (1 << 22)
+#define CPUID_EXT_POPCNT (1 << 23)
+#define CPUID_EXT_XSAVE (1 << 26)
+#define CPUID_EXT_OSXSAVE (1 << 27)
+#define CPUID_EXT_HYPERVISOR (1 << 31)
+
+#define CPUID_EXT2_SYSCALL (1 << 11)
+#define CPUID_EXT2_MP (1 << 19)
+#define CPUID_EXT2_NX (1 << 20)
+#define CPUID_EXT2_MMXEXT (1 << 22)
+#define CPUID_EXT2_FFXSR (1 << 25)
+#define CPUID_EXT2_PDPE1GB (1 << 26)
+#define CPUID_EXT2_RDTSCP (1 << 27)
+#define CPUID_EXT2_LM (1 << 29)
+#define CPUID_EXT2_3DNOWEXT (1 << 30)
+#define CPUID_EXT2_3DNOW (1 << 31)
+
+#define CPUID_EXT3_LAHF_LM (1 << 0)
+#define CPUID_EXT3_CMP_LEG (1 << 1)
+#define CPUID_EXT3_SVM (1 << 2)
+#define CPUID_EXT3_EXTAPIC (1 << 3)
+#define CPUID_EXT3_CR8LEG (1 << 4)
+#define CPUID_EXT3_ABM (1 << 5)
+#define CPUID_EXT3_SSE4A (1 << 6)
+#define CPUID_EXT3_MISALIGNSSE (1 << 7)
+#define CPUID_EXT3_3DNOWPREFETCH (1 << 8)
+#define CPUID_EXT3_OSVW (1 << 9)
+#define CPUID_EXT3_IBS (1 << 10)
+#define CPUID_EXT3_SKINIT (1 << 12)
+
+#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
+#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
+#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
+
+#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */
+#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */
+#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */
+
+#define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */
+#define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */
+
+#define EXCP00_DIVZ 0
+#define EXCP01_DB 1
+#define EXCP02_NMI 2
+#define EXCP03_INT3 3
+#define EXCP04_INTO 4
+#define EXCP05_BOUND 5
+#define EXCP06_ILLOP 6
+#define EXCP07_PREX 7
+#define EXCP08_DBLE 8
+#define EXCP09_XERR 9
+#define EXCP0A_TSS 10
+#define EXCP0B_NOSEG 11
+#define EXCP0C_STACK 12
+#define EXCP0D_GPF 13
+#define EXCP0E_PAGE 14
+#define EXCP10_COPR 16
+#define EXCP11_ALGN 17
+#define EXCP12_MCHK 18
+
+#define EXCP_SYSCALL 0x100 /* only happens in user only emulation
+ for syscall instruction */
+
+enum {
+ CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
+ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
+
+ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+ CC_OP_MULW,
+ CC_OP_MULL,
+ CC_OP_MULQ,
+
+ CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_ADDW,
+ CC_OP_ADDL,
+ CC_OP_ADDQ,
+
+ CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_ADCW,
+ CC_OP_ADCL,
+ CC_OP_ADCQ,
+
+ CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_SUBW,
+ CC_OP_SUBL,
+ CC_OP_SUBQ,
+
+ CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+ CC_OP_SBBW,
+ CC_OP_SBBL,
+ CC_OP_SBBQ,
+
+ CC_OP_LOGICB, /* modify all flags, CC_DST = res */
+ CC_OP_LOGICW,
+ CC_OP_LOGICL,
+ CC_OP_LOGICQ,
+
+ CC_OP_INCB, /* modify all flags except, CC_DST = res, CC_SRC = C */
+ CC_OP_INCW,
+ CC_OP_INCL,
+ CC_OP_INCQ,
+
+ CC_OP_DECB, /* modify all flags except, CC_DST = res, CC_SRC = C */
+ CC_OP_DECW,
+ CC_OP_DECL,
+ CC_OP_DECQ,
+
+ CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */
+ CC_OP_SHLW,
+ CC_OP_SHLL,
+ CC_OP_SHLQ,
+
+ CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
+ CC_OP_SARW,
+ CC_OP_SARL,
+ CC_OP_SARQ,
+
+ CC_OP_NB,
+};
+
+#ifdef FLOATX80
+#define USE_X86LDOUBLE
+#endif
+
+#ifdef USE_X86LDOUBLE
+typedef floatx80 CPU86_LDouble;
+#else
+typedef float64 CPU86_LDouble;
+#endif
+
+typedef struct SegmentCache {
+ uint32_t selector;
+#ifdef VBOX
+ /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+ uint16_t newselector;
+ uint16_t fVBoxFlags;
+#endif
+ target_ulong base;
+ uint32_t limit;
+ uint32_t flags;
+} SegmentCache;
+
+typedef union {
+ uint8_t _b[16];
+ uint16_t _w[8];
+ uint32_t _l[4];
+ uint64_t _q[2];
+ float32 _s[4];
+ float64 _d[2];
+} XMMReg;
+
+typedef union {
+ uint8_t _b[8];
+ uint16_t _w[4];
+ uint32_t _l[2];
+ float32 _s[2];
+ uint64_t q;
+} MMXReg;
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define XMM_B(n) _b[15 - (n)]
+#define XMM_W(n) _w[7 - (n)]
+#define XMM_L(n) _l[3 - (n)]
+#define XMM_S(n) _s[3 - (n)]
+#define XMM_Q(n) _q[1 - (n)]
+#define XMM_D(n) _d[1 - (n)]
+
+#define MMX_B(n) _b[7 - (n)]
+#define MMX_W(n) _w[3 - (n)]
+#define MMX_L(n) _l[1 - (n)]
+#define MMX_S(n) _s[1 - (n)]
+#else
+#define XMM_B(n) _b[n]
+#define XMM_W(n) _w[n]
+#define XMM_L(n) _l[n]
+#define XMM_S(n) _s[n]
+#define XMM_Q(n) _q[n]
+#define XMM_D(n) _d[n]
+
+#define MMX_B(n) _b[n]
+#define MMX_W(n) _w[n]
+#define MMX_L(n) _l[n]
+#define MMX_S(n) _s[n]
+#endif
+#define MMX_Q(n) q
+
+typedef union {
+#ifdef USE_X86LDOUBLE
+ CPU86_LDouble d __attribute__((aligned(16)));
+#else
+ CPU86_LDouble d;
+#endif
+ MMXReg mmx;
+} FPReg;
+
+typedef struct {
+ uint64_t base;
+ uint64_t mask;
+} MTRRVar;
+
+#define CPU_NB_REGS64 16
+#define CPU_NB_REGS32 8
+
+#ifdef TARGET_X86_64
+#define CPU_NB_REGS CPU_NB_REGS64
+#else
+#define CPU_NB_REGS CPU_NB_REGS32
+#endif
+
+#define NB_MMU_MODES 2
+
+typedef struct CPUX86State {
+ /* standard registers */
+ target_ulong regs[CPU_NB_REGS];
+ target_ulong eip;
+ target_ulong eflags; /* eflags register. During CPU emulation, CC
+ flags and DF are set to zero because they are
+ stored elsewhere */
+
+ /* emulator internal eflags handling */
+ target_ulong cc_src;
+ target_ulong cc_dst;
+ uint32_t cc_op;
+ int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+ uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
+ are known at translation time. */
+ uint32_t hflags2; /* various other flags, see HF2_xxx constants. */
+
+ /* segments */
+ SegmentCache segs[6]; /* selector values */
+ SegmentCache ldt;
+ SegmentCache tr;
+ SegmentCache gdt; /* only base and limit are used */
+ SegmentCache idt; /* only base and limit are used */
+
+ target_ulong cr[5]; /* NOTE: cr1 is unused */
+ int32_t a20_mask;
+
+ /* FPU state */
+ unsigned int fpstt; /* top of stack index */
+ uint16_t fpus;
+ uint16_t fpuc;
+ uint8_t fptags[8]; /* 0 = valid, 1 = empty */
+ FPReg fpregs[8];
+
+ /* emulator internal variables */
+ float_status fp_status;
+#ifdef VBOX
+ uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */
+#endif
+ CPU86_LDouble ft0;
+#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+ uint32_t alignment4; /* long double is 12 byte, pad it to 16. */
+#endif
+
+ float_status mmx_status; /* for 3DNow! float ops */
+ float_status sse_status;
+ uint32_t mxcsr;
+ XMMReg xmm_regs[CPU_NB_REGS];
+ XMMReg xmm_t0;
+ MMXReg mmx_t0;
+ target_ulong cc_tmp; /* temporary for rcr/rcl */
+
+ /* sysenter registers */
+ uint32_t sysenter_cs;
+#ifdef VBOX
+ uint32_t alignment0;
+#endif
+ target_ulong sysenter_esp;
+ target_ulong sysenter_eip;
+ uint64_t efer;
+ uint64_t star;
+
+ uint64_t vm_hsave;
+ uint64_t vm_vmcb;
+ uint64_t tsc_offset;
+ uint64_t intercept;
+ uint16_t intercept_cr_read;
+ uint16_t intercept_cr_write;
+ uint16_t intercept_dr_read;
+ uint16_t intercept_dr_write;
+ uint32_t intercept_exceptions;
+ uint8_t v_tpr;
+
+#ifdef TARGET_X86_64
+ target_ulong lstar;
+ target_ulong cstar;
+ target_ulong fmask;
+ target_ulong kernelgsbase;
+#endif
+ uint64_t system_time_msr;
+ uint64_t wall_clock_msr;
+
+ uint64_t tsc;
+
+ uint64_t pat;
+
+ /* exception/interrupt handling */
+ int error_code;
+ int exception_is_int;
+#ifdef VBOX
+# define EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ 0x42 /**< Special CPUX86State::exception_is_int value indicating hardware irq. (HACK ALERT) */
+#endif
+ target_ulong exception_next_eip;
+ target_ulong dr[8]; /* debug registers */
+ union {
+ CPUBreakpoint *cpu_breakpoint[4];
+ CPUWatchpoint *cpu_watchpoint[4];
+ }; /* break/watchpoints for dr[0..3] */
+ uint32_t smbase;
+ int old_exception; /* exception in flight */
+
+ CPU_COMMON
+
+#ifdef VBOX
+ /** cpu state flags. (see defines below) */
+ uint32_t state;
+ /** The VM handle. */
+ PVM pVM;
+ /** The VMCPU handle. */
+ PVMCPU pVCpu;
+ /** code buffer for instruction emulation */
+ void *pvCodeBuffer;
+ /** code buffer size */
+ uint32_t cbCodeBuffer;
+#endif /* VBOX */
+
+ /* processor features (e.g. for CPUID insn) */
+#ifndef VBOX /* remR3CpuId deals with these */
+ uint32_t cpuid_level;
+ uint32_t cpuid_vendor1;
+ uint32_t cpuid_vendor2;
+ uint32_t cpuid_vendor3;
+ uint32_t cpuid_version;
+#endif /* !VBOX */
+ uint32_t cpuid_features;
+ uint32_t cpuid_ext_features;
+#ifndef VBOX
+ uint32_t cpuid_xlevel;
+ uint32_t cpuid_model[12];
+#endif /* !VBOX */
+ uint32_t cpuid_ext2_features;
+ uint32_t cpuid_ext3_features;
+ uint32_t cpuid_apic_id;
+#ifndef VBOX
+ int cpuid_vendor_override;
+
+ /* MTRRs */
+ uint64_t mtrr_fixed[11];
+ uint64_t mtrr_deftype;
+ MTRRVar mtrr_var[8];
+
+ /* For KVM */
+ uint32_t mp_state;
+ int32_t exception_injected;
+ int32_t interrupt_injected;
+ uint8_t soft_interrupt;
+ uint8_t nmi_injected;
+ uint8_t nmi_pending;
+ uint8_t has_error_code;
+ uint32_t sipi_vector;
+
+ uint32_t cpuid_kvm_features;
+
+ /* in order to simplify APIC support, we leave this pointer to the
+ user */
+ struct DeviceState *apic_state;
+
+ uint64 mcg_cap;
+ uint64 mcg_status;
+ uint64 mcg_ctl;
+ uint64 mce_banks[MCE_BANKS_DEF*4];
+
+ uint64_t tsc_aux;
+
+ /* vmstate */
+ uint16_t fpus_vmstate;
+ uint16_t fptag_vmstate;
+ uint16_t fpregs_format_vmstate;
+
+ uint64_t xstate_bv;
+ XMMReg ymmh_regs[CPU_NB_REGS];
+
+ uint64_t xcr0;
+#else /* VBOX */
+
+ /** Alignment padding. */
+# if HC_ARCH_BITS == 64 \
+ || ( HC_ARCH_BITS == 32 \
+ && !defined(RT_OS_WINDOWS) \
+ && ( (!defined(VBOX_ENABLE_VBOXREM64) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD)) \
+ || (defined(VBOX_ENABLE_VBOXREM64) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))) ) )
+ uint32_t alignment2[1];
+# endif
+
+ /** Profiling tb_flush. */
+ STAMPROFILE StatTbFlush;
+
+ /** Addends for HVA -> GPA translations. */
+ target_phys_addr_t phys_addends[NB_MMU_MODES][CPU_TLB_SIZE];
+#endif /* VBOX */
+} CPUX86State;
+
+#ifdef VBOX
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct SegmentCache_Ver16 {
+ uint32_t selector;
+ uint32_t base;
+ uint32_t limit;
+ uint32_t flags;
+ /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */
+ uint32_t newselector;
+} SegmentCache_Ver16;
+
+# define CPU_NB_REGS_VER16 8
+
+/* Version 1.6 structure; just for loading the old saved state */
+typedef struct CPUX86State_Ver16 {
+# if TARGET_LONG_BITS > HOST_LONG_BITS
+ /* temporaries if we cannot store them in host registers */
+ uint32_t t0, t1, t2;
+# endif
+
+ /* standard registers */
+ uint32_t regs[CPU_NB_REGS_VER16];
+ uint32_t eip;
+ uint32_t eflags; /* eflags register. During CPU emulation, CC
+ flags and DF are set to zero because they are
+ stored elsewhere */
+
+ /* emulator internal eflags handling */
+ uint32_t cc_src;
+ uint32_t cc_dst;
+ uint32_t cc_op;
+ int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+ uint32_t hflags; /* hidden flags, see HF_xxx constants */
+
+ /* segments */
+ SegmentCache_Ver16 segs[6]; /* selector values */
+ SegmentCache_Ver16 ldt;
+ SegmentCache_Ver16 tr;
+ SegmentCache_Ver16 gdt; /* only base and limit are used */
+ SegmentCache_Ver16 idt; /* only base and limit are used */
+
+ uint32_t cr[5]; /* NOTE: cr1 is unused */
+ uint32_t a20_mask;
+
+ /* FPU state */
+ unsigned int fpstt; /* top of stack index */
+ unsigned int fpus;
+ unsigned int fpuc;
+ uint8_t fptags[8]; /* 0 = valid, 1 = empty */
+ union {
+# ifdef USE_X86LDOUBLE
+ CPU86_LDouble d __attribute__((aligned(16)));
+# else
+ CPU86_LDouble d;
+# endif
+ MMXReg mmx;
+ } fpregs[8];
+
+ /* emulator internal variables */
+ float_status fp_status;
+# ifdef VBOX
+ uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */
+# endif
+ CPU86_LDouble ft0;
+# if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN)
+ uint32_t alignment4; /* long double is 12 byte, pad it to 16. */
+# endif
+ union {
+ float f;
+ double d;
+ int i32;
+ int64_t i64;
+ } fp_convert;
+
+ float_status sse_status;
+ uint32_t mxcsr;
+ XMMReg xmm_regs[CPU_NB_REGS_VER16];
+ XMMReg xmm_t0;
+ MMXReg mmx_t0;
+
+ /* sysenter registers */
+ uint32_t sysenter_cs;
+ uint32_t sysenter_esp;
+ uint32_t sysenter_eip;
+# ifdef VBOX
+ uint32_t alignment0;
+# endif
+ uint64_t efer;
+ uint64_t star;
+
+ uint64_t pat;
+
+ /* temporary data for USE_CODE_COPY mode */
+# ifdef USE_CODE_COPY
+ uint32_t tmp0;
+ uint32_t saved_esp;
+ int native_fp_regs; /* if true, the FPU state is in the native CPU regs */
+# endif
+
+ /* exception/interrupt handling */
+ jmp_buf jmp_env;
+} CPUX86State_Ver16;
+
+/** CPUX86State state flags
+ * @{ */
+# define CPU_RAW_RING0 0x0002 /* Set after first time RawR0 is executed, never cleared. */
+# define CPU_EMULATE_SINGLE_INSTR 0x0040 /* Execute a single instruction in emulation mode */
+# define CPU_EMULATE_SINGLE_STEP 0x0080 /* go into single step mode */
+# define CPU_RAW_HM 0x0100 /* Set after first time HWACC is executed, never cleared. */
+/** @} */
+#endif /* VBOX */
+
+#ifdef VBOX
+CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model);
+#else /* !VBOX */
+CPUX86State *cpu_x86_init(const char *cpu_model);
+#endif /* !VBOX */
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s);
+void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ const char *optarg);
+void x86_cpudef_setup(void);
+
+int cpu_get_pic_interrupt(CPUX86State *s);
+/* MSDOS compatibility mode FPU exception support */
+void cpu_set_ferr(CPUX86State *s);
+
+/* this function must always be used to load data in the segment
+ cache: it synchronizes the hflags with the segment cache values */
+#ifndef VBOX
+static inline void cpu_x86_load_seg_cache(CPUX86State *env,
+ int seg_reg, unsigned int selector,
+ target_ulong base,
+ unsigned int limit,
+ unsigned int flags)
+#else
+static inline void cpu_x86_load_seg_cache_with_clean_flags(CPUX86State *env,
+ int seg_reg, unsigned int selector,
+ target_ulong base,
+ unsigned int limit,
+ unsigned int flags)
+#endif
+{
+ SegmentCache *sc;
+ unsigned int new_hflags;
+
+ sc = &env->segs[seg_reg];
+ sc->selector = selector;
+ sc->base = base;
+ sc->limit = limit;
+ sc->flags = flags;
+#ifdef VBOX
+ sc->newselector = 0;
+ sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+#endif
+
+ /* update the hidden flags */
+ {
+ if (seg_reg == R_CS) {
+#ifdef TARGET_X86_64
+ if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
+ /* long mode */
+ env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+ env->hflags &= ~(HF_ADDSEG_MASK);
+ } else
+#endif
+ {
+ /* legacy / compatibility case */
+ new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
+ >> (DESC_B_SHIFT - HF_CS32_SHIFT);
+ env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
+ new_hflags;
+ }
+ }
+ new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
+ >> (DESC_B_SHIFT - HF_SS32_SHIFT);
+ if (env->hflags & HF_CS64_MASK) {
+ /* zero base assumed for DS, ES and SS in long mode */
+ } else if (!(env->cr[0] & CR0_PE_MASK) ||
+ (env->eflags & VM_MASK) ||
+ !(env->hflags & HF_CS32_MASK)) {
+ /* XXX: try to avoid this test. The problem comes from the
+               fact that in real mode or vm86 mode we only modify the
+ 'base' and 'selector' fields of the segment cache to go
+ faster. A solution may be to force addseg to one in
+ translate-i386.c. */
+ new_hflags |= HF_ADDSEG_MASK;
+ } else {
+ new_hflags |= ((env->segs[R_DS].base |
+ env->segs[R_ES].base |
+ env->segs[R_SS].base) != 0) <<
+ HF_ADDSEG_SHIFT;
+ }
+ env->hflags = (env->hflags &
+ ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags;
+ }
+}
+
+#ifdef VBOX
+/* Raw input, adjust the flags adding the stupid intel flag when applicable. */
+static inline void cpu_x86_load_seg_cache(CPUX86State *env,
+ int seg_reg, unsigned int selector,
+ target_ulong base,
+ unsigned int limit,
+ unsigned int flags)
+{
+ flags &= DESC_RAW_FLAG_BITS;
+ if (flags & DESC_P_MASK)
+ flags |= DESC_A_MASK; /* Make sure the A bit is set to avoid trouble. */
+ else if (selector < 4U)
+ flags |= DESC_INTEL_UNUSABLE;
+ cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, base, limit, flags);
+}
+#endif
+
+static inline void cpu_x86_load_seg_cache_sipi(CPUX86State *env,
+ int sipi_vector)
+{
+ env->eip = 0;
+ cpu_x86_load_seg_cache(env, R_CS, sipi_vector << 8,
+ sipi_vector << 12,
+ env->segs[R_CS].limit,
+ env->segs[R_CS].flags);
+ env->halted = 0;
+}
+
+int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
+ target_ulong *base, unsigned int *limit,
+ unsigned int *flags);
+
+/* wrapper, just in case memory mappings must be changed */
+static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
+{
+#if HF_CPL_MASK == 3
+ s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl;
+#else
+#error HF_CPL_MASK is hardcoded
+#endif
+}
+
+/* op_helper.c */
+/* used for debug or cpu save/restore */
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f);
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper);
+
+/* cpu-exec.c */
+/* the following helpers are only usable in user mode simulation as
+ they can trigger unexpected exceptions */
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+
+/* you can call this signal handler from your SIGBUS and SIGSEGV
+ signal handlers to inform the virtual CPU of exceptions. non zero
+ is returned if the signal was handled by the virtual CPU. */
+int cpu_x86_signal_handler(int host_signum, void *pinfo,
+ void *puc);
+
+/* cpuid.c */
+void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
+int cpu_x86_register (CPUX86State *env, const char *cpu_model);
+void cpu_clear_apic_feature(CPUX86State *env);
+
+/* helper.c */
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+ int is_write, int mmu_idx, int is_softmmu);
+#define cpu_handle_mmu_fault cpu_x86_handle_mmu_fault
+void cpu_x86_set_a20(CPUX86State *env, int a20_state);
+
+static inline int hw_breakpoint_enabled(unsigned long dr7, int index)
+{
+ return (dr7 >> (index * 2)) & 3;
+}
+
+static inline int hw_breakpoint_type(unsigned long dr7, int index)
+{
+ return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3;
+}
+
+static inline int hw_breakpoint_len(unsigned long dr7, int index)
+{
+ int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3);
+ return (len == 2) ? 8 : len + 1;
+}
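+/* Illustrative note (not from the original source): the DR7 LEN field is a
+ 2-bit code per breakpoint: 0 selects 1 byte, 1 selects 2 bytes, 3 selects
+ 4 bytes, and the special value 2 selects 8 bytes, which is what the
+ (len == 2) ? 8 : len + 1 mapping above implements. */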
+
+void hw_breakpoint_insert(CPUX86State *env, int index);
+void hw_breakpoint_remove(CPUX86State *env, int index);
+int check_hw_breakpoints(CPUX86State *env, int force_dr6_update);
+
+/* will be suppressed */
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3);
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);
+
+/* hw/pc.c */
+void cpu_smm_update(CPUX86State *env);
+uint64_t cpu_get_tsc(CPUX86State *env);
+
+/* used to debug */
+#define X86_DUMP_FPU 0x0001 /* dump FPU state too */
+#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
+
+#ifdef VBOX
+int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue);
+int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue);
+void cpu_trap_raw(CPUX86State *env1);
+
+/* in helper.c */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr);
+uint16_t read_word(CPUX86State *env1, target_ulong addr);
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val);
+uint32_t read_dword(CPUX86State *env1, target_ulong addr);
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val);
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val);
+/* in helper.c */
+int emulate_single_instr(CPUX86State *env1);
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl);
+
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr);
+#endif /* VBOX */
+
+#define TARGET_PAGE_BITS 12
+
+#ifdef TARGET_X86_64
+#define TARGET_PHYS_ADDR_SPACE_BITS 52
+/* ??? This is really 48 bits, sign-extended, but the only thing
+ accessible to userland with bit 48 set is the VSYSCALL, and that
+ is handled via other mechanisms. */
+#define TARGET_VIRT_ADDR_SPACE_BITS 47
+#else
+#define TARGET_PHYS_ADDR_SPACE_BITS 36
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
+
+#define cpu_init cpu_x86_init
+#define cpu_exec cpu_x86_exec
+#define cpu_gen_code cpu_x86_gen_code
+#define cpu_signal_handler cpu_x86_signal_handler
+#define cpu_list_id x86_cpu_list
+#define cpudef_setup x86_cpudef_setup
+
+#define CPU_SAVE_VERSION 12
+
+/* MMU modes definitions */
+#define MMU_MODE0_SUFFIX _kernel
+#define MMU_MODE1_SUFFIX _user
+#define MMU_USER_IDX 1
+static inline int cpu_mmu_index (CPUState *env)
+{
+ return (env->hflags & HF_CPL_MASK) == 3 ? 1 : 0;
+}
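+/* Illustrative note (not from the original source): only two softmmu TLB
+ contexts exist here, so CPL 0-2 all share the _kernel index 0 and CPL 3
+ selects the _user index defined as MMU_USER_IDX above. */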
+
+/* translate.c */
+void optimize_flags_init(void);
+
+typedef struct CCTable {
+ int (*compute_all)(void); /* return all the flags */
+ int (*compute_c)(void); /* return the C flag */
+} CCTable;
+
+#if defined(CONFIG_USER_ONLY)
+static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
+{
+ if (newsp)
+ env->regs[R_ESP] = newsp;
+ env->regs[R_EAX] = 0;
+}
+#endif
+
+#include "cpu-all.h"
+#include "svm.h"
+
+#ifndef VBOX
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/apic.h"
+#endif
+#else /* VBOX */
+extern void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+extern uint8_t cpu_get_apic_tpr(CPUX86State *env);
+extern uint64_t cpu_get_apic_base(CPUX86State *env);
+#endif /* VBOX */
+
+static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
+ target_ulong *cs_base, int *flags)
+{
+ *cs_base = env->segs[R_CS].base;
+ if (env->hflags & HF_CS64_MASK)
+ *pc = *cs_base + env->eip;
+ else
+ *pc = (uint32_t)(*cs_base + env->eip);
+ *flags = env->hflags |
+ (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
+}
+
+#ifndef VBOX
+void apic_init_reset(CPUState *env);
+void apic_sipi(CPUState *env);
+void do_cpu_init(CPUState *env);
+void do_cpu_sipi(CPUState *env);
+#endif /* !VBOX */
+#endif /* CPU_I386_H */
diff --git a/src/recompiler/target-i386/exec.h b/src/recompiler/target-i386/exec.h
new file mode 100644
index 00000000..355599fa
--- /dev/null
+++ b/src/recompiler/target-i386/exec.h
@@ -0,0 +1,370 @@
+/*
+ * i386 execution defines
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "config.h"
+#include "dyngen-exec.h"
+
+/* XXX: factorize this mess */
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+#include "cpu-defs.h"
+
+register struct CPUX86State *env asm(AREG0);
+
+#include "qemu-common.h"
+#include "qemu-log.h"
+
+#undef EAX
+#define EAX (env->regs[R_EAX])
+#undef ECX
+#define ECX (env->regs[R_ECX])
+#undef EDX
+#define EDX (env->regs[R_EDX])
+#undef EBX
+#define EBX (env->regs[R_EBX])
+#undef ESP
+#define ESP (env->regs[R_ESP])
+#undef EBP
+#define EBP (env->regs[R_EBP])
+#undef ESI
+#define ESI (env->regs[R_ESI])
+#undef EDI
+#define EDI (env->regs[R_EDI])
+#undef EIP
+#define EIP (env->eip)
+#define DF (env->df)
+
+#define CC_SRC (env->cc_src)
+#define CC_DST (env->cc_dst)
+#define CC_OP (env->cc_op)
+
+/* float macros */
+#define FT0 (env->ft0)
+#define ST0 (env->fpregs[env->fpstt].d)
+#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
+#define ST1 ST(1)
+
+#include "cpu.h"
+#include "exec-all.h"
+
+/* op_helper.c */
+void do_interrupt(int intno, int is_int, int error_code,
+ target_ulong next_eip, int is_hw);
+void do_interrupt_user(int intno, int is_int, int error_code,
+ target_ulong next_eip);
+void QEMU_NORETURN raise_exception_err(int exception_index, int error_code);
+void QEMU_NORETURN raise_exception(int exception_index);
+void QEMU_NORETURN raise_exception_env(int exception_index, CPUState *nenv);
+void do_smm_enter(void);
+
+/* n must be a constant to be efficient */
+static inline target_long lshift(target_long x, int n)
+{
+ if (n >= 0)
+ return x << n;
+ else
+ return x >> (-n);
+}
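+/* Illustrative note (not from the original source): a negative count gives
+ a right shift, e.g. lshift(CC_DST, 8 - DATA_BITS) in the flag helpers
+ shifts right by DATA_BITS - 8 for the wider operand sizes, which brings
+ the sign bit of the result down into bit 7. */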
+
+#include "helper.h"
+
+static inline void svm_check_intercept(uint32_t type)
+{
+ helper_svm_check_intercept_param(type, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#include "softmmu_exec.h"
+
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#ifdef USE_X86LDOUBLE
+/* use long double functions */
+#define floatx_to_int32 floatx80_to_int32
+#define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
+#define int32_to_floatx int32_to_floatx80
+#define int64_to_floatx int64_to_floatx80
+#define float32_to_floatx float32_to_floatx80
+#define float64_to_floatx float64_to_floatx80
+#define floatx_to_float32 floatx80_to_float32
+#define floatx_to_float64 floatx80_to_float64
+#define floatx_abs floatx80_abs
+#define floatx_chs floatx80_chs
+#define floatx_round_to_int floatx80_round_to_int
+#define floatx_compare floatx80_compare
+#define floatx_compare_quiet floatx80_compare_quiet
+#else
+#define floatx_to_int32 float64_to_int32
+#define floatx_to_int64 float64_to_int64
+#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
+#define int32_to_floatx int32_to_float64
+#define int64_to_floatx int64_to_float64
+#define float32_to_floatx float32_to_float64
+#define float64_to_floatx(x, e) (x)
+#define floatx_to_float32 float64_to_float32
+#define floatx_to_float64(x, e) (x)
+#define floatx_abs float64_abs
+#define floatx_chs float64_chs
+#define floatx_round_to_int float64_round_to_int
+#define floatx_compare float64_compare
+#define floatx_compare_quiet float64_compare_quiet
+#endif
+
+#ifdef VBOX
+# ifdef IPRT_NO_CRT
+# undef sin
+# undef cos
+# undef sqrt
+# undef pow
+# undef log
+# undef tan
+# undef atan2
+# undef floor
+# undef ceil
+# undef ldexp
+# define sin sinl
+# define cos cosl
+# define sqrt sqrtl
+# define pow powl
+# define log logl
+# define tan tanl
+# define atan2 atan2l
+# define floor floorl
+# define ceil ceill
+# define ldexp ldexpl
+# endif
+#endif
+
+#define RC_MASK 0xc00
+#define RC_NEAR 0x000
+#define RC_DOWN 0x400
+#define RC_UP 0x800
+#define RC_CHOP 0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+#ifdef USE_X86LDOUBLE
+
+/* only for x86 */
+typedef union {
+ long double d;
+ struct {
+ unsigned long long lower;
+ unsigned short upper;
+ } l;
+} CPU86_LDoubleU;
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp) (fp.l.upper & 0x7fff)
+#define SIGND(fp) ((fp.l.upper) & 0x8000)
+#define MANTD(fp) (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#else
+
+/* NOTE: ARM is horrible as the two 32-bit words of a double are stored in big endian! */
+typedef union {
+ double d;
+#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__)
+ struct {
+ uint32_t lower;
+ int32_t upper;
+ } l;
+#else
+ struct {
+ int32_t upper;
+ uint32_t lower;
+ } l;
+#endif
+#ifndef __arm__
+ int64_t ll;
+#endif
+} CPU86_LDoubleU;
+
+/* the following deal with IEEE double-precision numbers */
+#define MAXEXPD 0x7ff
+#define EXPBIAS 1023
+#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp) ((fp.l.upper) & 0x80000000)
+#ifdef __arm__
+#define MANTD(fp) (fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32))
+#else
+#define MANTD(fp) (fp.ll & ((1LL << 52) - 1))
+#endif
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20)
+#endif
+
+static inline void fpush(void)
+{
+ env->fpstt = (env->fpstt - 1) & 7;
+ env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+ env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+ env->fpstt = (env->fpstt + 1) & 7;
+}
+
+#ifndef USE_X86LDOUBLE
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+ int upper, e;
+ uint64_t ll;
+
+ /* mantissa */
+ upper = lduw(ptr + 8);
+ /* XXX: handle overflow ? */
+ e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+ e |= (upper >> 4) & 0x800; /* sign */
+ ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+ temp.l.upper = (e << 20) | (ll >> 32);
+ temp.l.lower = ll;
+#else
+ temp.ll = ll | ((uint64_t)e << 52);
+#endif
+ return temp.d;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+ int e;
+
+ temp.d = f;
+ /* mantissa */
+ stq(ptr, (MANTD(temp) << 11) | (1LL << 63));
+ /* exponent + sign */
+ e = EXPD(temp) - EXPBIAS + 16383;
+ e |= SIGND(temp) >> 16;
+ stw(ptr + 8, e);
+}
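+/* Illustrative note (not from the original source): the two helpers above
+ rebias the exponent between the 80-bit extended format (15-bit exponent,
+ bias 16383, explicit integer bit) and the IEEE double format (11-bit
+ exponent, bias 1023, 52-bit mantissa); e.g. the extended exponent field
+ 16384 (the value 2.0) becomes 16384 - 16383 + 1023 = 1024 here. */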
+#else
+
+/* we use memory access macros */
+
+static inline CPU86_LDouble helper_fldt(target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+
+ temp.l.lower = ldq(ptr);
+ temp.l.upper = lduw(ptr + 8);
+ return temp.d;
+}
+
+static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr)
+{
+ CPU86_LDoubleU temp;
+
+ temp.d = f;
+ stq(ptr, temp.l.lower);
+ stw(ptr + 8, temp.l.upper);
+}
+
+#endif /* USE_X86LDOUBLE */
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B (1 << 15)
+
+#define FPUC_EM 0x3f
+
+static inline uint32_t compute_eflags(void)
+{
+ return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
+}
+
+/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */
+static inline void load_eflags(int eflags, int update_mask)
+{
+ CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+ DF = 1 - (2 * ((eflags >> 10) & 1));
+ env->eflags = (env->eflags & ~update_mask) |
+ (eflags & update_mask) | 0x2;
+}
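+/* Illustrative note (not from the original source): DF is kept as +1/-1 in
+ env->df rather than as the raw EFLAGS bit, so the string instructions can
+ simply add DF (scaled by the operand size) to ESI/EDI; bit 10 of the guest
+ eflags value is decoded into that form by the expression above. */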
+
+static inline int cpu_has_work(CPUState *env)
+{
+ int work;
+
+ work = (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+ (env->eflags & IF_MASK);
+ work |= env->interrupt_request & CPU_INTERRUPT_NMI;
+ work |= env->interrupt_request & CPU_INTERRUPT_INIT;
+ work |= env->interrupt_request & CPU_INTERRUPT_SIPI;
+
+ return work;
+}
+
+static inline int cpu_halted(CPUState *env) {
+ /* handle exit of HALTED state */
+ if (!env->halted)
+ return 0;
+ /* disable halt condition */
+ if (cpu_has_work(env)) {
+ env->halted = 0;
+ return 0;
+ }
+ return EXCP_HALTED;
+}
+
+/* load efer and update the corresponding hflags. XXX: do consistency
+ checks with cpuid bits ? */
+static inline void cpu_load_efer(CPUState *env, uint64_t val)
+{
+ env->efer = val;
+ env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK);
+ if (env->efer & MSR_EFER_LMA)
+ env->hflags |= HF_LMA_MASK;
+ if (env->efer & MSR_EFER_SVME)
+ env->hflags |= HF_SVME_MASK;
+}
+
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+ env->eip = tb->pc - tb->cs_base;
+}
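+/* Illustrative note (not from the original source): env->eip holds the
+ CS-relative offset, so the linear PC recorded in the TranslationBlock has
+ cs_base subtracted before it is written back here. */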
+
diff --git a/src/recompiler/target-i386/helper.c b/src/recompiler/target-i386/helper.c
new file mode 100644
index 00000000..a6f37ff6
--- /dev/null
+++ b/src/recompiler/target-i386/helper.c
@@ -0,0 +1,1227 @@
+/*
+ * i386 helpers (without register variable usage)
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifndef VBOX
+#include <inttypes.h>
+#include <signal.h>
+#endif /* !VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "kvm.h"
+
+//#define DEBUG_MMU
+
+/* NOTE: must be called outside the CPU execute loop */
+void cpu_reset(CPUX86State *env)
+{
+ int i;
+
+ if (qemu_loglevel_mask(CPU_LOG_RESET)) {
+ qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
+ log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+ }
+
+ memset(env, 0, offsetof(CPUX86State, breakpoints));
+
+ tlb_flush(env, 1);
+
+ env->old_exception = -1;
+
+ /* init to reset state */
+
+#ifdef CONFIG_SOFTMMU
+ env->hflags |= HF_SOFTMMU_MASK;
+#endif
+ env->hflags2 |= HF2_GIF_MASK;
+
+ cpu_x86_update_cr0(env, 0x60000010);
+ env->a20_mask = ~0x0;
+ env->smbase = 0x30000;
+
+ env->idt.limit = 0xffff;
+ env->gdt.limit = 0xffff;
+ env->ldt.limit = 0xffff;
+ env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT);
+ env->tr.limit = 0xffff;
+ env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT);
+
+ cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
+ DESC_R_MASK | DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff,
+ DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
+ DESC_A_MASK);
+
+ env->eip = 0xfff0;
+#ifndef VBOX /* We'll get the right value from CPUM. */
+ env->regs[R_EDX] = env->cpuid_version;
+#endif
+
+ env->eflags = 0x2;
+
+ /* FPU init */
+ for(i = 0;i < 8; i++)
+ env->fptags[i] = 1;
+ env->fpuc = 0x37f;
+
+ env->mxcsr = 0x1f80;
+
+ memset(env->dr, 0, sizeof(env->dr));
+ env->dr[6] = DR6_FIXED_1;
+ env->dr[7] = DR7_FIXED_1;
+ cpu_breakpoint_remove_all(env, BP_CPU);
+ cpu_watchpoint_remove_all(env, BP_CPU);
+
+#ifndef VBOX
+ env->mcg_status = 0;
+#endif
+}
+
+void cpu_x86_close(CPUX86State *env)
+{
+#ifndef VBOX
+ qemu_free(env);
+#endif
+}
+
+/***********************************************************/
+/* x86 debug */
+
+static const char *cc_op_str[] = {
+ "DYNAMIC",
+ "EFLAGS",
+
+ "MULB",
+ "MULW",
+ "MULL",
+ "MULQ",
+
+ "ADDB",
+ "ADDW",
+ "ADDL",
+ "ADDQ",
+
+ "ADCB",
+ "ADCW",
+ "ADCL",
+ "ADCQ",
+
+ "SUBB",
+ "SUBW",
+ "SUBL",
+ "SUBQ",
+
+ "SBBB",
+ "SBBW",
+ "SBBL",
+ "SBBQ",
+
+ "LOGICB",
+ "LOGICW",
+ "LOGICL",
+ "LOGICQ",
+
+ "INCB",
+ "INCW",
+ "INCL",
+ "INCQ",
+
+ "DECB",
+ "DECW",
+ "DECL",
+ "DECQ",
+
+ "SHLB",
+ "SHLW",
+ "SHLL",
+ "SHLQ",
+
+ "SARB",
+ "SARW",
+ "SARL",
+ "SARQ",
+};
+
+static void
+cpu_x86_dump_seg_cache(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ const char *name, struct SegmentCache *sc)
+{
+#ifdef VBOX
+# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__)
+#endif
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
+ sc->selector, sc->base, sc->limit, sc->flags);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector,
+ (uint32_t)sc->base, sc->limit, sc->flags);
+ }
+
+ if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK))
+ goto done;
+
+ cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT);
+ if (sc->flags & DESC_S_MASK) {
+ if (sc->flags & DESC_CS_MASK) {
+ cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" :
+ ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16"));
+ cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-',
+ (sc->flags & DESC_R_MASK) ? 'R' : '-');
+ } else {
+ cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS " : "DS16");
+ cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-',
+ (sc->flags & DESC_W_MASK) ? 'W' : '-');
+ }
+ cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-');
+ } else {
+ static const char *sys_type_name[2][16] = {
+ { /* 32 bit mode */
+ "Reserved", "TSS16-avl", "LDT", "TSS16-busy",
+ "CallGate16", "TaskGate", "IntGate16", "TrapGate16",
+ "Reserved", "TSS32-avl", "Reserved", "TSS32-busy",
+ "CallGate32", "Reserved", "IntGate32", "TrapGate32"
+ },
+ { /* 64 bit mode */
+ "<hiword>", "Reserved", "LDT", "Reserved", "Reserved",
+ "Reserved", "Reserved", "Reserved", "Reserved",
+ "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64",
+ "Reserved", "IntGate64", "TrapGate64"
+ }
+ };
+ cpu_fprintf(f, "%s",
+ sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0]
+ [(sc->flags & DESC_TYPE_MASK)
+ >> DESC_TYPE_SHIFT]);
+ }
+done:
+ cpu_fprintf(f, "\n");
+#ifdef VBOX
+# undef cpu_fprintf
+#endif
+}
+
+void cpu_dump_state(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ int flags)
+{
+ int eflags, i, nb;
+ char cc_op_name[32];
+ static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
+
+#ifdef VBOX
+# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__)
+#endif
+ cpu_synchronize_state(env);
+
+ eflags = env->eflags;
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f,
+ "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n"
+ "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n"
+ "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n"
+ "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n"
+ "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+ env->regs[R_EAX],
+ env->regs[R_EBX],
+ env->regs[R_ECX],
+ env->regs[R_EDX],
+ env->regs[R_ESI],
+ env->regs[R_EDI],
+ env->regs[R_EBP],
+ env->regs[R_ESP],
+ env->regs[8],
+ env->regs[9],
+ env->regs[10],
+ env->regs[11],
+ env->regs[12],
+ env->regs[13],
+ env->regs[14],
+ env->regs[15],
+ env->eip, eflags,
+ eflags & DF_MASK ? 'D' : '-',
+ eflags & CC_O ? 'O' : '-',
+ eflags & CC_S ? 'S' : '-',
+ eflags & CC_Z ? 'Z' : '-',
+ eflags & CC_A ? 'A' : '-',
+ eflags & CC_P ? 'P' : '-',
+ eflags & CC_C ? 'C' : '-',
+ env->hflags & HF_CPL_MASK,
+ (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+ (env->a20_mask >> 20) & 1,
+ (env->hflags >> HF_SMM_SHIFT) & 1,
+ env->halted);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n"
+ "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n"
+ "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
+ (uint32_t)env->regs[R_EAX],
+ (uint32_t)env->regs[R_EBX],
+ (uint32_t)env->regs[R_ECX],
+ (uint32_t)env->regs[R_EDX],
+ (uint32_t)env->regs[R_ESI],
+ (uint32_t)env->regs[R_EDI],
+ (uint32_t)env->regs[R_EBP],
+ (uint32_t)env->regs[R_ESP],
+ (uint32_t)env->eip, eflags,
+ eflags & DF_MASK ? 'D' : '-',
+ eflags & CC_O ? 'O' : '-',
+ eflags & CC_S ? 'S' : '-',
+ eflags & CC_Z ? 'Z' : '-',
+ eflags & CC_A ? 'A' : '-',
+ eflags & CC_P ? 'P' : '-',
+ eflags & CC_C ? 'C' : '-',
+ env->hflags & HF_CPL_MASK,
+ (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
+ (env->a20_mask >> 20) & 1,
+ (env->hflags >> HF_SMM_SHIFT) & 1,
+ env->halted);
+ }
+
+ for(i = 0; i < 6; i++) {
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i],
+ &env->segs[i]);
+ }
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt);
+ cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr);
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n",
+ env->gdt.base, env->gdt.limit);
+ cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n",
+ env->idt.base, env->idt.limit);
+ cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n",
+ (uint32_t)env->cr[0],
+ env->cr[2],
+ env->cr[3],
+ (uint32_t)env->cr[4]);
+ for(i = 0; i < 4; i++)
+ cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]);
+ cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n",
+ env->dr[6], env->dr[7]);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "GDT= %08x %08x\n",
+ (uint32_t)env->gdt.base, env->gdt.limit);
+ cpu_fprintf(f, "IDT= %08x %08x\n",
+ (uint32_t)env->idt.base, env->idt.limit);
+ cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n",
+ (uint32_t)env->cr[0],
+ (uint32_t)env->cr[2],
+ (uint32_t)env->cr[3],
+ (uint32_t)env->cr[4]);
+ for(i = 0; i < 4; i++)
+ cpu_fprintf(f, "DR%d=%08x ", i, env->dr[i]);
+ cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n", env->dr[6], env->dr[7]);
+ }
+ if (flags & X86_DUMP_CCOP) {
+ if ((unsigned)env->cc_op < CC_OP_NB)
+ snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
+ else
+ snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK) {
+ cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n",
+ env->cc_src, env->cc_dst,
+ cc_op_name);
+ } else
+#endif
+ {
+ cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
+ (uint32_t)env->cc_src, (uint32_t)env->cc_dst,
+ cc_op_name);
+ }
+ }
+ cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
+ if (flags & X86_DUMP_FPU) {
+ int fptag;
+ fptag = 0;
+ for(i = 0; i < 8; i++) {
+ fptag |= ((!env->fptags[i]) << i);
+ }
+ cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
+ env->fpuc,
+ (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
+ env->fpstt,
+ fptag,
+ env->mxcsr);
+ for(i=0;i<8;i++) {
+#if defined(USE_X86LDOUBLE)
+ union {
+ long double d;
+ struct {
+ uint64_t lower;
+ uint16_t upper;
+ } l;
+ } tmp;
+ tmp.d = env->fpregs[i].d;
+ cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
+ i, tmp.l.lower, tmp.l.upper);
+#else
+ cpu_fprintf(f, "FPR%d=%016" PRIx64,
+ i, env->fpregs[i].mmx.q);
+#endif
+ if ((i & 1) == 1)
+ cpu_fprintf(f, "\n");
+ else
+ cpu_fprintf(f, " ");
+ }
+ if (env->hflags & HF_CS64_MASK)
+ nb = 16;
+ else
+ nb = 8;
+ for(i=0;i<nb;i++) {
+ cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
+ i,
+ env->xmm_regs[i].XMM_L(3),
+ env->xmm_regs[i].XMM_L(2),
+ env->xmm_regs[i].XMM_L(1),
+ env->xmm_regs[i].XMM_L(0));
+ if ((i & 1) == 1)
+ cpu_fprintf(f, "\n");
+ else
+ cpu_fprintf(f, " ");
+ }
+ }
+#ifdef VBOX
+# undef cpu_fprintf
+#endif
+}
+
+/***********************************************************/
+/* x86 mmu */
+/* XXX: add PGE support */
+
+void cpu_x86_set_a20(CPUX86State *env, int a20_state)
+{
+ a20_state = (a20_state != 0);
+ if (a20_state != ((env->a20_mask >> 20) & 1)) {
+#if defined(DEBUG_MMU)
+ printf("A20 update: a20=%d\n", a20_state);
+#endif
+ /* if the cpu is currently executing code, we must unlink it and
+ all the potentially executing TBs */
+ cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
+
+ /* when a20 is changed, all the MMU mappings are invalid, so
+ we must flush everything */
+ tlb_flush(env, 1);
+ env->a20_mask = ~(1 << 20) | (a20_state << 20);
+ }
+}
+
+void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0)
+{
+ int pe_state;
+
+#if defined(DEBUG_MMU)
+ printf("CR0 update: CR0=0x%08x\n", new_cr0);
+#endif
+ if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
+ (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
+ tlb_flush(env, 1);
+ }
+
+#ifdef TARGET_X86_64
+ if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
+ (env->efer & MSR_EFER_LME)) {
+ /* enter long mode */
+ /* XXX: generate an exception */
+ if (!(env->cr[4] & CR4_PAE_MASK))
+ return;
+ env->efer |= MSR_EFER_LMA;
+ env->hflags |= HF_LMA_MASK;
+ } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) &&
+ (env->efer & MSR_EFER_LMA)) {
+ /* exit long mode */
+ env->efer &= ~MSR_EFER_LMA;
+ env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
+ env->eip &= 0xffffffff;
+ }
+#endif
+ env->cr[0] = new_cr0 | CR0_ET_MASK;
+
+ /* update PE flag in hidden flags */
+ pe_state = (env->cr[0] & CR0_PE_MASK);
+ env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT);
+ /* ensure that ADDSEG is always set in real mode */
+ env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT);
+ /* update FPU flags */
+ env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
+ ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
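+ /* Illustrative note (not from the original source): CR0.MP/EM/TS occupy
+ consecutive bits 1..3 and the HF_MP/HF_EM/HF_TS hflags are likewise
+ consecutive, so the single shift by (HF_MP_SHIFT - 1) above copies all
+ three CR0 bits onto their hflag positions in one operation. */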
+#ifdef VBOX
+ remR3ChangeCpuMode(env);
+#endif
+}
+
+/* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in
+ the PDPT */
+void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
+{
+ env->cr[3] = new_cr3;
+ if (env->cr[0] & CR0_PG_MASK) {
+#if defined(DEBUG_MMU)
+ printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
+#endif
+ tlb_flush(env, 0);
+ }
+}
+
+void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
+{
+#if defined(DEBUG_MMU)
+ printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
+#endif
+ if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
+ (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+ tlb_flush(env, 1);
+ }
+ /* SSE handling */
+ if (!(env->cpuid_features & CPUID_SSE))
+ new_cr4 &= ~CR4_OSFXSR_MASK;
+ if (new_cr4 & CR4_OSFXSR_MASK)
+ env->hflags |= HF_OSFXSR_MASK;
+ else
+ env->hflags &= ~HF_OSFXSR_MASK;
+
+ env->cr[4] = new_cr4;
+#ifdef VBOX
+ remR3ChangeCpuMode(env);
+#endif
+}
+
+#if defined(CONFIG_USER_ONLY)
+
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+ int is_write, int mmu_idx, int is_softmmu)
+{
+ /* user mode only emulation */
+ is_write &= 1;
+ env->cr[2] = addr;
+ env->error_code = (is_write << PG_ERROR_W_BIT);
+ env->error_code |= PG_ERROR_U_MASK;
+ env->exception_index = EXCP0E_PAGE;
+ return 1;
+}
+
+#else
+
+/* XXX: This value should match the one returned by CPUID
+ * and in exec.c */
+# if defined(TARGET_X86_64)
+# define PHYS_ADDR_MASK 0xfffffff000LL
+# else
+# define PHYS_ADDR_MASK 0xffffff000LL
+# endif
+
+/* return value:
+ -1 = cannot handle fault
+ 0 = nothing more to do
+ 1 = generate PF fault
+*/
+int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
+ int is_write1, int mmu_idx, int is_softmmu)
+{
+ uint64_t ptep, pte;
+ target_ulong pde_addr, pte_addr;
+ int error_code, is_dirty, prot, page_size, is_write, is_user;
+ target_phys_addr_t paddr;
+ uint32_t page_offset;
+ target_ulong vaddr, virt_addr;
+
+ is_user = mmu_idx == MMU_USER_IDX;
+#if defined(DEBUG_MMU)
+ printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n",
+ addr, is_write1, is_user, env->eip);
+#endif
+ is_write = is_write1 & 1;
+
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ pte = addr;
+ virt_addr = addr & TARGET_PAGE_MASK;
+ prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ page_size = 4096;
+ goto do_mapping;
+ }
+
+ if (env->cr[4] & CR4_PAE_MASK) {
+ uint64_t pde, pdpe;
+ target_ulong pdpe_addr;
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint64_t pml4e_addr, pml4e;
+ int32_t sext;
+
+ /* test virtual address sign extension */
+ sext = (int64_t)addr >> 47;
+ if (sext != 0 && sext != -1) {
+ env->error_code = 0;
+ env->exception_index = EXCP0D_GPF;
+ return 1;
+ }
+
+ pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pml4e = ldq_phys(pml4e_addr);
+ if (!(pml4e & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ if (!(pml4e & PG_ACCESSED_MASK)) {
+ pml4e |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pml4e_addr, pml4e);
+ }
+ ptep = pml4e ^ PG_NX_MASK;
+ pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ ptep &= pdpe ^ PG_NX_MASK;
+ if (!(pdpe & PG_ACCESSED_MASK)) {
+ pdpe |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pdpe_addr, pdpe);
+ }
+ } else
+#endif
+ {
+ /* XXX: load them when cr3 is loaded ? */
+ pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+ }
+
+ pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pde = ldq_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ ptep &= pde ^ PG_NX_MASK;
+ if (pde & PG_PSE_MASK) {
+ /* 2 MB page */
+ page_size = 2048 * 1024;
+ ptep ^= PG_NX_MASK;
+ if ((ptep & PG_NX_MASK) && is_write1 == 2)
+ goto do_fault_protect;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+ if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+ pde |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pde |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+ /* align to page_size */
+ pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
+ virt_addr = addr & ~(page_size - 1);
+ } else {
+ /* 4 KB page */
+ if (!(pde & PG_ACCESSED_MASK)) {
+ pde |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+ pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pte = ldq_phys(pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
+ error_code = PG_ERROR_RSVD_MASK;
+ goto do_fault;
+ }
+ /* combine pde and pte nx, user and rw protections */
+ ptep &= pte ^ PG_NX_MASK;
+ ptep ^= PG_NX_MASK;
+ if ((ptep & PG_NX_MASK) && is_write1 == 2)
+ goto do_fault_protect;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+ if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+ pte |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pte |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pte_addr, pte);
+ }
+ page_size = 4096;
+ virt_addr = addr & ~0xfff;
+ pte = pte & (PHYS_ADDR_MASK | 0xfff);
+ }
+ } else {
+ uint32_t pde;
+
+ /* page directory entry */
+ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
+ env->a20_mask;
+ pde = ldl_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ /* if PSE bit is set, then we use a 4MB page */
+ if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+ page_size = 4096 * 1024;
+ if (is_user) {
+ if (!(pde & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(pde & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(pde & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pde & PG_DIRTY_MASK);
+ if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
+ pde |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pde |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+
+ pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+ ptep = pte;
+ virt_addr = addr & ~(page_size - 1);
+ } else {
+ if (!(pde & PG_ACCESSED_MASK)) {
+ pde |= PG_ACCESSED_MASK;
+ stl_phys_notdirty(pde_addr, pde);
+ }
+
+ /* page table entry */
+ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
+ env->a20_mask;
+ pte = ldl_phys(pte_addr);
+ if (!(pte & PG_PRESENT_MASK)) {
+ error_code = 0;
+ goto do_fault;
+ }
+ /* combine pde and pte user and rw protections */
+ ptep = pte & pde;
+ if (is_user) {
+ if (!(ptep & PG_USER_MASK))
+ goto do_fault_protect;
+ if (is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ } else {
+ if ((env->cr[0] & CR0_WP_MASK) &&
+ is_write && !(ptep & PG_RW_MASK))
+ goto do_fault_protect;
+ }
+ is_dirty = is_write && !(pte & PG_DIRTY_MASK);
+ if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
+ pte |= PG_ACCESSED_MASK;
+ if (is_dirty)
+ pte |= PG_DIRTY_MASK;
+ stl_phys_notdirty(pte_addr, pte);
+ }
+ page_size = 4096;
+ virt_addr = addr & ~0xfff;
+ }
+ }
+ /* the page can be put in the TLB */
+ prot = PAGE_READ;
+ if (!(ptep & PG_NX_MASK))
+ prot |= PAGE_EXEC;
+ if (pte & PG_DIRTY_MASK) {
+ /* only set write access if already dirty... otherwise wait
+ for dirty access */
+ if (is_user) {
+ if (ptep & PG_RW_MASK)
+ prot |= PAGE_WRITE;
+ } else {
+ if (!(env->cr[0] & CR0_WP_MASK) ||
+ (ptep & PG_RW_MASK))
+ prot |= PAGE_WRITE;
+ }
+ }
+ do_mapping:
+#ifndef VBOX
+ pte = pte & env->a20_mask;
+#endif
+
+ /* Even for 4MB pages, we map only one 4KB page in the cache to
+ avoid filling it too fast */
+ page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+ paddr = (pte & TARGET_PAGE_MASK) + page_offset;
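+ /* Illustrative note (not from the original source): with a 4MB page and
+ addr = 0x0040f123, page_offset = (0x0040f000 & 0x3fffff) = 0xf000, so
+ only the 4KB slot covering the faulting address enters the TLB and the
+ neighbouring 4KB slots of the same large page fault in lazily. */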
+#ifdef VBOX
+ paddr &= env->a20_mask;
+#endif
+ vaddr = virt_addr + page_offset;
+
+ tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
+ return 0;
+ do_fault_protect:
+ error_code = PG_ERROR_P_MASK;
+ do_fault:
+ error_code |= (is_write << PG_ERROR_W_BIT);
+ if (is_user)
+ error_code |= PG_ERROR_U_MASK;
+ if (is_write1 == 2 &&
+ (env->efer & MSR_EFER_NXE) &&
+ (env->cr[4] & CR4_PAE_MASK))
+ error_code |= PG_ERROR_I_D_MASK;
+ if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
+ /* cr2 is not modified in case of exceptions */
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+ addr);
+ } else {
+ env->cr[2] = addr;
+ }
+ env->error_code = error_code;
+ env->exception_index = EXCP0E_PAGE;
+ return 1;
+}
+
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+ target_ulong pde_addr, pte_addr;
+ uint64_t pte;
+ target_phys_addr_t paddr;
+ uint32_t page_offset;
+ int page_size;
+
+ if (env->cr[4] & CR4_PAE_MASK) {
+ target_ulong pdpe_addr;
+ uint64_t pde, pdpe;
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint64_t pml4e_addr, pml4e;
+ int32_t sext;
+
+ /* test virtual address sign extension */
+ sext = (int64_t)addr >> 47;
+ if (sext != 0 && sext != -1)
+ return -1;
+
+ pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pml4e = ldq_phys(pml4e_addr);
+ if (!(pml4e & PG_PRESENT_MASK))
+ return -1;
+
+ pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK))
+ return -1;
+ } else
+#endif
+ {
+ pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
+ env->a20_mask;
+ pdpe = ldq_phys(pdpe_addr);
+ if (!(pdpe & PG_PRESENT_MASK))
+ return -1;
+ }
+
+ pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
+ env->a20_mask;
+ pde = ldq_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK)) {
+ return -1;
+ }
+ if (pde & PG_PSE_MASK) {
+ /* 2 MB page */
+ page_size = 2048 * 1024;
+ pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
+ } else {
+ /* 4 KB page */
+ pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
+ env->a20_mask;
+ page_size = 4096;
+ pte = ldq_phys(pte_addr);
+ }
+ if (!(pte & PG_PRESENT_MASK))
+ return -1;
+ } else {
+ uint32_t pde;
+
+ if (!(env->cr[0] & CR0_PG_MASK)) {
+ pte = addr;
+ page_size = 4096;
+ } else {
+ /* page directory entry */
+ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
+ pde = ldl_phys(pde_addr);
+ if (!(pde & PG_PRESENT_MASK))
+ return -1;
+ if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
+ pte = pde & ~0x003ff000; /* align to 4MB */
+ page_size = 4096 * 1024;
+ } else {
+ /* page table entry */
+ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
+ pte = ldl_phys(pte_addr);
+ if (!(pte & PG_PRESENT_MASK))
+ return -1;
+ page_size = 4096;
+ }
+ }
+ pte = pte & env->a20_mask;
+ }
+
+ page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
+ paddr = (pte & TARGET_PAGE_MASK) + page_offset;
+ return paddr;
+}
+
+void hw_breakpoint_insert(CPUState *env, int index)
+{
+ int type, err = 0;
+
+ switch (hw_breakpoint_type(env->dr[7], index)) {
+ case 0:
+ if (hw_breakpoint_enabled(env->dr[7], index))
+ err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU,
+ &env->cpu_breakpoint[index]);
+ break;
+ case 1:
+ type = BP_CPU | BP_MEM_WRITE;
+ goto insert_wp;
+ case 2:
+ /* No support for I/O watchpoints yet */
+ break;
+ case 3:
+ type = BP_CPU | BP_MEM_ACCESS;
+ insert_wp:
+ err = cpu_watchpoint_insert(env, env->dr[index],
+ hw_breakpoint_len(env->dr[7], index),
+ type, &env->cpu_watchpoint[index]);
+ break;
+ }
+ if (err)
+ env->cpu_breakpoint[index] = NULL;
+}
+
+void hw_breakpoint_remove(CPUState *env, int index)
+{
+ if (!env->cpu_breakpoint[index])
+ return;
+ switch (hw_breakpoint_type(env->dr[7], index)) {
+ case 0:
+ if (hw_breakpoint_enabled(env->dr[7], index))
+ cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]);
+ break;
+ case 1:
+ case 3:
+ cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]);
+ break;
+ case 2:
+ /* No support for I/O watchpoints yet */
+ break;
+ }
+}
+
+int check_hw_breakpoints(CPUState *env, int force_dr6_update)
+{
+ target_ulong dr6;
+ int reg, type;
+ int hit_enabled = 0;
+
+ dr6 = env->dr[6] & ~0xf;
+ for (reg = 0; reg < 4; reg++) {
+ type = hw_breakpoint_type(env->dr[7], reg);
+ if ((type == 0 && env->dr[reg] == env->eip) ||
+ ((type & 1) && env->cpu_watchpoint[reg] &&
+ (env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT))) {
+ dr6 |= 1 << reg;
+ if (hw_breakpoint_enabled(env->dr[7], reg))
+ hit_enabled = 1;
+ }
+ }
+ if (hit_enabled || force_dr6_update)
+ env->dr[6] = dr6;
+ return hit_enabled;
+}
+
+static CPUDebugExcpHandler *prev_debug_excp_handler;
+
+void raise_exception_env(int exception_index, CPUState *env);
+
+static void breakpoint_handler(CPUState *env)
+{
+ CPUBreakpoint *bp;
+
+ if (env->watchpoint_hit) {
+ if (env->watchpoint_hit->flags & BP_CPU) {
+ env->watchpoint_hit = NULL;
+ if (check_hw_breakpoints(env, 0))
+ raise_exception_env(EXCP01_DB, env);
+ else
+ cpu_resume_from_signal(env, NULL);
+ }
+ } else {
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry)
+ if (bp->pc == env->eip) {
+ if (bp->flags & BP_CPU) {
+ check_hw_breakpoints(env, 1);
+ raise_exception_env(EXCP01_DB, env);
+ }
+ break;
+ }
+ }
+ if (prev_debug_excp_handler)
+ prev_debug_excp_handler(env);
+}
+
+#ifndef VBOX
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+ uint64_t mcg_cap = cenv->mcg_cap;
+ unsigned bank_num = mcg_cap & 0xff;
+ uint64_t *banks = cenv->mce_banks;
+
+ if (bank >= bank_num || !(status & MCI_STATUS_VAL))
+ return;
+
+ /*
+ * if MSR_MCG_CTL is not all 1s, the uncorrected error
+ * reporting is disabled
+ */
+ if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+ cenv->mcg_ctl != ~(uint64_t)0)
+ return;
+ banks += 4 * bank;
+ /*
+ * if MSR_MCi_CTL is not all 1s, the uncorrected error
+ * reporting is disabled for the bank
+ */
+ if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
+ return;
+ if (status & MCI_STATUS_UC) {
+ if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
+ !(cenv->cr[4] & CR4_MCE_MASK)) {
+ fprintf(stderr, "injects mce exception while previous "
+ "one is in progress!\n");
+ qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+ qemu_system_reset_request();
+ return;
+ }
+ if (banks[1] & MCI_STATUS_VAL)
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ cenv->mcg_status = mcg_status;
+ banks[1] = status;
+ cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+ } else if (!(banks[1] & MCI_STATUS_VAL)
+ || !(banks[1] & MCI_STATUS_UC)) {
+ if (banks[1] & MCI_STATUS_VAL)
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ banks[1] = status;
+ } else
+ banks[1] |= MCI_STATUS_OVER;
+}
+#endif /* !VBOX */
+#endif /* !CONFIG_USER_ONLY */
+
+#ifndef VBOX
+
+static void mce_init(CPUX86State *cenv)
+{
+ unsigned int bank, bank_num;
+
+ if (((cenv->cpuid_version >> 8)&0xf) >= 6
+ && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) {
+ cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
+ cenv->mcg_ctl = ~(uint64_t)0;
+ bank_num = MCE_BANKS_DEF;
+ for (bank = 0; bank < bank_num; bank++)
+ cenv->mce_banks[bank*4] = ~(uint64_t)0;
+ }
+}
+
+int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
+ target_ulong *base, unsigned int *limit,
+ unsigned int *flags)
+{
+ SegmentCache *dt;
+ target_ulong ptr;
+ uint32_t e1, e2;
+ int index;
+
+ if (selector & 0x4)
+ dt = &env->ldt;
+ else
+ dt = &env->gdt;
+ index = selector & ~7;
+ ptr = dt->base + index;
+ if ((index + 7) > dt->limit
+ || cpu_memory_rw_debug(env, ptr, (uint8_t *)&e1, sizeof(e1), 0) != 0
+ || cpu_memory_rw_debug(env, ptr+4, (uint8_t *)&e2, sizeof(e2), 0) != 0)
+ return 0;
+
+ *base = ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
+ *limit = (e1 & 0xffff) | (e2 & 0x000f0000);
+ if (e2 & DESC_G_MASK)
+ *limit = (*limit << 12) | 0xfff;
+ *flags = e2;
+
+ return 1;
+}
+
+#endif /* !VBOX */
+
+#ifndef VBOX
+CPUX86State *cpu_x86_init(const char *cpu_model)
+#else
+CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model)
+#endif
+{
+#ifndef VBOX
+ CPUX86State *env;
+#endif
+ static int inited;
+
+#ifndef VBOX
+ env = qemu_mallocz(sizeof(CPUX86State));
+#endif
+ cpu_exec_init(env);
+ env->cpu_model_str = cpu_model;
+
+ /* init various static tables */
+ if (!inited) {
+ inited = 1;
+ optimize_flags_init();
+#ifndef CONFIG_USER_ONLY
+ prev_debug_excp_handler =
+ cpu_set_debug_excp_handler(breakpoint_handler);
+#endif
+ }
+#ifndef VBOX
+ if (cpu_x86_register(env, cpu_model) < 0) {
+ cpu_x86_close(env);
+ return NULL;
+ }
+ mce_init(env);
+#endif
+
+ qemu_init_vcpu(env);
+
+ return env;
+}
+
+#ifndef VBOX
+#if !defined(CONFIG_USER_ONLY)
+void do_cpu_init(CPUState *env)
+{
+ int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
+ cpu_reset(env);
+ env->interrupt_request = sipi;
+ apic_init_reset(env->apic_state);
+ env->halted = !cpu_is_bsp(env);
+}
+
+void do_cpu_sipi(CPUState *env)
+{
+ apic_sipi(env->apic_state);
+}
+#else
+void do_cpu_init(CPUState *env)
+{
+}
+void do_cpu_sipi(CPUState *env)
+{
+}
+#endif
+#endif /* !VBOX */
diff --git a/src/recompiler/target-i386/helper.h b/src/recompiler/target-i386/helper.h
new file mode 100644
index 00000000..8307304a
--- /dev/null
+++ b/src/recompiler/target-i386/helper.h
@@ -0,0 +1,253 @@
+#include "def-helper.h"
+
+DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int)
+DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int)
+
+DEF_HELPER_0(lock, void)
+DEF_HELPER_0(unlock, void)
+DEF_HELPER_2(write_eflags, void, tl, i32)
+DEF_HELPER_0(read_eflags, tl)
+DEF_HELPER_1(divb_AL, void, tl)
+DEF_HELPER_1(idivb_AL, void, tl)
+DEF_HELPER_1(divw_AX, void, tl)
+DEF_HELPER_1(idivw_AX, void, tl)
+DEF_HELPER_1(divl_EAX, void, tl)
+DEF_HELPER_1(idivl_EAX, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(mulq_EAX_T0, void, tl)
+DEF_HELPER_1(imulq_EAX_T0, void, tl)
+DEF_HELPER_2(imulq_T0_T1, tl, tl, tl)
+DEF_HELPER_1(divq_EAX, void, tl)
+DEF_HELPER_1(idivq_EAX, void, tl)
+#endif
+
+DEF_HELPER_1(aam, void, int)
+DEF_HELPER_1(aad, void, int)
+DEF_HELPER_0(aaa, void)
+DEF_HELPER_0(aas, void)
+DEF_HELPER_0(daa, void)
+DEF_HELPER_0(das, void)
+
+DEF_HELPER_1(lsl, tl, tl)
+DEF_HELPER_1(lar, tl, tl)
+DEF_HELPER_1(verr, void, tl)
+DEF_HELPER_1(verw, void, tl)
+DEF_HELPER_1(lldt, void, int)
+DEF_HELPER_1(ltr, void, int)
+DEF_HELPER_2(load_seg, void, int, int)
+DEF_HELPER_3(ljmp_protected, void, int, tl, int)
+DEF_HELPER_4(lcall_real, void, int, tl, int, int)
+DEF_HELPER_4(lcall_protected, void, int, tl, int, int)
+DEF_HELPER_1(iret_real, void, int)
+DEF_HELPER_2(iret_protected, void, int, int)
+DEF_HELPER_2(lret_protected, void, int, int)
+DEF_HELPER_1(read_crN, tl, int)
+DEF_HELPER_2(write_crN, void, int, tl)
+DEF_HELPER_1(lmsw, void, tl)
+DEF_HELPER_0(clts, void)
+DEF_HELPER_2(movl_drN_T0, void, int, tl)
+DEF_HELPER_1(invlpg, void, tl)
+
+DEF_HELPER_3(enter_level, void, int, int, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_3(enter64_level, void, int, int, tl)
+#endif
+DEF_HELPER_0(sysenter, void)
+DEF_HELPER_1(sysexit, void, int)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(syscall, void, int)
+DEF_HELPER_1(sysret, void, int)
+#endif
+DEF_HELPER_1(hlt, void, int)
+DEF_HELPER_1(monitor, void, tl)
+DEF_HELPER_1(mwait, void, int)
+DEF_HELPER_0(debug, void)
+DEF_HELPER_0(reset_rf, void)
+DEF_HELPER_2(raise_interrupt, void, int, int)
+DEF_HELPER_1(raise_exception, void, int)
+DEF_HELPER_0(cli, void)
+DEF_HELPER_0(sti, void)
+DEF_HELPER_0(set_inhibit_irq, void)
+DEF_HELPER_0(reset_inhibit_irq, void)
+DEF_HELPER_2(boundw, void, tl, int)
+DEF_HELPER_2(boundl, void, tl, int)
+DEF_HELPER_0(rsm, void)
+DEF_HELPER_1(into, void, int)
+DEF_HELPER_1(cmpxchg8b, void, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cmpxchg16b, void, tl)
+#endif
+DEF_HELPER_0(single_step, void)
+DEF_HELPER_0(cpuid, void)
+DEF_HELPER_0(rdtsc, void)
+DEF_HELPER_0(rdtscp, void)
+DEF_HELPER_0(rdpmc, void)
+DEF_HELPER_0(rdmsr, void)
+DEF_HELPER_0(wrmsr, void)
+
+DEF_HELPER_1(check_iob, void, i32)
+DEF_HELPER_1(check_iow, void, i32)
+DEF_HELPER_1(check_iol, void, i32)
+DEF_HELPER_2(outb, void, i32, i32)
+DEF_HELPER_1(inb, tl, i32)
+DEF_HELPER_2(outw, void, i32, i32)
+DEF_HELPER_1(inw, tl, i32)
+DEF_HELPER_2(outl, void, i32, i32)
+DEF_HELPER_1(inl, tl, i32)
+
+DEF_HELPER_2(svm_check_intercept_param, void, i32, i64)
+DEF_HELPER_2(vmexit, void, i32, i64)
+DEF_HELPER_3(svm_check_io, void, i32, i32, i32)
+DEF_HELPER_2(vmrun, void, int, int)
+DEF_HELPER_0(vmmcall, void)
+DEF_HELPER_1(vmload, void, int)
+DEF_HELPER_1(vmsave, void, int)
+DEF_HELPER_0(stgi, void)
+DEF_HELPER_0(clgi, void)
+DEF_HELPER_0(skinit, void)
+DEF_HELPER_1(invlpga, void, int)
+
+/* x86 FPU */
+
+DEF_HELPER_1(flds_FT0, void, i32)
+DEF_HELPER_1(fldl_FT0, void, i64)
+DEF_HELPER_1(fildl_FT0, void, s32)
+DEF_HELPER_1(flds_ST0, void, i32)
+DEF_HELPER_1(fldl_ST0, void, i64)
+DEF_HELPER_1(fildl_ST0, void, s32)
+DEF_HELPER_1(fildll_ST0, void, s64)
+#ifndef VBOX
+DEF_HELPER_0(fsts_ST0, i32)
+DEF_HELPER_0(fstl_ST0, i64)
+DEF_HELPER_0(fist_ST0, s32)
+DEF_HELPER_0(fistl_ST0, s32)
+DEF_HELPER_0(fistll_ST0, s64)
+DEF_HELPER_0(fistt_ST0, s32)
+DEF_HELPER_0(fisttl_ST0, s32)
+DEF_HELPER_0(fisttll_ST0, s64)
+#else /* VBOX */
+DEF_HELPER_0(fsts_ST0, RTCCUINTREG)
+DEF_HELPER_0(fstl_ST0, i64)
+DEF_HELPER_0(fist_ST0, RTCCINTREG)
+DEF_HELPER_0(fistl_ST0, RTCCINTREG)
+DEF_HELPER_0(fistll_ST0, s64)
+DEF_HELPER_0(fistt_ST0, RTCCINTREG)
+DEF_HELPER_0(fisttl_ST0, RTCCINTREG)
+DEF_HELPER_0(fisttll_ST0, s64)
+#endif /* VBOX */
+DEF_HELPER_1(fldt_ST0, void, tl)
+DEF_HELPER_1(fstt_ST0, void, tl)
+DEF_HELPER_0(fpush, void)
+DEF_HELPER_0(fpop, void)
+DEF_HELPER_0(fdecstp, void)
+DEF_HELPER_0(fincstp, void)
+DEF_HELPER_1(ffree_STN, void, int)
+DEF_HELPER_0(fmov_ST0_FT0, void)
+DEF_HELPER_1(fmov_FT0_STN, void, int)
+DEF_HELPER_1(fmov_ST0_STN, void, int)
+DEF_HELPER_1(fmov_STN_ST0, void, int)
+DEF_HELPER_1(fxchg_ST0_STN, void, int)
+DEF_HELPER_0(fcom_ST0_FT0, void)
+DEF_HELPER_0(fucom_ST0_FT0, void)
+DEF_HELPER_0(fcomi_ST0_FT0, void)
+DEF_HELPER_0(fucomi_ST0_FT0, void)
+DEF_HELPER_0(fadd_ST0_FT0, void)
+DEF_HELPER_0(fmul_ST0_FT0, void)
+DEF_HELPER_0(fsub_ST0_FT0, void)
+DEF_HELPER_0(fsubr_ST0_FT0, void)
+DEF_HELPER_0(fdiv_ST0_FT0, void)
+DEF_HELPER_0(fdivr_ST0_FT0, void)
+DEF_HELPER_1(fadd_STN_ST0, void, int)
+DEF_HELPER_1(fmul_STN_ST0, void, int)
+DEF_HELPER_1(fsub_STN_ST0, void, int)
+DEF_HELPER_1(fsubr_STN_ST0, void, int)
+DEF_HELPER_1(fdiv_STN_ST0, void, int)
+DEF_HELPER_1(fdivr_STN_ST0, void, int)
+DEF_HELPER_0(fchs_ST0, void)
+DEF_HELPER_0(fabs_ST0, void)
+DEF_HELPER_0(fxam_ST0, void)
+DEF_HELPER_0(fld1_ST0, void)
+DEF_HELPER_0(fldl2t_ST0, void)
+DEF_HELPER_0(fldl2e_ST0, void)
+DEF_HELPER_0(fldpi_ST0, void)
+DEF_HELPER_0(fldlg2_ST0, void)
+DEF_HELPER_0(fldln2_ST0, void)
+DEF_HELPER_0(fldz_ST0, void)
+DEF_HELPER_0(fldz_FT0, void)
+#ifndef VBOX
+DEF_HELPER_0(fnstsw, i32)
+DEF_HELPER_0(fnstcw, i32)
+#else /* VBOX */
+DEF_HELPER_0(fnstsw, RTCCUINTREG)
+DEF_HELPER_0(fnstcw, RTCCUINTREG)
+#endif /* VBOX */
+DEF_HELPER_1(fldcw, void, i32)
+DEF_HELPER_0(fclex, void)
+DEF_HELPER_0(fwait, void)
+DEF_HELPER_0(fninit, void)
+DEF_HELPER_1(fbld_ST0, void, tl)
+DEF_HELPER_1(fbst_ST0, void, tl)
+DEF_HELPER_0(f2xm1, void)
+DEF_HELPER_0(fyl2x, void)
+DEF_HELPER_0(fptan, void)
+DEF_HELPER_0(fpatan, void)
+DEF_HELPER_0(fxtract, void)
+DEF_HELPER_0(fprem1, void)
+DEF_HELPER_0(fprem, void)
+DEF_HELPER_0(fyl2xp1, void)
+DEF_HELPER_0(fsqrt, void)
+DEF_HELPER_0(fsincos, void)
+DEF_HELPER_0(frndint, void)
+DEF_HELPER_0(fscale, void)
+DEF_HELPER_0(fsin, void)
+DEF_HELPER_0(fcos, void)
+DEF_HELPER_2(fstenv, void, tl, int)
+DEF_HELPER_2(fldenv, void, tl, int)
+DEF_HELPER_2(fsave, void, tl, int)
+DEF_HELPER_2(frstor, void, tl, int)
+DEF_HELPER_2(fxsave, void, tl, int)
+DEF_HELPER_2(fxrstor, void, tl, int)
+DEF_HELPER_1(bsf, tl, tl)
+DEF_HELPER_1(bsr, tl, tl)
+DEF_HELPER_2(lzcnt, tl, tl, int)
+
+/* MMX/SSE */
+
+DEF_HELPER_0(enter_mmx, void)
+DEF_HELPER_0(emms, void)
+DEF_HELPER_2(movq, void, ptr, ptr)
+
+#define SHIFT 0
+#include "ops_sse_header.h"
+#define SHIFT 1
+#include "ops_sse_header.h"
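+/* Illustrative note (not from the original source): ops_sse_header.h is a
+ template included twice, once with SHIFT 0 for the 64-bit MMX forms and
+ once with SHIFT 1 for the 128-bit SSE forms of each helper. */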
+
+DEF_HELPER_2(rclb, tl, tl, tl)
+DEF_HELPER_2(rclw, tl, tl, tl)
+DEF_HELPER_2(rcll, tl, tl, tl)
+DEF_HELPER_2(rcrb, tl, tl, tl)
+DEF_HELPER_2(rcrw, tl, tl, tl)
+DEF_HELPER_2(rcrl, tl, tl, tl)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(rclq, tl, tl, tl)
+DEF_HELPER_2(rcrq, tl, tl, tl)
+#endif
+
+#ifdef VBOX
+DEF_HELPER_1(write_eflags_vme, void, tl)
+DEF_HELPER_0(read_eflags_vme, tl)
+DEF_HELPER_0(cli_vme, void)
+DEF_HELPER_0(sti_vme, void)
+DEF_HELPER_0(check_external_event, void)
+DEF_HELPER_0(dump_state, void)
+DEF_HELPER_1(sync_seg, void, i32)
+
+void helper_external_event(void);
+void helper_record_call(void);
+
+/* in op_helper.c */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector);
+void sync_ldtr(CPUX86State *env1, int selector);
+#endif /* VBOX */
+
+#include "def-helper.h"
diff --git a/src/recompiler/target-i386/helper_template.h b/src/recompiler/target-i386/helper_template.h
new file mode 100644
index 00000000..193b3274
--- /dev/null
+++ b/src/recompiler/target-i386/helper_template.h
@@ -0,0 +1,344 @@
+/*
+ * i386 helpers
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#define DATA_BITS (1 << (3 + SHIFT))
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))
+#if DATA_BITS <= 32
+#define SHIFT1_MASK 0x1f
+#else
+#define SHIFT1_MASK 0x3f
+#endif
+
+#if DATA_BITS == 8
+#define SUFFIX b
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#elif DATA_BITS == 64
+#define SUFFIX q
+#define DATA_TYPE uint64_t
+#define DATA_STYPE int64_t
+#define DATA_MASK 0xffffffffffffffffULL
+#else
+#error unhandled operand size
+#endif
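+/* Illustrative note (not from the original source): this header is included
+ once per operand size with SHIFT set to 0..3, so DATA_BITS expands to 8,
+ 16, 32 or 64 and glue() pastes the matching b/w/l/q suffix onto each
+ generated flag helper. */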
+
+/* dynamic flags computation */
+
+static int glue(compute_all_add, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_SRC;
+ src2 = CC_DST - CC_SRC;
+ cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
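+/* Illustrative note (not from the original source): for the byte variant,
+ ADD 0xff + 0x01 leaves CC_SRC = 0xff and CC_DST = 0x100, so the compare
+ (uint8_t)CC_DST == 0x00 < 0xff gives CF = 1 and ((DATA_TYPE)CC_DST == 0) << 6
+ gives ZF, matching the hardware result of the 8-bit addition. */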
+
+static int glue(compute_c_add, SUFFIX)(void)
+{
+ int cf;
+ target_long src1;
+ src1 = CC_SRC;
+ cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+ return cf;
+}
+
+static int glue(compute_all_adc, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_SRC;
+ src2 = CC_DST - CC_SRC - 1;
+ cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_adc, SUFFIX)(void)
+{
+ int cf;
+ target_long src1;
+ src1 = CC_SRC;
+ cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
+ return cf;
+}
+
+static int glue(compute_all_sub, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_DST + CC_SRC;
+ src2 = CC_SRC;
+ cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_sub, SUFFIX)(void)
+{
+ int cf;
+ target_long src1, src2;
+ src1 = CC_DST + CC_SRC;
+ src2 = CC_SRC;
+ cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+ return cf;
+}
+
+static int glue(compute_all_sbb, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_DST + CC_SRC + 1;
+ src2 = CC_SRC;
+ cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_sbb, SUFFIX)(void)
+{
+ int cf;
+ target_long src1, src2;
+ src1 = CC_DST + CC_SRC + 1;
+ src2 = CC_SRC;
+ cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
+ return cf;
+}
+
+static int glue(compute_all_logic, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ cf = 0;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = 0;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = 0;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_logic, SUFFIX)(void)
+{
+ return 0;
+}
+
+static int glue(compute_all_inc, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_DST - 1;
+ src2 = 1;
+ cf = CC_SRC;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+ return cf | pf | af | zf | sf | of;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_inc, SUFFIX)(void)
+{
+ return CC_SRC;
+}
+#endif
+
+static int glue(compute_all_dec, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ target_long src1, src2;
+ src1 = CC_DST + 1;
+ src2 = 1;
+ cf = CC_SRC;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = (CC_DST ^ src1 ^ src2) & 0x10;
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_all_shl, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = 0; /* undefined */
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ /* of is defined if shift count == 1 */
+ of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_shl, SUFFIX)(void)
+{
+ return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_sar, SUFFIX)(void)
+{
+ return CC_SRC & 1;
+}
+#endif
+
+static int glue(compute_all_sar, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ cf = CC_SRC & 1;
+ pf = parity_table[(uint8_t)CC_DST];
+ af = 0; /* undefined */
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ /* of is defined if shift count == 1 */
+ of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+ return cf | pf | af | zf | sf | of;
+}
+
+#if DATA_BITS == 32
+static int glue(compute_c_mul, SUFFIX)(void)
+{
+ int cf;
+ cf = (CC_SRC != 0);
+ return cf;
+}
+#endif
+
+/* NOTE: we compute the flags like the P4 does. On older CPUs, only OF and
+   CF are modified, and emulating that exactly would be slower. */
+static int glue(compute_all_mul, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ cf = (CC_SRC != 0);
+ pf = parity_table[(uint8_t)CC_DST];
+ af = 0; /* undefined */
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = cf << 11;
+ return cf | pf | af | zf | sf | of;
+}
+
+/* shifts */
+
+target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1)
+{
+ int count, eflags;
+ target_ulong src;
+ target_long res;
+
+ count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+ count = rclw_table[count];
+#elif DATA_BITS == 8
+ count = rclb_table[count];
+#endif
+ if (count) {
+ eflags = helper_cc_compute_all(CC_OP);
+ t0 &= DATA_MASK;
+ src = t0;
+ res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
+ if (count > 1)
+ res |= t0 >> (DATA_BITS + 1 - count);
+ t0 = res;
+ env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+ ((src >> (DATA_BITS - count)) & CC_C);
+ } else {
+ env->cc_tmp = -1;
+ }
+ return t0;
+}
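+
+/* Example: an 8-bit RCL of 0x80 by 1 with CF clear rotates through the
+   carry, giving a result of 0x00 with CF=1 and OF=1 (OF being the XOR of
+   the new CF and the new MSB), which is what the cc_tmp computation above
+   produces. */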
+
+target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1)
+{
+ int count, eflags;
+ target_ulong src;
+ target_long res;
+
+ count = t1 & SHIFT1_MASK;
+#if DATA_BITS == 16
+ count = rclw_table[count];
+#elif DATA_BITS == 8
+ count = rclb_table[count];
+#endif
+ if (count) {
+ eflags = helper_cc_compute_all(CC_OP);
+ t0 &= DATA_MASK;
+ src = t0;
+ res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
+ if (count > 1)
+ res |= t0 << (DATA_BITS + 1 - count);
+ t0 = res;
+ env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
+ ((src >> (count - 1)) & CC_C);
+ } else {
+ env->cc_tmp = -1;
+ }
+ return t0;
+}
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SHIFT1_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
diff --git a/src/recompiler/target-i386/op_helper.c b/src/recompiler/target-i386/op_helper.c
new file mode 100644
index 00000000..07b58f8d
--- /dev/null
+++ b/src/recompiler/target-i386/op_helper.c
@@ -0,0 +1,7164 @@
+/*
+ * i386 helpers
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include "exec.h"
+#include "exec-all.h"
+#include "host-utils.h"
+#include "ioport.h"
+
+#ifdef VBOX
+# include "qemu-common.h"
+# include <math.h>
+# include "tcg.h"
+# include <VBox/err.h>
+#endif /* VBOX */
+
+//#define DEBUG_PCALL
+
+
+#ifdef DEBUG_PCALL
+# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
+# define LOG_PCALL_STATE(env) \
+ log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+#else
+# define LOG_PCALL(...) do { } while (0)
+# define LOG_PCALL_STATE(env) do { } while (0)
+#endif
+
+
+#if 0
+#define raise_exception_err(a, b)\
+do {\
+ qemu_log("raise_exception line=%d\n", __LINE__);\
+ (raise_exception_err)(a, b);\
+} while (0)
+#endif
+
+static const uint8_t parity_table[256] = {
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
+ 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
+};
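+
+/* PF is computed from the low 8 bits of the result only: a table entry is
+   CC_P when the byte has an even number of set bits.  E.g. parity_table[3]
+   is CC_P (0x03 has two bits set), while parity_table[1] is 0. */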
+
+/* modulo 17 table */
+static const uint8_t rclw_table[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9,10,11,12,13,14,15,
+ 16, 0, 1, 2, 3, 4, 5, 6,
+ 7, 8, 9,10,11,12,13,14,
+};
+
+/* modulo 9 table */
+static const uint8_t rclb_table[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 0, 1, 2, 3, 4, 5, 6,
+ 7, 8, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 0, 1, 2, 3, 4,
+};
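+
+/* RCL/RCR rotate through CF, i.e. over DATA_BITS + 1 bits, so the shift
+   count is reduced modulo 17 for word operands and modulo 9 for byte
+   operands by the tables above (e.g. rclb_table[9] == 0, so an 8-bit RCL
+   by 9 leaves the operand and flags unchanged). */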
+
+static const CPU86_LDouble f15rk[7] =
+{
+ 0.00000000000000000000L,
+ 1.00000000000000000000L,
+ 3.14159265358979323851L, /*pi*/
+ 0.30102999566398119523L, /*lg2*/
+ 0.69314718055994530943L, /*ln2*/
+ 1.44269504088896340739L, /*l2e*/
+ 3.32192809488736234781L, /*l2t*/
+};
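+
+/* These correspond to the x87 constant-load instructions:
+   FLDZ, FLD1, FLDPI, FLDLG2, FLDLN2, FLDL2E and FLDL2T. */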
+
+/* broken thread support */
+
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+void helper_lock(void)
+{
+ spin_lock(&global_cpu_lock);
+}
+
+void helper_unlock(void)
+{
+ spin_unlock(&global_cpu_lock);
+}
+
+void helper_write_eflags(target_ulong t0, uint32_t update_mask)
+{
+ load_eflags(t0, update_mask);
+}
+
+target_ulong helper_read_eflags(void)
+{
+ uint32_t eflags;
+ eflags = helper_cc_compute_all(CC_OP);
+ eflags |= (DF & DF_MASK);
+ eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+ return eflags;
+}
+
+#ifdef VBOX
+
+void helper_write_eflags_vme(target_ulong t0)
+{
+ unsigned int new_eflags = t0;
+
+ assert(env->eflags & (1<<VM_SHIFT));
+
+ /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+ /* if TF will be set -> #GP */
+ if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK))
+ || (new_eflags & TF_MASK)) {
+ raise_exception(EXCP0D_GPF);
+ } else {
+ load_eflags(new_eflags,
+ (TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff);
+
+ if (new_eflags & IF_MASK) {
+ env->eflags |= VIF_MASK;
+ } else {
+ env->eflags &= ~VIF_MASK;
+ }
+ }
+}
+
+target_ulong helper_read_eflags_vme(void)
+{
+ uint32_t eflags;
+ eflags = helper_cc_compute_all(CC_OP);
+ eflags |= (DF & DF_MASK);
+ eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+ if (env->eflags & VIF_MASK)
+ eflags |= IF_MASK;
+ else
+ eflags &= ~IF_MASK;
+
+ /* According to the AMD manual, this should be read with IOPL == 3 */
+ eflags |= (3 << IOPL_SHIFT);
+
+ /* We only use helper_read_eflags_vme() in 16-bit mode */
+ return eflags & 0xffff;
+}
+
+void helper_dump_state(void)
+{
+ LogRel(("CS:EIP=%08x:%08x, FLAGS=%08x\n", env->segs[R_CS].base, env->eip, env->eflags));
+ LogRel(("EAX=%08x\tECX=%08x\tEDX=%08x\tEBX=%08x\n",
+ (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX],
+ (uint32_t)env->regs[R_EDX], (uint32_t)env->regs[R_EBX]));
+ LogRel(("ESP=%08x\tEBP=%08x\tESI=%08x\tEDI=%08x\n",
+ (uint32_t)env->regs[R_ESP], (uint32_t)env->regs[R_EBP],
+ (uint32_t)env->regs[R_ESI], (uint32_t)env->regs[R_EDI]));
+}
+
+/**
+ * Sets DESC_A_MASK in e2, writes the updated descriptor dword back to the
+ * descriptor table and returns the updated e2.
+ *
+ * @returns e2 with the accessed bit set.
+ * @param   selector    The selector identifying the descriptor to mark accessed.
+ * @param   e2          The second descriptor DWORD.
+ */
+static uint32_t set_segment_accessed(int selector, uint32_t e2)
+{
+ SegmentCache *dt = selector & X86_SEL_LDT ? &env->ldt : &env->gdt;
+ target_ulong ptr = dt->base + (selector & X86_SEL_MASK);
+
+ e2 |= DESC_A_MASK;
+ stl_kernel(ptr + 4, e2);
+ return e2;
+}
+
+#endif /* VBOX */
+
+/* returns non-zero on error */
+static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr,
+ int selector)
+{
+ SegmentCache *dt;
+ int index;
+ target_ulong ptr;
+
+ if (selector & 0x4)
+ dt = &env->ldt;
+ else
+ dt = &env->gdt;
+ index = selector & ~7;
+ if ((index + 7) > dt->limit)
+ return -1;
+ ptr = dt->base + index;
+ *e1_ptr = ldl_kernel(ptr);
+ *e2_ptr = ldl_kernel(ptr + 4);
+ return 0;
+}
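+
+/* e.g. selector 0x0008 (GDT, RPL 0) selects descriptor index 1: the 8-byte
+   descriptor is read from gdt.base + 8, with the low dword returned in
+   *e1_ptr and the high dword in *e2_ptr. */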
+
+static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2)
+{
+ unsigned int limit;
+ limit = (e1 & 0xffff) | (e2 & 0x000f0000);
+ if (e2 & DESC_G_MASK)
+ limit = (limit << 12) | 0xfff;
+ return limit;
+}
+
+static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2)
+{
+ return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
+}
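+
+/* A descriptor's 32-bit base is scattered across both dwords: bits 15:0 come
+   from e1[31:16], bits 23:16 from e2[7:0] and bits 31:24 from e2[31:24],
+   which is exactly what the three terms above reassemble. */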
+
+static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2)
+{
+ sc->base = get_seg_base(e1, e2);
+ sc->limit = get_seg_limit(e1, e2);
+#ifndef VBOX
+ sc->flags = e2;
+#else
+ sc->flags = e2 & DESC_RAW_FLAG_BITS;
+ sc->newselector = 0;
+ sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+#endif
+}
+
+/* init the segment cache in vm86 mode. */
+static inline void load_seg_vm(int seg, int selector)
+{
+ selector &= 0xffff;
+#ifdef VBOX
+ /* flags must be 0xf3; expand-up read/write accessed data segment with DPL=3. (VT-x) */
+ unsigned flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK;
+ flags |= (3 << DESC_DPL_SHIFT);
+
+ cpu_x86_load_seg_cache(env, seg, selector,
+ (selector << 4), 0xffff, flags);
+#else /* VBOX */
+ cpu_x86_load_seg_cache(env, seg, selector,
+ (selector << 4), 0xffff, 0);
+#endif /* VBOX */
+}
+
+static inline void get_ss_esp_from_tss(uint32_t *ss_ptr,
+ uint32_t *esp_ptr, int dpl)
+{
+#ifndef VBOX
+ int type, index, shift;
+#else
+ unsigned int type, index, shift;
+#endif
+
+#if 0
+ {
+ int i;
+ printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit);
+ for(i=0;i<env->tr.limit;i++) {
+ printf("%02x ", env->tr.base[i]);
+ if ((i & 7) == 7) printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
+ if (!(env->tr.flags & DESC_P_MASK))
+ cpu_abort(env, "invalid tss");
+ type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ if ((type & 7) != 3)
+ cpu_abort(env, "invalid tss type");
+ shift = type >> 3;
+ index = (dpl * 4 + 2) << shift;
+ if (index + (4 << shift) - 1 > env->tr.limit)
+ raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+ if (shift == 0) {
+ *esp_ptr = lduw_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 2);
+ } else {
+ *esp_ptr = ldl_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 4);
+ }
+}
+
+/* XXX: merge with load_seg() */
+static void tss_load_seg(int seg_reg, int selector)
+{
+ uint32_t e1, e2;
+ int rpl, dpl, cpl;
+
+#ifdef VBOX
+ e1 = e2 = 0; /* gcc warning? */
+ cpl = env->hflags & HF_CPL_MASK;
+ /* Trying to load a selector with RPL=1 while in raw ring 0? */
+ if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ Log(("RPL 1 -> sel %04X -> %04X (tss_load_seg)\n", selector, selector & 0xfffc));
+ selector = selector & 0xfffc;
+ }
+#endif /* VBOX */
+
+ if ((selector & 0xfffc) != 0) {
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ if (!(e2 & DESC_S_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (seg_reg == R_CS) {
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ /* XXX: is it correct ? */
+ if (dpl != rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ if ((e2 & DESC_C_MASK) && dpl > rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ } else if (seg_reg == R_SS) {
+ /* SS must be writable data */
+ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ if (dpl != cpl || dpl != rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ } else {
+ /* not readable code */
+ if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK))
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ /* for data or non-conforming code segments, check the access rights */
+ if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) {
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+ }
+ }
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ } else {
+ if (seg_reg == R_SS || seg_reg == R_CS)
+ raise_exception_err(EXCP0A_TSS, selector & 0xfffc);
+#ifdef VBOX
+# if 0 /** @todo for now we ignore loading null (0) selectors; need to check what the correct behaviour is at some point */
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ 0, 0, 0);
+# endif
+#endif /* VBOX */
+ }
+}
+
+#define SWITCH_TSS_JMP 0
+#define SWITCH_TSS_IRET 1
+#define SWITCH_TSS_CALL 2
+
+/* XXX: restore CPU state in registers (PowerPC case) */
+static void switch_tss(int tss_selector,
+ uint32_t e1, uint32_t e2, int source,
+ uint32_t next_eip)
+{
+ int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i;
+ target_ulong tss_base;
+ uint32_t new_regs[8], new_segs[6];
+ uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap;
+ uint32_t old_eflags, eflags_mask;
+ SegmentCache *dt;
+#ifndef VBOX
+ int index;
+#else
+ unsigned int index;
+#endif
+ target_ulong ptr;
+
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source);
+
+ /* if task gate, we read the TSS segment and we load it */
+ if (type == 5) {
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+ tss_selector = e1 >> 16;
+ if (tss_selector & 4)
+ raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+ if (load_segment(&e1, &e2, tss_selector) != 0)
+ raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+ if (e2 & DESC_S_MASK)
+ raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ if ((type & 7) != 1)
+ raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc);
+ }
+
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc);
+
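+ /* The minimum TSS sizes are 104 bytes (0x68) for a 32-bit TSS and 44 bytes
+    (0x2c) for a 16-bit one; the limits checked below are those sizes minus one. */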
+ if (type & 8)
+ tss_limit_max = 103;
+ else
+ tss_limit_max = 43;
+ tss_limit = get_seg_limit(e1, e2);
+ tss_base = get_seg_base(e1, e2);
+ if ((tss_selector & 4) != 0 ||
+ tss_limit < tss_limit_max)
+ raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+ old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ if (old_type & 8)
+ old_tss_limit_max = 103;
+ else
+ old_tss_limit_max = 43;
+
+#ifndef VBOX /* The old TSS is written first... */
+ /* read all the registers from the new TSS */
+ if (type & 8) {
+ /* 32 bit */
+ new_cr3 = ldl_kernel(tss_base + 0x1c);
+ new_eip = ldl_kernel(tss_base + 0x20);
+ new_eflags = ldl_kernel(tss_base + 0x24);
+ for(i = 0; i < 8; i++)
+ new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+ for(i = 0; i < 6; i++)
+ new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+ new_ldt = lduw_kernel(tss_base + 0x60);
+ new_trap = ldl_kernel(tss_base + 0x64);
+ } else {
+ /* 16 bit */
+ new_cr3 = 0;
+ new_eip = lduw_kernel(tss_base + 0x0e);
+ new_eflags = lduw_kernel(tss_base + 0x10);
+ for(i = 0; i < 8; i++)
+ new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+ for(i = 0; i < 4; i++)
+ new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2)); /* ES/CS/SS/DS are 2 bytes apart in a 16-bit TSS */
+ new_ldt = lduw_kernel(tss_base + 0x2a);
+ new_segs[R_FS] = 0;
+ new_segs[R_GS] = 0;
+ new_trap = 0;
+ }
+#endif
+
+ /* NOTE: we must avoid memory exceptions during the task switch,
+    so we probe the memory with dummy accesses beforehand */
+ /* XXX: it can still fail in some cases, so a bigger hack is
+    necessary to validate the TLB after the accesses have been done */
+
+ v1 = ldub_kernel(env->tr.base);
+ v2 = ldub_kernel(env->tr.base + old_tss_limit_max);
+ stb_kernel(env->tr.base, v1);
+ stb_kernel(env->tr.base + old_tss_limit_max, v2);
+
+ /* clear busy bit (it is restartable) */
+ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
+ target_ulong ptr;
+ uint32_t e2;
+ ptr = env->gdt.base + (env->tr.selector & ~7);
+ e2 = ldl_kernel(ptr + 4);
+ e2 &= ~DESC_TSS_BUSY_MASK;
+ stl_kernel(ptr + 4, e2);
+ }
+ old_eflags = compute_eflags();
+ if (source == SWITCH_TSS_IRET)
+ old_eflags &= ~NT_MASK;
+
+ /* save the current state in the old TSS */
+ if (type & 8) {
+ /* 32 bit */
+ stl_kernel(env->tr.base + 0x20, next_eip);
+ stl_kernel(env->tr.base + 0x24, old_eflags);
+ stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX);
+ stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX);
+ stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX);
+ stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX);
+ stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP);
+ stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP);
+ stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI);
+ stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI);
+ for(i = 0; i < 6; i++)
+ stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector);
+#if defined(VBOX) && defined(DEBUG)
+ printf("TSS 32 bits switch\n");
+ printf("Saving CS=%08X\n", env->segs[R_CS].selector);
+#endif
+ } else {
+ /* 16 bit */
+ stw_kernel(env->tr.base + 0x0e, next_eip);
+ stw_kernel(env->tr.base + 0x10, old_eflags);
+ stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX);
+ stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX);
+ stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX);
+ stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX);
+ stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP);
+ stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP);
+ stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI);
+ stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI);
+ for(i = 0; i < 4; i++)
+ stw_kernel(env->tr.base + (0x22 + i * 2), env->segs[i].selector);
+ }
+
+#ifdef VBOX
+ /* read all the registers from the new TSS - may be the same as the old one */
+ if (type & 8) {
+ /* 32 bit */
+ new_cr3 = ldl_kernel(tss_base + 0x1c);
+ new_eip = ldl_kernel(tss_base + 0x20);
+ new_eflags = ldl_kernel(tss_base + 0x24);
+ for(i = 0; i < 8; i++)
+ new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4));
+ for(i = 0; i < 6; i++)
+ new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4));
+ new_ldt = lduw_kernel(tss_base + 0x60);
+ new_trap = ldl_kernel(tss_base + 0x64);
+ } else {
+ /* 16 bit */
+ new_cr3 = 0;
+ new_eip = lduw_kernel(tss_base + 0x0e);
+ new_eflags = lduw_kernel(tss_base + 0x10);
+ for(i = 0; i < 8; i++)
+ new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000;
+ for(i = 0; i < 4; i++)
+ new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2));
+ new_ldt = lduw_kernel(tss_base + 0x2a);
+ new_segs[R_FS] = 0;
+ new_segs[R_GS] = 0;
+ new_trap = 0;
+ }
+#endif
+
+ /* from now on, if an exception occurs, it will occur in the new
+    task's context */
+
+ if (source == SWITCH_TSS_CALL) {
+ stw_kernel(tss_base, env->tr.selector);
+ new_eflags |= NT_MASK;
+ }
+
+ /* set busy bit */
+ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) {
+ target_ulong ptr;
+ uint32_t e2;
+ ptr = env->gdt.base + (tss_selector & ~7);
+ e2 = ldl_kernel(ptr + 4);
+ e2 |= DESC_TSS_BUSY_MASK;
+ stl_kernel(ptr + 4, e2);
+ }
+
+ /* set the new CPU state */
+ /* from this point, any exception which occurs can give problems */
+ env->cr[0] |= CR0_TS_MASK;
+ env->hflags |= HF_TS_MASK;
+ env->tr.selector = tss_selector;
+ env->tr.base = tss_base;
+ env->tr.limit = tss_limit;
+#ifndef VBOX
+ env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK;
+#else
+ env->tr.flags = (e2 | DESC_TSS_BUSY_MASK) & DESC_RAW_FLAG_BITS;
+ env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->tr.newselector = 0;
+#endif
+
+ if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) {
+ cpu_x86_update_cr3(env, new_cr3);
+ }
+
+ /* load all registers without raising exceptions, then reload them
+    in a way that may raise exceptions */
+ env->eip = new_eip;
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK |
+ IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK;
+ if (!(type & 8))
+ eflags_mask &= 0xffff;
+ load_eflags(new_eflags, eflags_mask);
+ /* XXX: what to do in 16 bit case ? */
+ EAX = new_regs[0];
+ ECX = new_regs[1];
+ EDX = new_regs[2];
+ EBX = new_regs[3];
+ ESP = new_regs[4];
+ EBP = new_regs[5];
+ ESI = new_regs[6];
+ EDI = new_regs[7];
+ if (new_eflags & VM_MASK) {
+ for(i = 0; i < 6; i++)
+ load_seg_vm(i, new_segs[i]);
+ /* in vm86, CPL is always 3 */
+ cpu_x86_set_cpl(env, 3);
+ } else {
+ /* CPL is set to the RPL of CS */
+ cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
+ /* load just the selectors first, as loading the rest may trigger exceptions */
+ for(i = 0; i < 6; i++)
+ cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
+ }
+
+ env->ldt.selector = new_ldt & ~4;
+ env->ldt.base = 0;
+ env->ldt.limit = 0;
+ env->ldt.flags = 0;
+#ifdef VBOX
+ env->ldt.flags = DESC_INTEL_UNUSABLE;
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+
+ /* load the LDT */
+ if (new_ldt & 4)
+ raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+
+ if ((new_ldt & 0xfffc) != 0) {
+ dt = &env->gdt;
+ index = new_ldt & ~7;
+ if ((index + 7) > dt->limit)
+ raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+ raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc);
+ load_seg_cache_raw_dt(&env->ldt, e1, e2);
+ }
+
+ /* load the segments */
+ if (!(new_eflags & VM_MASK)) {
+ tss_load_seg(R_CS, new_segs[R_CS]);
+ tss_load_seg(R_SS, new_segs[R_SS]);
+ tss_load_seg(R_ES, new_segs[R_ES]);
+ tss_load_seg(R_DS, new_segs[R_DS]);
+ tss_load_seg(R_FS, new_segs[R_FS]);
+ tss_load_seg(R_GS, new_segs[R_GS]);
+ }
+
+ /* check that EIP is in the CS segment limits */
+ if (new_eip > env->segs[R_CS].limit) {
+ /* XXX: different exception if CALL ? */
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+
+#ifndef CONFIG_USER_ONLY
+ /* reset local breakpoints */
+ if (env->dr[7] & 0x55) {
+ for (i = 0; i < 4; i++) {
+ if (hw_breakpoint_enabled(env->dr[7], i) == 0x1)
+ hw_breakpoint_remove(env, i);
+ }
+ env->dr[7] &= ~0x55;
+ }
+#endif
+}
+
+/* check if Port I/O is allowed in TSS */
+static inline void check_io(int addr, int size)
+{
+#ifndef VBOX
+ int io_offset, val, mask;
+#else
+ int val, mask;
+ unsigned int io_offset;
+#endif /* VBOX */
+
+ /* TSS must be a valid 32 bit one */
+ if (!(env->tr.flags & DESC_P_MASK) ||
+ ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 ||
+ env->tr.limit < 103)
+ goto fail;
+ io_offset = lduw_kernel(env->tr.base + 0x66);
+ io_offset += (addr >> 3);
+ /* Note: the check needs two bytes */
+ if ((io_offset + 1) > env->tr.limit)
+ goto fail;
+ val = lduw_kernel(env->tr.base + io_offset);
+ val >>= (addr & 7);
+ mask = (1 << size) - 1;
+ /* all bits must be zero to allow the I/O */
+ if ((val & mask) != 0) {
+ fail:
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+}
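+
+/* Example: a 1-byte access to port 0x71 checks byte (io_offset + (0x71 >> 3))
+   of the TSS I/O permission bitmap and allows the access only if bit
+   (0x71 & 7) of the word read there is clear; two bytes are read so that a
+   mask spanning a byte boundary is handled correctly. */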
+
+#ifdef VBOX
+
+/* Keep in sync with gen_check_external_event() */
+void helper_check_external_event(void)
+{
+ if ( (env->interrupt_request & ( CPU_INTERRUPT_EXTERNAL_FLUSH_TLB
+ | CPU_INTERRUPT_EXTERNAL_EXIT
+ | CPU_INTERRUPT_EXTERNAL_TIMER
+ | CPU_INTERRUPT_EXTERNAL_DMA))
+ || ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+ && (env->eflags & IF_MASK)
+ && !(env->hflags & HF_INHIBIT_IRQ_MASK) ) )
+ {
+ helper_external_event();
+ }
+
+}
+
+void helper_sync_seg(uint32_t reg)
+{
+ if (env->segs[reg].newselector)
+ sync_seg(env, reg, env->segs[reg].newselector);
+}
+
+#endif /* VBOX */
+
+void helper_check_iob(uint32_t t0)
+{
+ check_io(t0, 1);
+}
+
+void helper_check_iow(uint32_t t0)
+{
+ check_io(t0, 2);
+}
+
+void helper_check_iol(uint32_t t0)
+{
+ check_io(t0, 4);
+}
+
+void helper_outb(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outb(port, data & 0xff);
+#else
+ cpu_outb(env, port, data & 0xff);
+#endif
+}
+
+target_ulong helper_inb(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inb(port);
+#else
+ return cpu_inb(env, port);
+#endif
+}
+
+void helper_outw(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outw(port, data & 0xffff);
+#else
+ cpu_outw(env, port, data & 0xffff);
+#endif
+}
+
+target_ulong helper_inw(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inw(port);
+#else
+ return cpu_inw(env, port);
+#endif
+}
+
+void helper_outl(uint32_t port, uint32_t data)
+{
+#ifndef VBOX
+ cpu_outl(port, data);
+#else
+ cpu_outl(env, port, data);
+#endif
+}
+
+target_ulong helper_inl(uint32_t port)
+{
+#ifndef VBOX
+ return cpu_inl(port);
+#else
+ return cpu_inl(env, port);
+#endif
+}
+
+static inline unsigned int get_sp_mask(unsigned int e2)
+{
+ if (e2 & DESC_B_MASK)
+ return 0xffffffff;
+ else
+ return 0xffff;
+}
+
+static int exeption_has_error_code(int intno)
+{
+ switch(intno) {
+ case 8:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 17:
+ return 1;
+ }
+ return 0;
+}
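+
+/* Vectors 8, 10-14 and 17 are #DF, #TS, #NP, #SS, #GP, #PF and #AC, the
+   exceptions that push an error code on the stack. */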
+
+#ifdef TARGET_X86_64
+#define SET_ESP(val, sp_mask)\
+do {\
+ if ((sp_mask) == 0xffff)\
+ ESP = (ESP & ~0xffff) | ((val) & 0xffff);\
+ else if ((sp_mask) == 0xffffffffLL)\
+ ESP = (uint32_t)(val);\
+ else\
+ ESP = (val);\
+} while (0)
+#else
+#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask))
+#endif
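+
+/* SET_ESP only replaces the bits covered by sp_mask, so with a 16-bit stack
+   segment (sp_mask == 0xffff) the upper bits of ESP/RSP are preserved, while
+   a 32-bit stack zero-extends the value on 64-bit targets. */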
+
+/* in 64-bit machines, this can overflow. So this segment addition macro
+ * can be used to trim the value to 32-bit whenever needed */
+#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask))))
+
+/* XXX: add an is_user flag to have proper security support */
+#define PUSHW(ssp, sp, sp_mask, val)\
+{\
+ sp -= 2;\
+ stw_kernel((ssp) + (sp & (sp_mask)), (val));\
+}
+
+#define PUSHL(ssp, sp, sp_mask, val)\
+{\
+ sp -= 4;\
+ stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\
+}
+
+#define POPW(ssp, sp, sp_mask, val)\
+{\
+ val = lduw_kernel((ssp) + (sp & (sp_mask)));\
+ sp += 2;\
+}
+
+#define POPL(ssp, sp, sp_mask, val)\
+{\
+ val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\
+ sp += 4;\
+}
+
+/* protected mode interrupt */
+static void do_interrupt_protected(int intno, int is_int, int error_code,
+ unsigned int next_eip, int is_hw)
+{
+ SegmentCache *dt;
+ target_ulong ptr, ssp;
+ int type, dpl, selector, ss_dpl, cpl;
+ int has_error_code, new_stack, shift;
+ uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
+ uint32_t old_eip, sp_mask;
+
+#ifdef VBOX
+ if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+ cpu_loop_exit();
+#endif
+
+ has_error_code = 0;
+ if (!is_int && !is_hw)
+ has_error_code = exeption_has_error_code(intno);
+ if (is_int)
+ old_eip = next_eip;
+ else
+ old_eip = env->eip;
+
+ dt = &env->idt;
+#ifndef VBOX
+ if (intno * 8 + 7 > dt->limit)
+#else
+ if ((unsigned)intno * 8 + 7 > dt->limit)
+#endif
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ ptr = dt->base + intno * 8;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ switch(type) {
+ case 5: /* task gate */
+#ifdef VBOX
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+#endif
+ /* must do that check here to return the correct error code */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+ switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
+ if (has_error_code) {
+ int type;
+ uint32_t mask;
+ /* push the error code */
+ type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ shift = type >> 3;
+ if (env->segs[R_SS].flags & DESC_B_MASK)
+ mask = 0xffffffff;
+ else
+ mask = 0xffff;
+ esp = (ESP - (2 << shift)) & mask;
+ ssp = env->segs[R_SS].base + esp;
+ if (shift)
+ stl_kernel(ssp, error_code);
+ else
+ stw_kernel(ssp, error_code);
+ SET_ESP(esp, mask);
+ }
+ return;
+ case 6: /* 286 interrupt gate */
+ case 7: /* 286 trap gate */
+ case 14: /* 386 interrupt gate */
+ case 15: /* 386 trap gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ break;
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2);
+ selector = e1 >> 16;
+ offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+#ifdef VBOX /** @todo figure out when this is done one day... */
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(selector, e2);
+#endif
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+ /* to inner privilege */
+ get_ss_esp_from_tss(&ss, &esp, dpl);
+ if ((ss & 0xfffc) == 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if ((ss & 3) != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#ifdef VBOX /** @todo figure out when this is done one day... */
+ if (!(ss_e2 & DESC_A_MASK))
+ ss_e2 = set_segment_accessed(ss, ss_e2);
+#endif
+ ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (ss_dpl != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-477 of 253666.pdf */
+ raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+ new_stack = 1;
+ sp_mask = get_sp_mask(ss_e2);
+ ssp = get_seg_base(ss_e1, ss_e2);
+#if defined(VBOX) && defined(DEBUG)
+ printf("new stack %04X:%08X gate dpl=%d\n", ss, esp, dpl);
+#endif
+ } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+ /* to same privilege */
+ if (env->eflags & VM_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ esp = ESP;
+ dpl = cpl;
+ } else {
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0; /* avoid warning */
+ sp_mask = 0; /* avoid warning */
+ ssp = 0; /* avoid warning */
+ esp = 0; /* avoid warning */
+ }
+
+ shift = type >> 3;
+
+#if 0
+ /* XXX: check that enough room is available */
+ push_size = 6 + (new_stack << 2) + (has_error_code << 1);
+ if (env->eflags & VM_MASK)
+ push_size += 8;
+ push_size <<= shift;
+#endif
+ if (shift == 1) {
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector);
+ PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector);
+ }
+ PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
+ PUSHL(ssp, esp, sp_mask, ESP);
+ }
+ PUSHL(ssp, esp, sp_mask, compute_eflags());
+ PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, esp, sp_mask, old_eip);
+ if (has_error_code) {
+ PUSHL(ssp, esp, sp_mask, error_code);
+ }
+ } else {
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector);
+ PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector);
+ }
+ PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
+ PUSHW(ssp, esp, sp_mask, ESP);
+ }
+ PUSHW(ssp, esp, sp_mask, compute_eflags());
+ PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, esp, sp_mask, old_eip);
+ if (has_error_code) {
+ PUSHW(ssp, esp, sp_mask, error_code);
+ }
+ }
+
+ if (new_stack) {
+ if (env->eflags & VM_MASK) {
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0, 0);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0);
+ }
+ ss = (ss & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_SS, ss,
+ ssp, get_seg_limit(ss_e1, ss_e2), ss_e2);
+ }
+ SET_ESP(esp, sp_mask);
+
+ selector = (selector & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ env->eip = offset;
+
+ /* interrupt gate clear IF mask */
+ if ((type & 1) == 0) {
+ env->eflags &= ~IF_MASK;
+ }
+#ifndef VBOX
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+#else
+ /*
+ * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD
+ * gets confused by seemingly changed EFLAGS. See #3491 and
+ * public bug #2341.
+ */
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+#endif
+}
+
+#ifdef VBOX
+
+/* check if VME interrupt redirection is enabled in TSS */
+DECLINLINE(bool) is_vme_irq_redirected(int intno)
+{
+ unsigned int io_offset, intredir_offset;
+ unsigned char val, mask;
+
+ /* TSS must be a valid 32 bit one */
+ if (!(env->tr.flags & DESC_P_MASK) ||
+ ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 ||
+ env->tr.limit < 103)
+ goto fail;
+ io_offset = lduw_kernel(env->tr.base + 0x66);
+ /* Make sure the io bitmap offset is valid; anything less than sizeof(VBOXTSS) means there's none. */
+ if (io_offset < 0x68 + 0x20)
+ io_offset = 0x68 + 0x20;
+ /* the virtual interrupt redirection bitmap is located below the io bitmap */
+ intredir_offset = io_offset - 0x20;
+
+ intredir_offset += (intno >> 3);
+ if ((intredir_offset) > env->tr.limit)
+ goto fail;
+
+ val = ldub_kernel(env->tr.base + intredir_offset);
+ mask = 1 << (unsigned char)(intno & 7);
+
+ /* bit set means no redirection. */
+ if ((val & mask) != 0) {
+ return false;
+ }
+ return true;
+
+fail:
+ raise_exception_err(EXCP0D_GPF, 0);
+ return true;
+}
+
+/* V86 mode software interrupt with CR4.VME=1 */
+static void do_soft_interrupt_vme(int intno, int error_code, unsigned int next_eip)
+{
+ target_ulong ptr, ssp;
+ int selector;
+ uint32_t offset, esp;
+ uint32_t old_cs, old_eflags;
+ uint32_t iopl;
+
+ iopl = ((env->eflags >> IOPL_SHIFT) & 3);
+
+ if (!is_vme_irq_redirected(intno))
+ {
+ if (iopl == 3)
+ {
+ do_interrupt_protected(intno, 1, error_code, next_eip, 0);
+ return;
+ }
+ else
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+
+ /* virtual mode idt is at linear address 0 */
+ ptr = 0 + intno * 4;
+ offset = lduw_kernel(ptr);
+ selector = lduw_kernel(ptr + 2);
+ esp = ESP;
+ ssp = env->segs[R_SS].base;
+ old_cs = env->segs[R_CS].selector;
+
+ old_eflags = compute_eflags();
+ if (iopl < 3)
+ {
+ /* copy VIF into IF and set IOPL to 3 */
+ if (env->eflags & VIF_MASK)
+ old_eflags |= IF_MASK;
+ else
+ old_eflags &= ~IF_MASK;
+
+ old_eflags |= (3 << IOPL_SHIFT);
+ }
+
+ /* XXX: use SS segment size ? */
+ PUSHW(ssp, esp, 0xffff, old_eflags);
+ PUSHW(ssp, esp, 0xffff, old_cs);
+ PUSHW(ssp, esp, 0xffff, next_eip);
+
+ /* update processor state */
+ ESP = (ESP & ~0xffff) | (esp & 0xffff);
+ env->eip = offset;
+ env->segs[R_CS].selector = selector;
+ env->segs[R_CS].base = (selector << 4);
+ env->eflags &= ~(TF_MASK | RF_MASK);
+
+ if (iopl < 3)
+ env->eflags &= ~VIF_MASK;
+ else
+ env->eflags &= ~IF_MASK;
+}
+
+#endif /* VBOX */
+
+#ifdef TARGET_X86_64
+
+#define PUSHQ(sp, val)\
+{\
+ sp -= 8;\
+ stq_kernel(sp, (val));\
+}
+
+#define POPQ(sp, val)\
+{\
+ val = ldq_kernel(sp);\
+ sp += 8;\
+}
+
+static inline target_ulong get_rsp_from_tss(int level)
+{
+ int index;
+
+#if 0
+ printf("TR: base=" TARGET_FMT_lx " limit=%x\n",
+ env->tr.base, env->tr.limit);
+#endif
+
+ if (!(env->tr.flags & DESC_P_MASK))
+ cpu_abort(env, "invalid tss");
+ index = 8 * level + 4;
+ if ((index + 7) > env->tr.limit)
+ raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+ return ldq_kernel(env->tr.base + index);
+}
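+
+/* In the 64-bit TSS, RSP0..RSP2 live at offsets 4, 12 and 20 and IST1..IST7
+   at offsets 36..84, so "8 * level + 4" covers both: levels 0-2 select the
+   privilege-level stacks and the callers pass ist + 3 (levels 4-10) to
+   select an IST entry. */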
+
+/* 64 bit interrupt */
+static void do_interrupt64(int intno, int is_int, int error_code,
+ target_ulong next_eip, int is_hw)
+{
+ SegmentCache *dt;
+ target_ulong ptr;
+ int type, dpl, selector, cpl, ist;
+ int has_error_code, new_stack;
+ uint32_t e1, e2, e3, ss;
+ target_ulong old_eip, esp, offset;
+
+#ifdef VBOX
+ if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS)
+ cpu_loop_exit();
+#endif
+
+ has_error_code = 0;
+ if (!is_int && !is_hw)
+ has_error_code = exeption_has_error_code(intno);
+ if (is_int)
+ old_eip = next_eip;
+ else
+ old_eip = env->eip;
+
+ dt = &env->idt;
+ if (intno * 16 + 15 > dt->limit)
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ ptr = dt->base + intno * 16;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ e3 = ldl_kernel(ptr + 8);
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ switch(type) {
+ case 14: /* 386 interrupt gate */
+ case 15: /* 386 trap gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ break;
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, intno * 16 + 2);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2);
+ selector = e1 >> 16;
+ offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+ ist = e2 & 7;
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) {
+ /* to inner privilege */
+ if (ist != 0)
+ esp = get_rsp_from_tss(ist + 3);
+ else
+ esp = get_rsp_from_tss(dpl);
+ esp &= ~0xfLL; /* align stack */
+ ss = 0;
+ new_stack = 1;
+ } else if ((e2 & DESC_C_MASK) || dpl == cpl) {
+ /* to same privilege */
+ if (env->eflags & VM_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0;
+ if (ist != 0)
+ esp = get_rsp_from_tss(ist + 3);
+ else
+ esp = ESP;
+ esp &= ~0xfLL; /* align stack */
+ dpl = cpl;
+ } else {
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ new_stack = 0; /* avoid warning */
+ esp = 0; /* avoid warning */
+ }
+
+ PUSHQ(esp, env->segs[R_SS].selector);
+ PUSHQ(esp, ESP);
+ PUSHQ(esp, compute_eflags());
+ PUSHQ(esp, env->segs[R_CS].selector);
+ PUSHQ(esp, old_eip);
+ if (has_error_code) {
+ PUSHQ(esp, error_code);
+ }
+
+ if (new_stack) {
+ ss = 0 | dpl;
+#ifndef VBOX
+ cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+#else
+ cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, dpl << DESC_DPL_SHIFT);
+#endif
+ }
+ ESP = esp;
+
+ selector = (selector & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ env->eip = offset;
+
+ /* interrupt gate clear IF mask */
+ if ((type & 1) == 0) {
+ env->eflags &= ~IF_MASK;
+ }
+#ifndef VBOX
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+#else /* VBOX */
+ /*
+ * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD
+ * gets confused by seemingly changed EFLAGS. See #3491 and
+ * public bug #2341.
+ */
+ env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+#endif /* VBOX */
+}
+#endif
+
+#ifdef TARGET_X86_64
+#if defined(CONFIG_USER_ONLY)
+void helper_syscall(int next_eip_addend)
+{
+ env->exception_index = EXCP_SYSCALL;
+ env->exception_next_eip = env->eip + next_eip_addend;
+ cpu_loop_exit();
+}
+#else
+void helper_syscall(int next_eip_addend)
+{
+ int selector;
+
+ if (!(env->efer & MSR_EFER_SCE)) {
+ raise_exception_err(EXCP06_ILLOP, 0);
+ }
+ selector = (env->star >> 32) & 0xffff;
+ if (env->hflags & HF_LMA_MASK) {
+ int code64;
+
+ ECX = env->eip + next_eip_addend;
+ env->regs[11] = compute_eflags();
+
+ code64 = env->hflags & HF_CS64_MASK;
+
+ cpu_x86_set_cpl(env, 0);
+ cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_W_MASK | DESC_A_MASK);
+ env->eflags &= ~env->fmask;
+ load_eflags(env->eflags, 0);
+ if (code64)
+ env->eip = env->lstar;
+ else
+ env->eip = env->cstar;
+ } else {
+ ECX = (uint32_t)(env->eip + next_eip_addend);
+
+ cpu_x86_set_cpl(env, 0);
+ cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_W_MASK | DESC_A_MASK);
+ env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
+ env->eip = (uint32_t)env->star;
+ }
+}
+#endif
+#endif
+
+#ifdef TARGET_X86_64
+void helper_sysret(int dflag)
+{
+ int cpl, selector;
+
+ if (!(env->efer & MSR_EFER_SCE)) {
+ raise_exception_err(EXCP06_ILLOP, 0);
+ }
+ cpl = env->hflags & HF_CPL_MASK;
+ if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) {
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+ selector = (env->star >> 48) & 0xffff;
+ if (env->hflags & HF_LMA_MASK) {
+ if (dflag == 2) {
+ cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+ DESC_L_MASK);
+ env->eip = ECX;
+ } else {
+ cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ env->eip = (uint32_t)ECX;
+ }
+ cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK |
+ IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK);
+ cpu_x86_set_cpl(env, 3);
+ } else {
+ cpu_x86_load_seg_cache(env, R_CS, selector | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ env->eip = (uint32_t)ECX;
+ cpu_x86_load_seg_cache(env, R_SS, selector + 8,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ env->eflags |= IF_MASK;
+ cpu_x86_set_cpl(env, 3);
+ }
+}
+#endif
+
+#ifdef VBOX
+
+/**
+ * Checks and processes external VMM events.
+ * Called by op_check_external_event() when any of the flags is set and can be serviced.
+ */
+void helper_external_event(void)
+{
+# if defined(RT_OS_DARWIN) && defined(VBOX_STRICT)
+ uintptr_t uSP;
+# ifdef RT_ARCH_AMD64
+ __asm__ __volatile__("movq %%rsp, %0" : "=r" (uSP));
+# else
+ __asm__ __volatile__("movl %%esp, %0" : "=r" (uSP));
+# endif
+ AssertMsg(!(uSP & 15), ("xSP=%#p\n", uSP));
+# endif
+ /* Keep in sync with flags checked by gen_check_external_event() */
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD)
+ {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+ ~CPU_INTERRUPT_EXTERNAL_HARD);
+ cpu_interrupt(env, CPU_INTERRUPT_HARD);
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_EXIT)
+ {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+ ~CPU_INTERRUPT_EXTERNAL_EXIT);
+ cpu_exit(env);
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_DMA)
+ {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+ ~CPU_INTERRUPT_EXTERNAL_DMA);
+ remR3DmaRun(env);
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER)
+ {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+ ~CPU_INTERRUPT_EXTERNAL_TIMER);
+ remR3TimersRun(env);
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB)
+ {
+ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request,
+ ~CPU_INTERRUPT_EXTERNAL_FLUSH_TLB); /* clear the flush request itself */
+ cpu_interrupt(env, CPU_INTERRUPT_HARD);
+ }
+}
+
+/* helper for recording call instruction addresses for later scanning */
+void helper_record_call(void)
+{
+ if ( !(env->state & CPU_RAW_RING0)
+ && (env->cr[0] & CR0_PG_MASK)
+ && !(env->eflags & X86_EFL_IF))
+ remR3RecordCall(env);
+}
+
+#endif /* VBOX */
+
+/* real mode interrupt */
+static void do_interrupt_real(int intno, int is_int, int error_code,
+ unsigned int next_eip)
+{
+ SegmentCache *dt;
+ target_ulong ptr, ssp;
+ int selector;
+ uint32_t offset, esp;
+ uint32_t old_cs, old_eip;
+
+ /* real mode (simpler !) */
+ dt = &env->idt;
+#ifndef VBOX
+ if (intno * 4 + 3 > dt->limit)
+#else
+ if ((unsigned)intno * 4 + 3 > dt->limit)
+#endif
+ raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
+ ptr = dt->base + intno * 4;
+ offset = lduw_kernel(ptr);
+ selector = lduw_kernel(ptr + 2);
+ esp = ESP;
+ ssp = env->segs[R_SS].base;
+ if (is_int)
+ old_eip = next_eip;
+ else
+ old_eip = env->eip;
+ old_cs = env->segs[R_CS].selector;
+ /* XXX: use SS segment size ? */
+ PUSHW(ssp, esp, 0xffff, compute_eflags());
+ PUSHW(ssp, esp, 0xffff, old_cs);
+ PUSHW(ssp, esp, 0xffff, old_eip);
+
+ /* update processor state */
+ ESP = (ESP & ~0xffff) | (esp & 0xffff);
+ env->eip = offset;
+ env->segs[R_CS].selector = selector;
+ env->segs[R_CS].base = (selector << 4);
+ env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK);
+}
+
+/* fake user mode interrupt */
+void do_interrupt_user(int intno, int is_int, int error_code,
+ target_ulong next_eip)
+{
+ SegmentCache *dt;
+ target_ulong ptr;
+ int dpl, cpl, shift;
+ uint32_t e2;
+
+ dt = &env->idt;
+ if (env->hflags & HF_LMA_MASK) {
+ shift = 4;
+ } else {
+ shift = 3;
+ }
+ ptr = dt->base + (intno << shift);
+ e2 = ldl_kernel(ptr + 4);
+
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ /* check privilege if software int */
+ if (is_int && dpl < cpl)
+ raise_exception_err(EXCP0D_GPF, (intno << shift) + 2);
+
+ /* Since we only emulate user space, we cannot do more than exit the
+    emulation with the appropriate exception and error code */
+ if (is_int)
+ EIP = next_eip;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+static void handle_even_inj(int intno, int is_int, int error_code,
+ int is_hw, int rm)
+{
+ uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+ if (!(event_inj & SVM_EVTINJ_VALID)) {
+ int type;
+ if (is_int)
+ type = SVM_EVTINJ_TYPE_SOFT;
+ else
+ type = SVM_EVTINJ_TYPE_EXEPT;
+ event_inj = intno | type | SVM_EVTINJ_VALID;
+ if (!rm && exeption_has_error_code(intno)) {
+ event_inj |= SVM_EVTINJ_VALID_ERR;
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code);
+ }
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj);
+ }
+}
+#endif
+
+/*
+ * Begin execution of an interrupt. is_int is TRUE if coming from
+ * the int instruction. next_eip is the EIP value AFTER the interrupt
+ * instruction. It is only relevant if is_int is TRUE.
+ */
+void do_interrupt(int intno, int is_int, int error_code,
+ target_ulong next_eip, int is_hw)
+{
+ if (qemu_loglevel_mask(CPU_LOG_INT)) {
+ if ((env->cr[0] & CR0_PE_MASK)) {
+ static int count;
+ qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx,
+ count, intno, error_code, is_int,
+ env->hflags & HF_CPL_MASK,
+ env->segs[R_CS].selector, EIP,
+ (int)env->segs[R_CS].base + EIP,
+ env->segs[R_SS].selector, ESP);
+ if (intno == 0x0e) {
+ qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]);
+ } else {
+ qemu_log(" EAX=" TARGET_FMT_lx, EAX);
+ }
+ qemu_log("\n");
+ log_cpu_state(env, X86_DUMP_CCOP);
+#if 0
+ {
+ int i;
+ uint8_t *ptr;
+ qemu_log(" code=");
+ ptr = env->segs[R_CS].base + env->eip;
+ for(i = 0; i < 16; i++) {
+ qemu_log(" %02x", ldub(ptr + i));
+ }
+ qemu_log("\n");
+ }
+#endif
+ count++;
+ }
+ }
+#ifdef VBOX
+ if (RT_UNLIKELY(env->state & CPU_EMULATE_SINGLE_STEP)) {
+ if (is_int) {
+ RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv%s\n",
+ intno, error_code, (RTGCPTR)env->eip, is_hw ? " hw" : "");
+ } else {
+ RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv next=%#RGv%s\n",
+ intno, error_code, (RTGCPTR)env->eip, (RTGCPTR)next_eip, is_hw ? " hw" : "");
+ }
+ }
+#endif
+ if (env->cr[0] & CR0_PE_MASK) {
+#if !defined(CONFIG_USER_ONLY)
+ if (env->hflags & HF_SVMI_MASK)
+ handle_even_inj(intno, is_int, error_code, is_hw, 0);
+#endif
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ do_interrupt64(intno, is_int, error_code, next_eip, is_hw);
+ } else
+#endif
+ {
+#ifdef VBOX
+ /* int xx *, v86 code and VME enabled? */
+ if ( (env->eflags & VM_MASK)
+ && (env->cr[4] & CR4_VME_MASK)
+ && is_int
+ && !is_hw
+ && env->eip + 1 != next_eip /* single byte int 3 goes straight to the protected mode handler */
+ )
+ do_soft_interrupt_vme(intno, error_code, next_eip);
+ else
+#endif /* VBOX */
+ do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw);
+ }
+ } else {
+#if !defined(CONFIG_USER_ONLY)
+ if (env->hflags & HF_SVMI_MASK)
+ handle_even_inj(intno, is_int, error_code, is_hw, 1);
+#endif
+ do_interrupt_real(intno, is_int, error_code, next_eip);
+ }
+
+#if !defined(CONFIG_USER_ONLY)
+ if (env->hflags & HF_SVMI_MASK) {
+ uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+ }
+#endif
+}
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+/*
+ * Check nested exceptions and change to double or triple fault if
+ * needed. It should only be called if this is not an interrupt.
+ * Returns the new exception number.
+ */
+static int check_exception(int intno, int *error_code)
+{
+ int first_contributory = env->old_exception == 0 ||
+ (env->old_exception >= 10 &&
+ env->old_exception <= 13);
+ int second_contributory = intno == 0 ||
+ (intno >= 10 && intno <= 13);
+
+ qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n",
+ env->old_exception, intno);
+
+#if !defined(CONFIG_USER_ONLY)
+ if (env->old_exception == EXCP08_DBLE) {
+ if (env->hflags & HF_SVMI_MASK)
+ helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */
+
+ qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+
+# ifndef VBOX
+ qemu_system_reset_request();
+ return EXCP_HLT;
+# else
+ remR3RaiseRC(env->pVM, VINF_EM_TRIPLE_FAULT);
+ return EXCP_RC;
+# endif
+ }
+#endif
+
+ if ((first_contributory && second_contributory)
+ || (env->old_exception == EXCP0E_PAGE &&
+ (second_contributory || (intno == EXCP0E_PAGE)))) {
+ intno = EXCP08_DBLE;
+ *error_code = 0;
+ }
+
+ if (second_contributory || (intno == EXCP0E_PAGE) ||
+ (intno == EXCP08_DBLE))
+ env->old_exception = intno;
+
+ return intno;
+}
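+
+/* E.g. a #GP (13, contributory) raised while delivering a #PF (14) becomes a
+   #DF (8), and any further exception while delivering the #DF escalates to a
+   triple fault / shutdown in the code above. */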
+
+/*
+ * Signal an interrupt. The actual delivery happens in the main CPU loop.
+ * is_int is TRUE if coming from the int instruction. next_eip is the
+ * EIP value AFTER the interrupt instruction. It is only relevant if
+ * is_int is TRUE.
+ */
+static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code,
+ int next_eip_addend)
+{
+#if defined(VBOX) && defined(DEBUG)
+ Log2(("raise_interrupt: %x %x %x %RGv\n", intno, is_int, error_code, (RTGCPTR)env->eip + next_eip_addend));
+#endif
+ if (!is_int) {
+ helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code);
+ intno = check_exception(intno, &error_code);
+ } else {
+ helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0);
+ }
+
+ env->exception_index = intno;
+ env->error_code = error_code;
+ env->exception_is_int = is_int;
+ env->exception_next_eip = env->eip + next_eip_addend;
+ cpu_loop_exit();
+}
+
+/* shortcuts to generate exceptions */
+
+void raise_exception_err(int exception_index, int error_code)
+{
+ raise_interrupt(exception_index, 0, error_code, 0);
+}
+
+void raise_exception(int exception_index)
+{
+ raise_interrupt(exception_index, 0, 0, 0);
+}
+
+void raise_exception_env(int exception_index, CPUState *nenv)
+{
+ env = nenv;
+ raise_exception(exception_index);
+}
+/* SMM support */
+
+#if defined(CONFIG_USER_ONLY)
+
+void do_smm_enter(void)
+{
+}
+
+void helper_rsm(void)
+{
+}
+
+#else
+
+#ifdef TARGET_X86_64
+#define SMM_REVISION_ID 0x00020064
+#else
+#define SMM_REVISION_ID 0x00020000
+#endif
+
+void do_smm_enter(void)
+{
+ target_ulong sm_state;
+ SegmentCache *dt;
+ int i, offset;
+
+ qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
+ log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+
+ env->hflags |= HF_SMM_MASK;
+ cpu_smm_update(env);
+
+ sm_state = env->smbase + 0x8000;
+
+#ifdef TARGET_X86_64
+ for(i = 0; i < 6; i++) {
+ dt = &env->segs[i];
+ offset = 0x7e00 + i * 16;
+ stw_phys(sm_state + offset, dt->selector);
+ stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff);
+ stl_phys(sm_state + offset + 4, dt->limit);
+ stq_phys(sm_state + offset + 8, dt->base);
+ }
+
+ stq_phys(sm_state + 0x7e68, env->gdt.base);
+ stl_phys(sm_state + 0x7e64, env->gdt.limit);
+
+ stw_phys(sm_state + 0x7e70, env->ldt.selector);
+ stq_phys(sm_state + 0x7e78, env->ldt.base);
+ stl_phys(sm_state + 0x7e74, env->ldt.limit);
+ stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff);
+
+ stq_phys(sm_state + 0x7e88, env->idt.base);
+ stl_phys(sm_state + 0x7e84, env->idt.limit);
+
+ stw_phys(sm_state + 0x7e90, env->tr.selector);
+ stq_phys(sm_state + 0x7e98, env->tr.base);
+ stl_phys(sm_state + 0x7e94, env->tr.limit);
+ stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff);
+
+ stq_phys(sm_state + 0x7ed0, env->efer);
+
+ stq_phys(sm_state + 0x7ff8, EAX);
+ stq_phys(sm_state + 0x7ff0, ECX);
+ stq_phys(sm_state + 0x7fe8, EDX);
+ stq_phys(sm_state + 0x7fe0, EBX);
+ stq_phys(sm_state + 0x7fd8, ESP);
+ stq_phys(sm_state + 0x7fd0, EBP);
+ stq_phys(sm_state + 0x7fc8, ESI);
+ stq_phys(sm_state + 0x7fc0, EDI);
+ for(i = 8; i < 16; i++)
+ stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]);
+ stq_phys(sm_state + 0x7f78, env->eip);
+ stl_phys(sm_state + 0x7f70, compute_eflags());
+ stl_phys(sm_state + 0x7f68, env->dr[6]);
+ stl_phys(sm_state + 0x7f60, env->dr[7]);
+
+ stl_phys(sm_state + 0x7f48, env->cr[4]);
+ stl_phys(sm_state + 0x7f50, env->cr[3]);
+ stl_phys(sm_state + 0x7f58, env->cr[0]);
+
+ stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+ stl_phys(sm_state + 0x7f00, env->smbase);
+#else
+ stl_phys(sm_state + 0x7ffc, env->cr[0]);
+ stl_phys(sm_state + 0x7ff8, env->cr[3]);
+ stl_phys(sm_state + 0x7ff4, compute_eflags());
+ stl_phys(sm_state + 0x7ff0, env->eip);
+ stl_phys(sm_state + 0x7fec, EDI);
+ stl_phys(sm_state + 0x7fe8, ESI);
+ stl_phys(sm_state + 0x7fe4, EBP);
+ stl_phys(sm_state + 0x7fe0, ESP);
+ stl_phys(sm_state + 0x7fdc, EBX);
+ stl_phys(sm_state + 0x7fd8, EDX);
+ stl_phys(sm_state + 0x7fd4, ECX);
+ stl_phys(sm_state + 0x7fd0, EAX);
+ stl_phys(sm_state + 0x7fcc, env->dr[6]);
+ stl_phys(sm_state + 0x7fc8, env->dr[7]);
+
+ stl_phys(sm_state + 0x7fc4, env->tr.selector);
+ stl_phys(sm_state + 0x7f64, env->tr.base);
+ stl_phys(sm_state + 0x7f60, env->tr.limit);
+ stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff);
+
+ stl_phys(sm_state + 0x7fc0, env->ldt.selector);
+ stl_phys(sm_state + 0x7f80, env->ldt.base);
+ stl_phys(sm_state + 0x7f7c, env->ldt.limit);
+ stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff);
+
+ stl_phys(sm_state + 0x7f74, env->gdt.base);
+ stl_phys(sm_state + 0x7f70, env->gdt.limit);
+
+ stl_phys(sm_state + 0x7f58, env->idt.base);
+ stl_phys(sm_state + 0x7f54, env->idt.limit);
+
+ for(i = 0; i < 6; i++) {
+ dt = &env->segs[i];
+ if (i < 3)
+ offset = 0x7f84 + i * 12;
+ else
+ offset = 0x7f2c + (i - 3) * 12;
+ stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector);
+ stl_phys(sm_state + offset + 8, dt->base);
+ stl_phys(sm_state + offset + 4, dt->limit);
+ stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff);
+ }
+ stl_phys(sm_state + 0x7f14, env->cr[4]);
+
+ stl_phys(sm_state + 0x7efc, SMM_REVISION_ID);
+ stl_phys(sm_state + 0x7ef8, env->smbase);
+#endif
+ /* init SMM cpu state */
+
+#ifdef TARGET_X86_64
+ cpu_load_efer(env, 0);
+#endif
+ load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->eip = 0x00008000;
+ cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
+ 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
+ cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
+
+ cpu_x86_update_cr0(env,
+ env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK));
+ cpu_x86_update_cr4(env, 0);
+ env->dr[7] = 0x00000400;
+ CC_OP = CC_OP_EFLAGS;
+}
+
+void helper_rsm(void)
+{
+#ifdef VBOX
+ cpu_abort(env, "helper_rsm");
+#else /* !VBOX */
+ target_ulong sm_state;
+ int i, offset;
+ uint32_t val;
+
+ sm_state = env->smbase + 0x8000;
+#ifdef TARGET_X86_64
+ cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0));
+
+ for(i = 0; i < 6; i++) {
+ offset = 0x7e00 + i * 16;
+ cpu_x86_load_seg_cache(env, i,
+ lduw_phys(sm_state + offset),
+ ldq_phys(sm_state + offset + 8),
+ ldl_phys(sm_state + offset + 4),
+ (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8);
+ }
+
+ env->gdt.base = ldq_phys(sm_state + 0x7e68);
+ env->gdt.limit = ldl_phys(sm_state + 0x7e64);
+
+ env->ldt.selector = lduw_phys(sm_state + 0x7e70);
+ env->ldt.base = ldq_phys(sm_state + 0x7e78);
+ env->ldt.limit = ldl_phys(sm_state + 0x7e74);
+ env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+
+ env->idt.base = ldq_phys(sm_state + 0x7e88);
+ env->idt.limit = ldl_phys(sm_state + 0x7e84);
+
+ env->tr.selector = lduw_phys(sm_state + 0x7e90);
+ env->tr.base = ldq_phys(sm_state + 0x7e98);
+ env->tr.limit = ldl_phys(sm_state + 0x7e94);
+ env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->tr.newselector = 0;
+#endif
+
+ EAX = ldq_phys(sm_state + 0x7ff8);
+ ECX = ldq_phys(sm_state + 0x7ff0);
+ EDX = ldq_phys(sm_state + 0x7fe8);
+ EBX = ldq_phys(sm_state + 0x7fe0);
+ ESP = ldq_phys(sm_state + 0x7fd8);
+ EBP = ldq_phys(sm_state + 0x7fd0);
+ ESI = ldq_phys(sm_state + 0x7fc8);
+ EDI = ldq_phys(sm_state + 0x7fc0);
+ for(i = 8; i < 16; i++)
+ env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8);
+ env->eip = ldq_phys(sm_state + 0x7f78);
+ load_eflags(ldl_phys(sm_state + 0x7f70),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->dr[6] = ldl_phys(sm_state + 0x7f68);
+ env->dr[7] = ldl_phys(sm_state + 0x7f60);
+
+ cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48));
+ cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50));
+ cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58));
+
+ val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+ if (val & 0x20000) {
+ env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff;
+ }
+#else
+ cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc));
+ cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8));
+ load_eflags(ldl_phys(sm_state + 0x7ff4),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ env->eip = ldl_phys(sm_state + 0x7ff0);
+ EDI = ldl_phys(sm_state + 0x7fec);
+ ESI = ldl_phys(sm_state + 0x7fe8);
+ EBP = ldl_phys(sm_state + 0x7fe4);
+ ESP = ldl_phys(sm_state + 0x7fe0);
+ EBX = ldl_phys(sm_state + 0x7fdc);
+ EDX = ldl_phys(sm_state + 0x7fd8);
+ ECX = ldl_phys(sm_state + 0x7fd4);
+ EAX = ldl_phys(sm_state + 0x7fd0);
+ env->dr[6] = ldl_phys(sm_state + 0x7fcc);
+ env->dr[7] = ldl_phys(sm_state + 0x7fc8);
+
+ env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff;
+ env->tr.base = ldl_phys(sm_state + 0x7f64);
+ env->tr.limit = ldl_phys(sm_state + 0x7f60);
+ env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->tr.newselector = 0;
+#endif
+
+ env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff;
+ env->ldt.base = ldl_phys(sm_state + 0x7f80);
+ env->ldt.limit = ldl_phys(sm_state + 0x7f7c);
+ env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8;
+#ifdef VBOX
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+
+ env->gdt.base = ldl_phys(sm_state + 0x7f74);
+ env->gdt.limit = ldl_phys(sm_state + 0x7f70);
+
+ env->idt.base = ldl_phys(sm_state + 0x7f58);
+ env->idt.limit = ldl_phys(sm_state + 0x7f54);
+
+ for(i = 0; i < 6; i++) {
+ if (i < 3)
+ offset = 0x7f84 + i * 12;
+ else
+ offset = 0x7f2c + (i - 3) * 12;
+ cpu_x86_load_seg_cache(env, i,
+ ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff,
+ ldl_phys(sm_state + offset + 8),
+ ldl_phys(sm_state + offset + 4),
+ (ldl_phys(sm_state + offset) & 0xf0ff) << 8);
+ }
+ cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14));
+
+ val = ldl_phys(sm_state + 0x7efc); /* revision ID */
+ if (val & 0x20000) {
+ env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff;
+ }
+#endif
+ CC_OP = CC_OP_EFLAGS;
+ env->hflags &= ~HF_SMM_MASK;
+ cpu_smm_update(env);
+
+ qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
+ log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+#endif /* !VBOX */
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
+
+/* division, flags are undefined */
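+/*
+ * Note: a quotient that does not fit in the destination raises #DE just
+ * like a zero divisor does (e.g. DIV BL with AX = 0x1000 and BL = 1 gives
+ * a quotient of 0x1000, which does not fit in AL).
+ */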
+
+void helper_divb_AL(target_ulong t0)
+{
+ unsigned int num, den, q, r;
+
+ num = (EAX & 0xffff);
+ den = (t0 & 0xff);
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ if (q > 0xff)
+ raise_exception(EXCP00_DIVZ);
+ q &= 0xff;
+ r = (num % den) & 0xff;
+ EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void helper_idivb_AL(target_ulong t0)
+{
+ int num, den, q, r;
+
+ num = (int16_t)EAX;
+ den = (int8_t)t0;
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ if (q != (int8_t)q)
+ raise_exception(EXCP00_DIVZ);
+ q &= 0xff;
+ r = (num % den) & 0xff;
+ EAX = (EAX & ~0xffff) | (r << 8) | q;
+}
+
+void helper_divw_AX(target_ulong t0)
+{
+ unsigned int num, den, q, r;
+
+ num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+ den = (t0 & 0xffff);
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ if (q > 0xffff)
+ raise_exception(EXCP00_DIVZ);
+ q &= 0xffff;
+ r = (num % den) & 0xffff;
+ EAX = (EAX & ~0xffff) | q;
+ EDX = (EDX & ~0xffff) | r;
+}
+
+void helper_idivw_AX(target_ulong t0)
+{
+ int num, den, q, r;
+
+ num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
+ den = (int16_t)t0;
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ if (q != (int16_t)q)
+ raise_exception(EXCP00_DIVZ);
+ q &= 0xffff;
+ r = (num % den) & 0xffff;
+ EAX = (EAX & ~0xffff) | q;
+ EDX = (EDX & ~0xffff) | r;
+}
+
+void helper_divl_EAX(target_ulong t0)
+{
+ unsigned int den, r;
+ uint64_t num, q;
+
+ num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+ den = t0;
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ r = (num % den);
+ if (q > 0xffffffff)
+ raise_exception(EXCP00_DIVZ);
+ EAX = (uint32_t)q;
+ EDX = (uint32_t)r;
+}
+
+void helper_idivl_EAX(target_ulong t0)
+{
+ int den, r;
+ int64_t num, q;
+
+ num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+ den = t0;
+ if (den == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ q = (num / den);
+ r = (num % den);
+ if (q != (int32_t)q)
+ raise_exception(EXCP00_DIVZ);
+ EAX = (uint32_t)q;
+ EDX = (uint32_t)r;
+}
+
+/* bcd */
+
+void helper_aam(int base)
+{
+ int al, ah;
+
+ if (base == 0) {
+ /* AAM with an immediate operand of zero raises a divide error. */
+ raise_exception(EXCP00_DIVZ);
+ }
+ al = EAX & 0xff;
+ ah = al / base;
+ al = al % base;
+ EAX = (EAX & ~0xffff) | al | (ah << 8);
+ CC_DST = al;
+}
+
+void helper_aad(int base)
+{
+ int al, ah;
+ al = EAX & 0xff;
+ ah = (EAX >> 8) & 0xff;
+ al = ((ah * base) + al) & 0xff;
+ EAX = (EAX & ~0xffff) | al;
+ CC_DST = al;
+}
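+/*
+ * Worked example for the two helpers above (base 10): with AL = 0x35
+ * (53 decimal), AAM gives AH = 5, AL = 3; AAD on AH = 5, AL = 3 gives
+ * AL = 0x35 again and clears AH.
+ */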
+
+void helper_aaa(void)
+{
+ int icarry;
+ int al, ah, af;
+ int eflags;
+
+ eflags = helper_cc_compute_all(CC_OP);
+ af = eflags & CC_A;
+ al = EAX & 0xff;
+ ah = (EAX >> 8) & 0xff;
+
+ icarry = (al > 0xf9);
+ if (((al & 0x0f) > 9 ) || af) {
+ al = (al + 6) & 0x0f;
+ ah = (ah + 1 + icarry) & 0xff;
+ eflags |= CC_C | CC_A;
+ } else {
+ eflags &= ~(CC_C | CC_A);
+ al &= 0x0f;
+ }
+ EAX = (EAX & ~0xffff) | al | (ah << 8);
+ CC_SRC = eflags;
+}
+
+void helper_aas(void)
+{
+ int icarry;
+ int al, ah, af;
+ int eflags;
+
+ eflags = helper_cc_compute_all(CC_OP);
+ af = eflags & CC_A;
+ al = EAX & 0xff;
+ ah = (EAX >> 8) & 0xff;
+
+ icarry = (al < 6);
+ if (((al & 0x0f) > 9 ) || af) {
+ al = (al - 6) & 0x0f;
+ ah = (ah - 1 - icarry) & 0xff;
+ eflags |= CC_C | CC_A;
+ } else {
+ eflags &= ~(CC_C | CC_A);
+ al &= 0x0f;
+ }
+ EAX = (EAX & ~0xffff) | al | (ah << 8);
+ CC_SRC = eflags;
+}
+
+void helper_daa(void)
+{
+ int al, af, cf;
+ int eflags;
+
+ eflags = helper_cc_compute_all(CC_OP);
+ cf = eflags & CC_C;
+ af = eflags & CC_A;
+ al = EAX & 0xff;
+
+ eflags = 0;
+ if (((al & 0x0f) > 9 ) || af) {
+ al = (al + 6) & 0xff;
+ eflags |= CC_A;
+ }
+ if ((al > 0x9f) || cf) {
+ al = (al + 0x60) & 0xff;
+ eflags |= CC_C;
+ }
+ EAX = (EAX & ~0xff) | al;
+ /* well, speed is not an issue here, so we compute the flags by hand */
+ eflags |= (al == 0) << 6; /* zf */
+ eflags |= parity_table[al]; /* pf */
+ eflags |= (al & 0x80); /* sf */
+ CC_SRC = eflags;
+}
+
+void helper_das(void)
+{
+ int al, al1, af, cf;
+ int eflags;
+
+ eflags = helper_cc_compute_all(CC_OP);
+ cf = eflags & CC_C;
+ af = eflags & CC_A;
+ al = EAX & 0xff;
+
+ eflags = 0;
+ al1 = al;
+ if (((al & 0x0f) > 9 ) || af) {
+ eflags |= CC_A;
+ if (al < 6 || cf)
+ eflags |= CC_C;
+ al = (al - 6) & 0xff;
+ }
+ if ((al1 > 0x99) || cf) {
+ al = (al - 0x60) & 0xff;
+ eflags |= CC_C;
+ }
+ EAX = (EAX & ~0xff) | al;
+ /* well, speed is not an issue here, so we compute the flags by hand */
+ eflags |= (al == 0) << 6; /* zf */
+ eflags |= parity_table[al]; /* pf */
+ eflags |= (al & 0x80); /* sf */
+ CC_SRC = eflags;
+}
+
+void helper_into(int next_eip_addend)
+{
+ int eflags;
+ eflags = helper_cc_compute_all(CC_OP);
+ if (eflags & CC_O) {
+ raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend);
+ }
+}
+
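+/*
+ * CMPXCHG8B: compare EDX:EAX with the 64-bit memory operand; on a match,
+ * store ECX:EBX there and set ZF, otherwise load the operand into EDX:EAX
+ * and clear ZF. Writing the old value back on a mismatch matches the
+ * documented behaviour of the real CPU, which performs a write cycle
+ * regardless of the comparison result.
+ */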
+void helper_cmpxchg8b(target_ulong a0)
+{
+ uint64_t d;
+ int eflags;
+
+ eflags = helper_cc_compute_all(CC_OP);
+ d = ldq(a0);
+ if (d == (((uint64_t)EDX << 32) | (uint32_t)EAX)) {
+ stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX);
+ eflags |= CC_Z;
+ } else {
+ /* always do the store */
+ stq(a0, d);
+ EDX = (uint32_t)(d >> 32);
+ EAX = (uint32_t)d;
+ eflags &= ~CC_Z;
+ }
+ CC_SRC = eflags;
+}
+
+#ifdef TARGET_X86_64
+void helper_cmpxchg16b(target_ulong a0)
+{
+ uint64_t d0, d1;
+ int eflags;
+
+ if ((a0 & 0xf) != 0)
+ raise_exception(EXCP0D_GPF);
+ eflags = helper_cc_compute_all(CC_OP);
+ d0 = ldq(a0);
+ d1 = ldq(a0 + 8);
+ if (d0 == EAX && d1 == EDX) {
+ stq(a0, EBX);
+ stq(a0 + 8, ECX);
+ eflags |= CC_Z;
+ } else {
+ /* always do the store */
+ stq(a0, d0);
+ stq(a0 + 8, d1);
+ EDX = d1;
+ EAX = d0;
+ eflags &= ~CC_Z;
+ }
+ CC_SRC = eflags;
+}
+#endif
+
+void helper_single_step(void)
+{
+#ifndef CONFIG_USER_ONLY
+ check_hw_breakpoints(env, 1);
+ env->dr[6] |= DR6_BS;
+#endif
+ raise_exception(EXCP01_DB);
+}
+
+void helper_cpuid(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0);
+
+ cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX, &eax, &ebx, &ecx, &edx);
+ EAX = eax;
+ EBX = ebx;
+ ECX = ecx;
+ EDX = edx;
+}
+
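+/*
+ * ENTER with a non-zero nesting level: copy the enclosing procedure's
+ * saved frame pointers (level - 1 of them) from the old frame and then
+ * push the new frame pointer (t1). The initial push of EBP and the final
+ * frame allocation appear to be done by the translated code around this
+ * helper.
+ */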
+void helper_enter_level(int level, int data32, target_ulong t1)
+{
+ target_ulong ssp;
+ uint32_t esp_mask, esp, ebp;
+
+ esp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ ebp = EBP;
+ esp = ESP;
+ if (data32) {
+ /* 32 bit */
+ esp -= 4;
+ while (--level) {
+ esp -= 4;
+ ebp -= 4;
+ stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask)));
+ }
+ esp -= 4;
+ stl(ssp + (esp & esp_mask), t1);
+ } else {
+ /* 16 bit */
+ esp -= 2;
+ while (--level) {
+ esp -= 2;
+ ebp -= 2;
+ stw(ssp + (esp & esp_mask), lduw(ssp + (ebp & esp_mask)));
+ }
+ esp -= 2;
+ stw(ssp + (esp & esp_mask), t1);
+ }
+}
+
+#ifdef TARGET_X86_64
+void helper_enter64_level(int level, int data64, target_ulong t1)
+{
+ target_ulong esp, ebp;
+ ebp = EBP;
+ esp = ESP;
+
+ if (data64) {
+ /* 64 bit */
+ esp -= 8;
+ while (--level) {
+ esp -= 8;
+ ebp -= 8;
+ stq(esp, ldq(ebp));
+ }
+ esp -= 8;
+ stq(esp, t1);
+ } else {
+ /* 16 bit */
+ esp -= 2;
+ while (--level) {
+ esp -= 2;
+ ebp -= 2;
+ stw(esp, lduw(ebp));
+ }
+ esp -= 2;
+ stw(esp, t1);
+ }
+}
+#endif
+
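+/*
+ * LLDT: the selector must reference an LDT descriptor (system type 2) in
+ * the GDT (TI bit clear). In long mode system descriptors are 16 bytes,
+ * hence the entry limit of 15 below instead of 7.
+ */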
+void helper_lldt(int selector)
+{
+ SegmentCache *dt;
+ uint32_t e1, e2;
+#ifndef VBOX
+ int index, entry_limit;
+#else
+ unsigned int index, entry_limit;
+#endif
+ target_ulong ptr;
+
+#ifdef VBOX
+ Log(("helper_lldt_T0: old ldtr=%RTsel {.base=%RGv, .limit=%RGv} new=%RTsel\n",
+ (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit, (RTSEL)(selector & 0xffff)));
+#endif
+
+ selector &= 0xffff;
+ if ((selector & 0xfffc) == 0) {
+ /* XXX: NULL selector case: invalid LDT */
+ env->ldt.base = 0;
+ env->ldt.limit = 0;
+#ifdef VBOX
+ env->ldt.flags = DESC_INTEL_UNUSABLE;
+ env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID;
+ env->ldt.newselector = 0;
+#endif
+ } else {
+ if (selector & 0x4)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dt = &env->gdt;
+ index = selector & ~7;
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK)
+ entry_limit = 15;
+ else
+#endif
+ entry_limit = 7;
+ if ((index + entry_limit) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint32_t e3;
+ e3 = ldl_kernel(ptr + 8);
+ load_seg_cache_raw_dt(&env->ldt, e1, e2);
+ env->ldt.base |= (target_ulong)e3 << 32;
+ } else
+#endif
+ {
+ load_seg_cache_raw_dt(&env->ldt, e1, e2);
+ }
+ }
+ env->ldt.selector = selector;
+#ifdef VBOX
+ Log(("helper_lldt_T0: new ldtr=%RTsel {.base=%RGv, .limit=%RGv}\n",
+ (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit));
+#endif
+}
+
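+/*
+ * LTR: the selector must reference an available TSS descriptor (type 1 or
+ * 9) in the GDT; loading it marks the descriptor busy, which is written
+ * back to the GDT with stl_kernel below.
+ */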
+void helper_ltr(int selector)
+{
+ SegmentCache *dt;
+ uint32_t e1, e2;
+#ifndef VBOX
+ int index, type, entry_limit;
+#else
+ unsigned int index;
+ int type, entry_limit;
+#endif
+ target_ulong ptr;
+
+#ifdef VBOX
+ Log(("helper_ltr: pc=%RGv old tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n",
+ (RTGCPTR)env->eip, (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+ env->tr.flags, (RTSEL)(selector & 0xffff)));
+#endif
+ selector &= 0xffff;
+ if ((selector & 0xfffc) == 0) {
+ /* NULL selector case: invalid TR */
+#ifdef VBOX
+ raise_exception_err(EXCP0A_TSS, 0);
+#else
+ env->tr.base = 0;
+ env->tr.limit = 0;
+ env->tr.flags = 0;
+#endif
+ } else {
+ if (selector & 0x4)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dt = &env->gdt;
+ index = selector & ~7;
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK)
+ entry_limit = 15;
+ else
+#endif
+ entry_limit = 7;
+ if ((index + entry_limit) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ if ((e2 & DESC_S_MASK) ||
+ (type != 1 && type != 9))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ uint32_t e3, e4;
+ e3 = ldl_kernel(ptr + 8);
+ e4 = ldl_kernel(ptr + 12);
+ if ((e4 >> DESC_TYPE_SHIFT) & 0xf)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ load_seg_cache_raw_dt(&env->tr, e1, e2);
+ env->tr.base |= (target_ulong)e3 << 32;
+ } else
+#endif
+ {
+ load_seg_cache_raw_dt(&env->tr, e1, e2);
+ }
+ env->tr.flags |= DESC_TSS_BUSY_MASK;
+ e2 |= DESC_TSS_BUSY_MASK;
+ stl_kernel(ptr + 4, e2);
+ }
+ env->tr.selector = selector;
+#ifdef VBOX
+ Log(("helper_ltr: new tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n",
+ (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit,
+ env->tr.flags, (RTSEL)(selector & 0xffff)));
+#endif
+}
+
+/* Only valid in protected mode and not in VM86 mode; seg_reg must not be R_CS. */
+void helper_load_seg(int seg_reg, int selector)
+{
+ uint32_t e1, e2;
+ int cpl, dpl, rpl;
+ SegmentCache *dt;
+#ifndef VBOX
+ int index;
+#else
+ unsigned int index;
+#endif
+ target_ulong ptr;
+
+ selector &= 0xffff;
+ cpl = env->hflags & HF_CPL_MASK;
+#ifdef VBOX
+
+ /* Trying to load a selector with CPL=1? */
+ if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ Log(("RPL 1 -> sel %04X -> %04X (helper_load_seg)\n", selector, selector & 0xfffc));
+ selector = selector & 0xfffc;
+ }
+#endif /* VBOX */
+ if ((selector & 0xfffc) == 0) {
+ /* null selector case */
+#ifndef VBOX
+ if (seg_reg == R_SS
+#ifdef TARGET_X86_64
+ && (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+#endif
+ )
+ raise_exception_err(EXCP0D_GPF, 0);
+ cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0);
+#else
+ if (seg_reg == R_SS) {
+ if (!(env->hflags & HF_CS64_MASK) || cpl == 3)
+ raise_exception_err(EXCP0D_GPF, 0);
+ e2 = (cpl << DESC_DPL_SHIFT) | DESC_INTEL_UNUSABLE;
+ } else {
+ e2 = DESC_INTEL_UNUSABLE;
+ }
+ cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, 0, 0, e2);
+#endif
+ } else {
+
+ if (selector & 0x4)
+ dt = &env->ldt;
+ else
+ dt = &env->gdt;
+ index = selector & ~7;
+ if ((index + 7) > dt->limit)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ ptr = dt->base + index;
+ e1 = ldl_kernel(ptr);
+ e2 = ldl_kernel(ptr + 4);
+
+ if (!(e2 & DESC_S_MASK))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (seg_reg == R_SS) {
+ /* must be writable segment */
+ if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (rpl != cpl || dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ } else {
+ /* must be readable segment */
+ if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+
+ if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+ /* if not conforming code, test rights */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ }
+ }
+
+ if (!(e2 & DESC_P_MASK)) {
+ if (seg_reg == R_SS)
+ raise_exception_err(EXCP0C_STACK, selector & 0xfffc);
+ else
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+ }
+
+ /* set the access bit if not already set */
+ if (!(e2 & DESC_A_MASK)) {
+ e2 |= DESC_A_MASK;
+ stl_kernel(ptr + 4, e2);
+ }
+
+ cpu_x86_load_seg_cache(env, seg_reg, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+#if 0
+ qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n",
+ selector, (unsigned long)sc->base, sc->limit, sc->flags);
+#endif
+ }
+}
+
+/* protected mode jump */
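+/*
+ * Two cases below: a direct far jump to a code segment (conforming or
+ * non-conforming, with the usual DPL/RPL/CPL checks), or a jump through a
+ * TSS, task gate or call gate, where the target CS:EIP comes from the
+ * gate (or a task switch is performed) rather than from the instruction
+ * operand.
+ */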
+void helper_ljmp_protected(int new_cs, target_ulong new_eip,
+ int next_eip_addend)
+{
+ int gate_cs, type;
+ uint32_t e1, e2, cpl, dpl, rpl, limit;
+ target_ulong next_eip;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ e1 = e2 = 0;
+#endif
+ if ((new_cs & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_S_MASK) {
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (e2 & DESC_C_MASK) {
+ /* conforming code segment */
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ } else {
+ /* non conforming code segment */
+ rpl = new_cs & 3;
+ if (rpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit &&
+ !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ } else {
+ /* jump to call or task gate */
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ rpl = new_cs & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+ switch(type) {
+ case 1: /* 286 TSS */
+ case 9: /* 386 TSS */
+ case 5: /* task gate */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ next_eip = env->eip + next_eip_addend;
+ switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip);
+ CC_OP = CC_OP_EFLAGS;
+ break;
+ case 4: /* 286 call gate */
+ case 12: /* 386 call gate */
+ if ((dpl < cpl) || (dpl < rpl))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ gate_cs = e1 >> 16;
+ new_eip = (e1 & 0xffff);
+ if (type == 12)
+ new_eip |= (e2 & 0xffff0000);
+ if (load_segment(&e1, &e2, gate_cs) != 0)
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ /* must be code segment */
+ if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) !=
+ (DESC_S_MASK | DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ if (((e2 & DESC_C_MASK) && (dpl > cpl)) ||
+ (!(e2 & DESC_C_MASK) && (dpl != cpl)))
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-514 of 253666.pdf */
+ raise_exception_err(EXCP0B_NOSEG, gate_cs & 0xfffc);
+#else
+ raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc);
+#endif
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit)
+ raise_exception_err(EXCP0D_GPF, 0);
+ cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ break;
+ }
+ }
+}
+
+/* real mode call */
+void helper_lcall_real(int new_cs, target_ulong new_eip1,
+ int shift, int next_eip)
+{
+ int new_eip;
+ uint32_t esp, esp_mask;
+ target_ulong ssp;
+
+ new_eip = new_eip1;
+ esp = ESP;
+ esp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ if (shift) {
+ PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, esp, esp_mask, next_eip);
+ } else {
+ PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, esp, esp_mask, next_eip);
+ }
+
+ SET_ESP(esp, esp_mask);
+ env->eip = new_eip;
+ env->segs[R_CS].selector = new_cs;
+ env->segs[R_CS].base = (new_cs << 4);
+}
+
+/* protected mode call */
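+/*
+ * Far call. For a call gate to a more privileged level the new SS:ESP is
+ * fetched from the TSS for the target DPL, the old SS:ESP is pushed on the
+ * new stack, the param_count gate parameters are copied across, and
+ * finally the return CS:EIP is pushed; same-privilege and direct
+ * code-segment calls just push the return address on the current stack.
+ */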
+void helper_lcall_protected(int new_cs, target_ulong new_eip,
+ int shift, int next_eip_addend)
+{
+ int new_stack, i;
+ uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
+ uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
+ uint32_t val, limit, old_sp_mask;
+ target_ulong ssp, old_ssp, next_eip;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ e1 = e2 = 0;
+#endif
+ next_eip = env->eip + next_eip_addend;
+ LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
+ LOG_PCALL_STATE(env);
+ if ((new_cs & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ cpl = env->hflags & HF_CPL_MASK;
+ LOG_PCALL("desc=%08x:%08x\n", e1, e2);
+ if (e2 & DESC_S_MASK) {
+ if (!(e2 & DESC_CS_MASK))
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (e2 & DESC_C_MASK) {
+ /* conforming code segment */
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ } else {
+ /* non conforming code segment */
+ rpl = new_cs & 3;
+ if (rpl > cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ if (dpl != cpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+
+#ifdef TARGET_X86_64
+ /* XXX: check 16/32 bit cases in long mode */
+ if (shift == 2) {
+ target_ulong rsp;
+ /* 64 bit case */
+ rsp = ESP;
+ PUSHQ(rsp, env->segs[R_CS].selector);
+ PUSHQ(rsp, next_eip);
+ /* from this point, not restartable */
+ ESP = rsp;
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2), e2);
+ EIP = new_eip;
+ } else
+#endif
+ {
+ sp = ESP;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, sp, sp_mask, next_eip);
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, sp, sp_mask, next_eip);
+ }
+
+ limit = get_seg_limit(e1, e2);
+ if (new_eip > limit)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ /* from this point, not restartable */
+ SET_ESP(sp, sp_mask);
+ cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
+ get_seg_base(e1, e2), limit, e2);
+ EIP = new_eip;
+ }
+ } else {
+ /* check gate type */
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ rpl = new_cs & 3;
+ switch(type) {
+ case 1: /* available 286 TSS */
+ case 9: /* available 386 TSS */
+ case 5: /* task gate */
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip);
+ CC_OP = CC_OP_EFLAGS;
+ return;
+ case 4: /* 286 call gate */
+ case 12: /* 386 call gate */
+ break;
+ default:
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ break;
+ }
+ shift = type >> 3;
+
+ if (dpl < cpl || dpl < rpl)
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ /* check valid bit */
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ selector = e1 >> 16;
+ offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+ param_count = e2 & 0x1f;
+ if ((selector & 0xfffc) == 0)
+ raise_exception_err(EXCP0D_GPF, 0);
+
+ if (load_segment(&e1, &e2, selector) != 0)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK)))
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl > cpl)
+ raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
+ if (!(e2 & DESC_P_MASK))
+ raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
+
+ if (!(e2 & DESC_C_MASK) && dpl < cpl) {
+ /* to inner privilege */
+ get_ss_esp_from_tss(&ss, &sp, dpl);
+ LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n",
+ ss, sp, param_count, ESP);
+ if ((ss & 0xfffc) == 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if ((ss & 3) != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (load_segment(&ss_e1, &ss_e2, ss) != 0)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (ss_dpl != dpl)
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK))
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+ if (!(ss_e2 & DESC_P_MASK))
+#ifdef VBOX /* See page 3-99 of 253666.pdf */
+ raise_exception_err(EXCP0C_STACK, ss & 0xfffc);
+#else
+ raise_exception_err(EXCP0A_TSS, ss & 0xfffc);
+#endif
+
+ // push_size = ((param_count * 2) + 8) << shift;
+
+ old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ old_ssp = env->segs[R_SS].base;
+
+ sp_mask = get_sp_mask(ss_e2);
+ ssp = get_seg_base(ss_e1, ss_e2);
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector);
+ PUSHL(ssp, sp, sp_mask, ESP);
+ for(i = param_count - 1; i >= 0; i--) {
+ val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask));
+ PUSHL(ssp, sp, sp_mask, val);
+ }
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector);
+ PUSHW(ssp, sp, sp_mask, ESP);
+ for(i = param_count - 1; i >= 0; i--) {
+ val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask));
+ PUSHW(ssp, sp, sp_mask, val);
+ }
+ }
+ new_stack = 1;
+ } else {
+ /* to same privilege */
+ sp = ESP;
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ ssp = env->segs[R_SS].base;
+ // push_size = (4 << shift);
+ new_stack = 0;
+ }
+
+ if (shift) {
+ PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHL(ssp, sp, sp_mask, next_eip);
+ } else {
+ PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector);
+ PUSHW(ssp, sp, sp_mask, next_eip);
+ }
+
+ /* from this point, not restartable */
+
+ if (new_stack) {
+ ss = (ss & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_SS, ss,
+ ssp,
+ get_seg_limit(ss_e1, ss_e2),
+ ss_e2);
+ }
+
+ selector = (selector & ~3) | dpl;
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, dpl);
+ SET_ESP(sp, sp_mask);
+ EIP = offset;
+ }
+}
+
+/* real and vm86 mode iret */
+void helper_iret_real(int shift)
+{
+ uint32_t sp, new_cs, new_eip, new_eflags, sp_mask;
+ target_ulong ssp;
+ int eflags_mask;
+#ifdef VBOX
+ bool fVME = false;
+
+ remR3TrapClear(env->pVM);
+#endif /* VBOX */
+
+ sp_mask = 0xffff; /* XXX: use SS segment size? */
+ sp = ESP;
+ ssp = env->segs[R_SS].base;
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_eip);
+ POPL(ssp, sp, sp_mask, new_cs);
+ new_cs &= 0xffff;
+ POPL(ssp, sp, sp_mask, new_eflags);
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_eip);
+ POPW(ssp, sp, sp_mask, new_cs);
+ POPW(ssp, sp, sp_mask, new_eflags);
+ }
+#ifdef VBOX
+ if ( (env->eflags & VM_MASK)
+ && ((env->eflags >> IOPL_SHIFT) & 3) != 3
+ && (env->cr[4] & CR4_VME_MASK)) /* implied or else we would fault earlier */
+ {
+ fVME = true;
+ /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */
+ /* if TF will be set -> #GP */
+ if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK))
+ || (new_eflags & TF_MASK))
+ raise_exception(EXCP0D_GPF);
+ }
+#endif /* VBOX */
+ ESP = (ESP & ~sp_mask) | (sp & sp_mask);
+ env->segs[R_CS].selector = new_cs;
+ env->segs[R_CS].base = (new_cs << 4);
+ env->eip = new_eip;
+#ifdef VBOX
+ if (fVME)
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+ else
+#endif
+ if (env->eflags & VM_MASK)
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK;
+ else
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK;
+ if (shift == 0)
+ eflags_mask &= 0xffff;
+ load_eflags(new_eflags, eflags_mask);
+ env->hflags2 &= ~HF2_NMI_MASK;
+#ifdef VBOX
+ if (fVME)
+ {
+ if (new_eflags & IF_MASK)
+ env->eflags |= VIF_MASK;
+ else
+ env->eflags &= ~VIF_MASK;
+ }
+#endif /* VBOX */
+}
+
+static inline void validate_seg(int seg_reg, int cpl)
+{
+ int dpl;
+ uint32_t e2;
+
+ /* XXX: on x86_64, we do not want to nullify FS and GS because
+ they may still contain a valid base. I would be interested to
+ know how a real x86_64 CPU behaves */
+ if ((seg_reg == R_FS || seg_reg == R_GS) &&
+ (env->segs[seg_reg].selector & 0xfffc) == 0)
+ return;
+
+ e2 = env->segs[seg_reg].flags;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) {
+ /* data or non conforming code segment */
+ if (dpl < cpl) {
+ cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0);
+ }
+ }
+}
+
+/* protected mode iret */
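+/*
+ * Common worker for IRET and far RET. It pops EIP and CS (plus EFLAGS for
+ * IRET); when returning to an outer privilege level it also pops ESP and
+ * SS and re-validates the data segment registers, and an IRET back to
+ * VM86 restores the full 9-dword frame via return_to_vm86.
+ */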
+static inline void helper_ret_protected(int shift, int is_iret, int addend)
+{
+ uint32_t new_cs, new_eflags, new_ss;
+ uint32_t new_es, new_ds, new_fs, new_gs;
+ uint32_t e1, e2, ss_e1, ss_e2;
+ int cpl, dpl, rpl, eflags_mask, iopl;
+ target_ulong ssp, sp, new_eip, new_esp, sp_mask;
+
+#ifdef VBOX /** @todo Why do we do this? */
+ ss_e1 = ss_e2 = e1 = e2 = 0;
+#endif
+
+#ifdef TARGET_X86_64
+ if (shift == 2)
+ sp_mask = -1;
+ else
+#endif
+ sp_mask = get_sp_mask(env->segs[R_SS].flags);
+ sp = ESP;
+ ssp = env->segs[R_SS].base;
+ new_eflags = 0; /* avoid warning */
+#ifdef TARGET_X86_64
+ if (shift == 2) {
+ POPQ(sp, new_eip);
+ POPQ(sp, new_cs);
+ new_cs &= 0xffff;
+ if (is_iret) {
+ POPQ(sp, new_eflags);
+ }
+ } else
+#endif
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_eip);
+ POPL(ssp, sp, sp_mask, new_cs);
+ new_cs &= 0xffff;
+ if (is_iret) {
+ POPL(ssp, sp, sp_mask, new_eflags);
+#define LOG_GROUP LOG_GROUP_REM
+#if defined(VBOX) && defined(DEBUG)
+ Log(("iret: new CS %04X (old=%x)\n", new_cs, env->segs[R_CS].selector));
+ Log(("iret: new EIP %08X\n", (uint32_t)new_eip));
+ Log(("iret: new EFLAGS %08X\n", new_eflags));
+ Log(("iret: EAX=%08x\n", (uint32_t)EAX));
+#endif
+ if (new_eflags & VM_MASK)
+ goto return_to_vm86;
+ }
+#ifdef VBOX
+ if ((new_cs & 0x3) == 1 && (env->state & CPU_RAW_RING0))
+ {
+ if ( !EMIsRawRing1Enabled(env->pVM)
+ || env->segs[R_CS].selector == (new_cs & 0xfffc))
+ {
+ Log(("RPL 1 -> new_cs %04X -> %04X\n", new_cs, new_cs & 0xfffc));
+ new_cs = new_cs & 0xfffc;
+ }
+ else
+ {
+ /* Ugly assumption: assume a genuine switch to ring-1. */
+ Log(("Genuine switch to ring-1 (iret)\n"));
+ }
+ }
+ else if ((new_cs & 0x3) == 2 && (env->state & CPU_RAW_RING0) && EMIsRawRing1Enabled(env->pVM))
+ {
+ Log(("RPL 2 -> new_cs %04X -> %04X\n", new_cs, (new_cs & 0xfffc) | 1));
+ new_cs = (new_cs & 0xfffc) | 1;
+ }
+#endif
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_eip);
+ POPW(ssp, sp, sp_mask, new_cs);
+ if (is_iret)
+ POPW(ssp, sp, sp_mask, new_eflags);
+ }
+ LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n",
+ new_cs, new_eip, shift, addend);
+ LOG_PCALL_STATE(env);
+ if ((new_cs & 0xfffc) == 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_cs & 0xfffc) == 0\n"));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (load_segment(&e1, &e2, new_cs) != 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("load_segment failed\n"));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ if (!(e2 & DESC_S_MASK) ||
+ !(e2 & DESC_CS_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("e2 mask %08x\n", e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ cpl = env->hflags & HF_CPL_MASK;
+ rpl = new_cs & 3;
+ if (rpl < cpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("rpl < cpl (%d vs %d)\n", rpl, cpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+
+ if (e2 & DESC_C_MASK) {
+ if (dpl > rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("dpl > rpl (%d vs %d)\n", dpl, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ } else {
+ if (dpl != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("dpl != rpl (%d vs %d) e1=%x e2=%x\n", dpl, rpl, e1, e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
+ }
+ }
+ if (!(e2 & DESC_P_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("DESC_P_MASK e2=%08x\n", e2));
+#endif
+ raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc);
+ }
+
+ sp += addend;
+ if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) ||
+ ((env->hflags & HF_CS64_MASK) && !is_iret))) {
+ /* return to same privilege level */
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_CS, new_cs,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ } else {
+ /* return to different privilege level */
+#ifdef TARGET_X86_64
+ if (shift == 2) {
+ POPQ(sp, new_esp);
+ POPQ(sp, new_ss);
+ new_ss &= 0xffff;
+ } else
+#endif
+ if (shift == 1) {
+ /* 32 bits */
+ POPL(ssp, sp, sp_mask, new_esp);
+ POPL(ssp, sp, sp_mask, new_ss);
+ new_ss &= 0xffff;
+ } else {
+ /* 16 bits */
+ POPW(ssp, sp, sp_mask, new_esp);
+ POPW(ssp, sp, sp_mask, new_ss);
+ }
+ LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n",
+ new_ss, new_esp);
+ if ((new_ss & 0xfffc) == 0) {
+#ifdef TARGET_X86_64
+ /* NULL ss is allowed in long mode if cpl != 3 */
+# ifndef VBOX
+ /* XXX: test CS64 ? */
+ if ((env->hflags & HF_LMA_MASK) && rpl != 3) {
+ cpu_x86_load_seg_cache(env, R_SS, new_ss,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (rpl << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */
+ } else
+# else /* VBOX */
+ if ((env->hflags & HF_LMA_MASK) && rpl != 3 && (e2 & DESC_L_MASK)) {
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+ cpu_x86_load_seg_cache_with_clean_flags(env, R_SS, new_ss,
+ 0, 0xffffffff,
+ DESC_INTEL_UNUSABLE | (rpl << DESC_DPL_SHIFT) );
+ ss_e2 = DESC_B_MASK; /* not really used */
+ } else
+# endif
+#endif
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("NULL ss, rpl=%d\n", rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+ } else {
+ if ((new_ss & 3) != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x != rpl=%d\n", new_ss, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (load_segment(&ss_e1, &ss_e2, new_ss) != 0)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x load error\n", new_ss));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (!(ss_e2 & DESC_S_MASK) ||
+ (ss_e2 & DESC_CS_MASK) ||
+ !(ss_e2 & DESC_W_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%x ss_e2=%#x bad type\n", new_ss, ss_e2));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+ if (dpl != rpl)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("SS.dpl=%u != rpl=%u\n", dpl, rpl));
+#endif
+ raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc);
+ }
+ if (!(ss_e2 & DESC_P_MASK))
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("new_ss=%#x #NP\n", new_ss));
+#endif
+ raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc);
+ }
+#ifdef VBOX
+ if (!(e2 & DESC_A_MASK))
+ e2 = set_segment_accessed(new_cs, e2);
+ if (!(ss_e2 & DESC_A_MASK))
+ ss_e2 = set_segment_accessed(new_ss, ss_e2);
+#endif
+ cpu_x86_load_seg_cache(env, R_SS, new_ss,
+ get_seg_base(ss_e1, ss_e2),
+ get_seg_limit(ss_e1, ss_e2),
+ ss_e2);
+ }
+
+ cpu_x86_load_seg_cache(env, R_CS, new_cs,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ cpu_x86_set_cpl(env, rpl);
+ sp = new_esp;
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_CS64_MASK)
+ sp_mask = -1;
+ else
+#endif
+ sp_mask = get_sp_mask(ss_e2);
+
+ /* validate data segments */
+ validate_seg(R_ES, rpl);
+ validate_seg(R_DS, rpl);
+ validate_seg(R_FS, rpl);
+ validate_seg(R_GS, rpl);
+
+ sp += addend;
+ }
+ SET_ESP(sp, sp_mask);
+ env->eip = new_eip;
+ if (is_iret) {
+ /* NOTE: 'cpl' is the _old_ CPL */
+ eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK;
+ if (cpl == 0)
+#ifdef VBOX
+ eflags_mask |= IOPL_MASK | VIF_MASK | VIP_MASK;
+#else
+ eflags_mask |= IOPL_MASK;
+#endif
+ iopl = (env->eflags >> IOPL_SHIFT) & 3;
+ if (cpl <= iopl)
+ eflags_mask |= IF_MASK;
+ if (shift == 0)
+ eflags_mask &= 0xffff;
+ load_eflags(new_eflags, eflags_mask);
+ }
+ return;
+
+ return_to_vm86:
+ POPL(ssp, sp, sp_mask, new_esp);
+ POPL(ssp, sp, sp_mask, new_ss);
+ POPL(ssp, sp, sp_mask, new_es);
+ POPL(ssp, sp, sp_mask, new_ds);
+ POPL(ssp, sp, sp_mask, new_fs);
+ POPL(ssp, sp, sp_mask, new_gs);
+
+ /* modify processor state */
+ load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK |
+ IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK);
+ load_seg_vm(R_CS, new_cs & 0xffff);
+ cpu_x86_set_cpl(env, 3);
+ load_seg_vm(R_SS, new_ss & 0xffff);
+ load_seg_vm(R_ES, new_es & 0xffff);
+ load_seg_vm(R_DS, new_ds & 0xffff);
+ load_seg_vm(R_FS, new_fs & 0xffff);
+ load_seg_vm(R_GS, new_gs & 0xffff);
+
+ env->eip = new_eip & 0xffff;
+ ESP = new_esp;
+}
+
+void helper_iret_protected(int shift, int next_eip)
+{
+ int tss_selector, type;
+ uint32_t e1, e2;
+
+#ifdef VBOX
+ Log(("iret (shift=%d new_eip=%#x)\n", shift, next_eip));
+ e1 = e2 = 0; /** @todo Why do we do this? */
+ remR3TrapClear(env->pVM);
+#endif
+
+ /* Specific case: nested-task return (EFLAGS.NT set) via the back-link field of the current TSS. */
+ if (env->eflags & NT_MASK) {
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK)
+ {
+#if defined(VBOX) && defined(DEBUG)
+ Log(("eflags.NT=1 on iret in long mode\n"));
+#endif
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+#endif
+ tss_selector = lduw_kernel(env->tr.base + 0);
+ if (tss_selector & 4)
+ raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+ if (load_segment(&e1, &e2, tss_selector) != 0)
+ raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+ type = (e2 >> DESC_TYPE_SHIFT) & 0x17;
+ /* NOTE: the 0x17 mask above checks both the S bit (must be a system segment) and the busy-TSS type. */
+ if (type != 3)
+ raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc);
+ switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip);
+ } else {
+ helper_ret_protected(shift, 1, 0);
+ }
+ env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+void helper_lret_protected(int shift, int addend)
+{
+ helper_ret_protected(shift, 0, addend);
+}
+
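+/*
+ * SYSENTER/SYSEXIT use flat segments derived from IA32_SYSENTER_CS: CS and
+ * SS = CS + 8 on entry, and CS + 16 / SS = CS + 24 (or CS + 32 / CS + 40
+ * for a 64-bit exit) on SYSEXIT, with the return ESP taken from ECX and
+ * the return EIP from EDX.
+ */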
+void helper_sysenter(void)
+{
+ if (env->sysenter_cs == 0) {
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+ env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+ cpu_x86_set_cpl(env, 0);
+
+#ifdef TARGET_X86_64
+ if (env->hflags & HF_LMA_MASK) {
+ cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+ } else
+#endif
+ {
+ cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ }
+ cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK |
+ DESC_W_MASK | DESC_A_MASK);
+ ESP = env->sysenter_esp;
+ EIP = env->sysenter_eip;
+}
+
+void helper_sysexit(int dflag)
+{
+ int cpl;
+
+ cpl = env->hflags & HF_CPL_MASK;
+ if (env->sysenter_cs == 0 || cpl != 0) {
+ raise_exception_err(EXCP0D_GPF, 0);
+ }
+ cpu_x86_set_cpl(env, 3);
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ } else
+#endif
+ {
+ cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 16) & 0xfffc) | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+ cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3,
+ 0, 0xffffffff,
+ DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+ DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
+ DESC_W_MASK | DESC_A_MASK);
+ }
+ ESP = ECX;
+ EIP = EDX;
+}
+
+#if defined(CONFIG_USER_ONLY)
+target_ulong helper_read_crN(int reg)
+{
+ return 0;
+}
+
+void helper_write_crN(int reg, target_ulong t0)
+{
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{
+}
+#else
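+/*
+ * CR8 accesses are forwarded to the local APIC TPR helpers; when an SVM
+ * guest uses virtual interrupt masking (HF2_VINTR_MASK) the virtual TPR
+ * (v_tpr) is used instead.
+ */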
+target_ulong helper_read_crN(int reg)
+{
+ target_ulong val;
+
+ helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0);
+ switch(reg) {
+ default:
+ val = env->cr[reg];
+ break;
+ case 8:
+ if (!(env->hflags2 & HF2_VINTR_MASK)) {
+#ifndef VBOX
+ val = cpu_get_apic_tpr(env->apic_state);
+#else /* VBOX */
+ val = cpu_get_apic_tpr(env);
+#endif /* VBOX */
+ } else {
+ val = env->v_tpr;
+ }
+ break;
+ }
+ return val;
+}
+
+void helper_write_crN(int reg, target_ulong t0)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0);
+ switch(reg) {
+ case 0:
+ cpu_x86_update_cr0(env, t0);
+ break;
+ case 3:
+ cpu_x86_update_cr3(env, t0);
+ break;
+ case 4:
+ cpu_x86_update_cr4(env, t0);
+ break;
+ case 8:
+ if (!(env->hflags2 & HF2_VINTR_MASK)) {
+#ifndef VBOX
+ cpu_set_apic_tpr(env->apic_state, t0);
+#else /* VBOX */
+ cpu_set_apic_tpr(env, t0);
+#endif /* VBOX */
+ }
+ env->v_tpr = t0 & 0x0f;
+ break;
+ default:
+ env->cr[reg] = t0;
+ break;
+ }
+}
+
+void helper_movl_drN_T0(int reg, target_ulong t0)
+{
+ int i;
+
+ if (reg < 4) {
+ hw_breakpoint_remove(env, reg);
+ env->dr[reg] = t0;
+ hw_breakpoint_insert(env, reg);
+# ifndef VBOX
+ } else if (reg == 7) {
+# else
+ } else if (reg == 7 || reg == 5) { /* (DR5 is an alias for DR7.) */
+ if (t0 & X86_DR7_MBZ_MASK)
+ raise_exception_err(EXCP0D_GPF, 0);
+ t0 |= X86_DR7_RA1_MASK;
+ t0 &= ~X86_DR7_RAZ_MASK;
+# endif
+ for (i = 0; i < 4; i++)
+ hw_breakpoint_remove(env, i);
+ env->dr[7] = t0;
+ for (i = 0; i < 4; i++)
+ hw_breakpoint_insert(env, i);
+ } else {
+# ifndef VBOX
+ env->dr[reg] = t0;
+# else
+ if (t0 & X86_DR6_MBZ_MASK)
+ raise_exception_err(EXCP0D_GPF, 0);
+ t0 |= X86_DR6_RA1_MASK;
+ t0 &= ~X86_DR6_RAZ_MASK;
+ env->dr[6] = t0; /* (DR4 is an alias for DR6.) */
+# endif
+ }
+}
+#endif
+
+void helper_lmsw(target_ulong t0)
+{
+ /* Only the 4 low bits of CR0 are modified; PE cannot be cleared
+ once it has been set. */
+ t0 = (env->cr[0] & ~0xe) | (t0 & 0xf);
+ helper_write_crN(0, t0);
+}
+
+void helper_clts(void)
+{
+ env->cr[0] &= ~CR0_TS_MASK;
+ env->hflags &= ~HF_TS_MASK;
+}
+
+void helper_invlpg(target_ulong addr)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0);
+ tlb_flush_page(env, addr);
+}
+
+void helper_rdtsc(void)
+{
+ uint64_t val;
+
+ if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+ raise_exception(EXCP0D_GPF);
+ }
+ helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0);
+
+ val = cpu_get_tsc(env) + env->tsc_offset;
+ EAX = (uint32_t)(val);
+ EDX = (uint32_t)(val >> 32);
+}
+
+void helper_rdtscp(void)
+{
+ helper_rdtsc();
+#ifndef VBOX
+ ECX = (uint32_t)(env->tsc_aux);
+#else /* VBOX */
+ uint64_t val;
+ if (cpu_rdmsr(env, MSR_K8_TSC_AUX, &val) == 0)
+ ECX = (uint32_t)(val);
+ else
+ ECX = 0;
+#endif /* VBOX */
+}
+
+void helper_rdpmc(void)
+{
+#ifdef VBOX
+ /* If X86_CR4_PCE is *not* set, then CPL must be zero. */
+ if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+ raise_exception(EXCP0D_GPF);
+ }
+ /* Just return zero here; rather tricky to properly emulate this, especially as the specs are a mess. */
+ EAX = 0;
+ EDX = 0;
+#else /* !VBOX */
+ if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
+ raise_exception(EXCP0D_GPF);
+ }
+ helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0);
+
+ /* currently unimplemented */
+ raise_exception_err(EXCP06_ILLOP, 0);
+#endif /* !VBOX */
+}
+
+#if defined(CONFIG_USER_ONLY)
+void helper_wrmsr(void)
+{
+}
+
+void helper_rdmsr(void)
+{
+}
+#else
+void helper_wrmsr(void)
+{
+ uint64_t val;
+
+ helper_svm_check_intercept_param(SVM_EXIT_MSR, 1);
+
+ val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32);
+
+ switch((uint32_t)ECX) {
+ case MSR_IA32_SYSENTER_CS:
+ env->sysenter_cs = val & 0xffff;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ env->sysenter_esp = val;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ env->sysenter_eip = val;
+ break;
+ case MSR_IA32_APICBASE:
+# ifndef VBOX /* The CPUMSetGuestMsr call below does this now. */
+ cpu_set_apic_base(env->apic_state, val);
+# endif
+ break;
+ case MSR_EFER:
+ {
+ uint64_t update_mask;
+ update_mask = 0;
+ if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL)
+ update_mask |= MSR_EFER_SCE;
+ if (env->cpuid_ext2_features & CPUID_EXT2_LM)
+ update_mask |= MSR_EFER_LME;
+ if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR)
+ update_mask |= MSR_EFER_FFXSR;
+ if (env->cpuid_ext2_features & CPUID_EXT2_NX)
+ update_mask |= MSR_EFER_NXE;
+ if (env->cpuid_ext3_features & CPUID_EXT3_SVM)
+ update_mask |= MSR_EFER_SVME;
+ cpu_load_efer(env, (env->efer & ~update_mask) |
+ (val & update_mask));
+ }
+ break;
+ case MSR_STAR:
+ env->star = val;
+ break;
+ case MSR_PAT:
+ env->pat = val;
+ break;
+ case MSR_VM_HSAVE_PA:
+ env->vm_hsave = val;
+ break;
+#ifdef TARGET_X86_64
+ case MSR_LSTAR:
+ env->lstar = val;
+ break;
+ case MSR_CSTAR:
+ env->cstar = val;
+ break;
+ case MSR_FMASK:
+ env->fmask = val;
+ break;
+ case MSR_FSBASE:
+ env->segs[R_FS].base = val;
+ break;
+ case MSR_GSBASE:
+ env->segs[R_GS].base = val;
+ break;
+ case MSR_KERNELGSBASE:
+ env->kernelgsbase = val;
+ break;
+#endif
+# ifndef VBOX
+ case MSR_MTRRphysBase(0):
+ case MSR_MTRRphysBase(1):
+ case MSR_MTRRphysBase(2):
+ case MSR_MTRRphysBase(3):
+ case MSR_MTRRphysBase(4):
+ case MSR_MTRRphysBase(5):
+ case MSR_MTRRphysBase(6):
+ case MSR_MTRRphysBase(7):
+ env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val;
+ break;
+ case MSR_MTRRphysMask(0):
+ case MSR_MTRRphysMask(1):
+ case MSR_MTRRphysMask(2):
+ case MSR_MTRRphysMask(3):
+ case MSR_MTRRphysMask(4):
+ case MSR_MTRRphysMask(5):
+ case MSR_MTRRphysMask(6):
+ case MSR_MTRRphysMask(7):
+ env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val;
+ break;
+ case MSR_MTRRfix64K_00000:
+ env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val;
+ break;
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
+ env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val;
+ break;
+ case MSR_MTRRfix4K_C0000:
+ case MSR_MTRRfix4K_C8000:
+ case MSR_MTRRfix4K_D0000:
+ case MSR_MTRRfix4K_D8000:
+ case MSR_MTRRfix4K_E0000:
+ case MSR_MTRRfix4K_E8000:
+ case MSR_MTRRfix4K_F0000:
+ case MSR_MTRRfix4K_F8000:
+ env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val;
+ break;
+ case MSR_MTRRdefType:
+ env->mtrr_deftype = val;
+ break;
+ case MSR_MCG_STATUS:
+ env->mcg_status = val;
+ break;
+ case MSR_MCG_CTL:
+ if ((env->mcg_cap & MCG_CTL_P)
+ && (val == 0 || val == ~(uint64_t)0))
+ env->mcg_ctl = val;
+ break;
+ case MSR_TSC_AUX:
+ env->tsc_aux = val;
+ break;
+# endif /* !VBOX */
+ default:
+# ifndef VBOX
+ if ((uint32_t)ECX >= MSR_MC0_CTL
+ && (uint32_t)ECX < MSR_MC0_CTL + (4 * (env->mcg_cap & 0xff))) {
+ uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+ if ((offset & 0x3) != 0
+ || (val == 0 || val == ~(uint64_t)0))
+ env->mce_banks[offset] = val;
+ break;
+ }
+ /* XXX: exception ? */
+# endif
+ break;
+ }
+
+# ifdef VBOX
+ /* call CPUM. */
+ if (cpu_wrmsr(env, (uint32_t)ECX, val) != 0)
+ {
+ /** @todo be a brave man and raise a \#GP(0) here as we should... */
+ }
+# endif
+}
+
+void helper_rdmsr(void)
+{
+ uint64_t val;
+
+ helper_svm_check_intercept_param(SVM_EXIT_MSR, 0);
+
+ switch((uint32_t)ECX) {
+ case MSR_IA32_SYSENTER_CS:
+ val = env->sysenter_cs;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ val = env->sysenter_esp;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ val = env->sysenter_eip;
+ break;
+ case MSR_IA32_APICBASE:
+#ifndef VBOX
+ val = cpu_get_apic_base(env->apic_state);
+#else /* VBOX */
+ val = cpu_get_apic_base(env);
+#endif /* VBOX */
+ break;
+ case MSR_EFER:
+ val = env->efer;
+ break;
+ case MSR_STAR:
+ val = env->star;
+ break;
+ case MSR_PAT:
+ val = env->pat;
+ break;
+ case MSR_VM_HSAVE_PA:
+ val = env->vm_hsave;
+ break;
+# ifndef VBOX /* forward to CPUMQueryGuestMsr. */
+ case MSR_IA32_PERF_STATUS:
+ /* tsc_increment_by_tick */
+ val = 1000ULL;
+ /* CPU multiplier */
+ val |= (((uint64_t)4ULL) << 40);
+ break;
+# endif /* !VBOX */
+#ifdef TARGET_X86_64
+ case MSR_LSTAR:
+ val = env->lstar;
+ break;
+ case MSR_CSTAR:
+ val = env->cstar;
+ break;
+ case MSR_FMASK:
+ val = env->fmask;
+ break;
+ case MSR_FSBASE:
+ val = env->segs[R_FS].base;
+ break;
+ case MSR_GSBASE:
+ val = env->segs[R_GS].base;
+ break;
+ case MSR_KERNELGSBASE:
+ val = env->kernelgsbase;
+ break;
+# ifndef VBOX
+ case MSR_TSC_AUX:
+ val = env->tsc_aux;
+ break;
+# endif /*!VBOX*/
+#endif
+# ifndef VBOX
+ case MSR_MTRRphysBase(0):
+ case MSR_MTRRphysBase(1):
+ case MSR_MTRRphysBase(2):
+ case MSR_MTRRphysBase(3):
+ case MSR_MTRRphysBase(4):
+ case MSR_MTRRphysBase(5):
+ case MSR_MTRRphysBase(6):
+ case MSR_MTRRphysBase(7):
+ val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base;
+ break;
+ case MSR_MTRRphysMask(0):
+ case MSR_MTRRphysMask(1):
+ case MSR_MTRRphysMask(2):
+ case MSR_MTRRphysMask(3):
+ case MSR_MTRRphysMask(4):
+ case MSR_MTRRphysMask(5):
+ case MSR_MTRRphysMask(6):
+ case MSR_MTRRphysMask(7):
+ val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask;
+ break;
+ case MSR_MTRRfix64K_00000:
+ val = env->mtrr_fixed[0];
+ break;
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
+ val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1];
+ break;
+ case MSR_MTRRfix4K_C0000:
+ case MSR_MTRRfix4K_C8000:
+ case MSR_MTRRfix4K_D0000:
+ case MSR_MTRRfix4K_D8000:
+ case MSR_MTRRfix4K_E0000:
+ case MSR_MTRRfix4K_E8000:
+ case MSR_MTRRfix4K_F0000:
+ case MSR_MTRRfix4K_F8000:
+ val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3];
+ break;
+ case MSR_MTRRdefType:
+ val = env->mtrr_deftype;
+ break;
+ case MSR_MTRRcap:
+ if (env->cpuid_features & CPUID_MTRR)
+ val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED;
+ else
+ /* XXX: exception ? */
+ val = 0;
+ break;
+ case MSR_MCG_CAP:
+ val = env->mcg_cap;
+ break;
+ case MSR_MCG_CTL:
+ if (env->mcg_cap & MCG_CTL_P)
+ val = env->mcg_ctl;
+ else
+ val = 0;
+ break;
+ case MSR_MCG_STATUS:
+ val = env->mcg_status;
+ break;
+# endif /* !VBOX */
+ default:
+# ifndef VBOX
+ if ((uint32_t)ECX >= MSR_MC0_CTL
+ && (uint32_t)ECX < MSR_MC0_CTL + (4 * (env->mcg_cap & 0xff))) {
+ uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+ val = env->mce_banks[offset];
+ break;
+ }
+ /* XXX: exception ? */
+ val = 0;
+# else /* VBOX */
+ if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0)
+ {
+ /** @todo be a brave man and raise a \#GP(0) here as we should... */
+ val = 0;
+ }
+# endif /* VBOX */
+ break;
+ }
+ EAX = (uint32_t)(val);
+ EDX = (uint32_t)(val >> 32);
+
+# ifdef VBOX_STRICT
+ if ((uint32_t)ECX != MSR_IA32_TSC) {
+ if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0)
+ val = 0;
+ AssertMsg(val == RT_MAKE_U64(EAX, EDX), ("idMsr=%#x val=%#llx eax:edx=%#llx\n", (uint32_t)ECX, val, RT_MAKE_U64(EAX, EDX)));
+ }
+# endif
+}
+#endif
+
+target_ulong helper_lsl(target_ulong selector1)
+{
+ unsigned int limit;
+ uint32_t e1, e2, eflags, selector;
+ int rpl, dpl, cpl, type;
+
+ selector = selector1 & 0xffff;
+ eflags = helper_cc_compute_all(CC_OP);
+ if ((selector & 0xfffc) == 0)
+ goto fail;
+ if (load_segment(&e1, &e2, selector) != 0)
+ goto fail;
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_S_MASK) {
+ if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+ /* conforming */
+ } else {
+ if (dpl < cpl || dpl < rpl)
+ goto fail;
+ }
+ } else {
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
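+ /* For system descriptors, LSL is defined only for LDT (2) and TSS (1, 3, 9, 11) types. */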
+ switch(type) {
+ case 1:
+ case 2:
+ case 3:
+ case 9:
+ case 11:
+ break;
+ default:
+ goto fail;
+ }
+ if (dpl < cpl || dpl < rpl) {
+ fail:
+ CC_SRC = eflags & ~CC_Z;
+ return 0;
+ }
+ }
+ limit = get_seg_limit(e1, e2);
+ CC_SRC = eflags | CC_Z;
+ return limit;
+}
+
+target_ulong helper_lar(target_ulong selector1)
+{
+ uint32_t e1, e2, eflags, selector;
+ int rpl, dpl, cpl, type;
+
+ selector = selector1 & 0xffff;
+ eflags = helper_cc_compute_all(CC_OP);
+ if ((selector & 0xfffc) == 0)
+ goto fail;
+ if (load_segment(&e1, &e2, selector) != 0)
+ goto fail;
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_S_MASK) {
+ if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) {
+ /* conforming */
+ } else {
+ if (dpl < cpl || dpl < rpl)
+ goto fail;
+ }
+ } else {
+ type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
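+ /* In addition to the LSL types, LAR accepts call gates (4, 12) and task gates (5). */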
+ switch(type) {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 9:
+ case 11:
+ case 12:
+ break;
+ default:
+ goto fail;
+ }
+ if (dpl < cpl || dpl < rpl) {
+ fail:
+ CC_SRC = eflags & ~CC_Z;
+ return 0;
+ }
+ }
+ CC_SRC = eflags | CC_Z;
+#ifdef VBOX /* AMD says 0x00ffff00, while Intel says 0x00fxff00. Bochs and IEM do as AMD says (x=f). */
+ return e2 & 0x00ffff00;
+#else
+ return e2 & 0x00f0ff00;
+#endif
+}
+
+void helper_verr(target_ulong selector1)
+{
+ uint32_t e1, e2, eflags, selector;
+ int rpl, dpl, cpl;
+
+ selector = selector1 & 0xffff;
+ eflags = helper_cc_compute_all(CC_OP);
+ if ((selector & 0xfffc) == 0)
+ goto fail;
+ if (load_segment(&e1, &e2, selector) != 0)
+ goto fail;
+ if (!(e2 & DESC_S_MASK))
+ goto fail;
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_CS_MASK) {
+ if (!(e2 & DESC_R_MASK))
+ goto fail;
+ if (!(e2 & DESC_C_MASK)) {
+ if (dpl < cpl || dpl < rpl)
+ goto fail;
+ }
+ } else {
+ if (dpl < cpl || dpl < rpl) {
+ fail:
+ CC_SRC = eflags & ~CC_Z;
+ return;
+ }
+ }
+ CC_SRC = eflags | CC_Z;
+}
+
+void helper_verw(target_ulong selector1)
+{
+ uint32_t e1, e2, eflags, selector;
+ int rpl, dpl, cpl;
+
+ selector = selector1 & 0xffff;
+ eflags = helper_cc_compute_all(CC_OP);
+ if ((selector & 0xfffc) == 0)
+ goto fail;
+ if (load_segment(&e1, &e2, selector) != 0)
+ goto fail;
+ if (!(e2 & DESC_S_MASK))
+ goto fail;
+ rpl = selector & 3;
+ dpl = (e2 >> DESC_DPL_SHIFT) & 3;
+ cpl = env->hflags & HF_CPL_MASK;
+ if (e2 & DESC_CS_MASK) {
+ goto fail;
+ } else {
+ if (dpl < cpl || dpl < rpl)
+ goto fail;
+ if (!(e2 & DESC_W_MASK)) {
+ fail:
+ CC_SRC = eflags & ~CC_Z;
+ return;
+ }
+ }
+ CC_SRC = eflags | CC_Z;
+}
+
+/* x87 FPU helpers */
+
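+/* Set exception flag(s) in the FPU status word; if any raised exception is unmasked
+ in the control word, also set the error-summary and busy bits. */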
+static void fpu_set_exception(int mask)
+{
+ env->fpus |= mask;
+ if (env->fpus & (~env->fpuc & FPUC_EM))
+ env->fpus |= FPUS_SE | FPUS_B;
+}
+
+static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b)
+{
+ if (b == 0.0)
+ fpu_set_exception(FPUS_ZE);
+ return a / b;
+}
+
+static void fpu_raise_exception(void)
+{
+ if (env->cr[0] & CR0_NE_MASK) {
+ raise_exception(EXCP10_COPR);
+ }
+#if !defined(CONFIG_USER_ONLY)
+ else {
+ cpu_set_ferr(env);
+ }
+#endif
+}
+
+void helper_flds_FT0(uint32_t val)
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.i = val;
+ FT0 = float32_to_floatx(u.f, &env->fp_status);
+}
+
+void helper_fldl_FT0(uint64_t val)
+{
+ union {
+ float64 f;
+ uint64_t i;
+ } u;
+ u.i = val;
+ FT0 = float64_to_floatx(u.f, &env->fp_status);
+}
+
+void helper_fildl_FT0(int32_t val)
+{
+ FT0 = int32_to_floatx(val, &env->fp_status);
+}
+
+void helper_flds_ST0(uint32_t val)
+{
+ int new_fpstt;
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ new_fpstt = (env->fpstt - 1) & 7;
+ u.i = val;
+ env->fpregs[new_fpstt].d = float32_to_floatx(u.f, &env->fp_status);
+ env->fpstt = new_fpstt;
+ env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fldl_ST0(uint64_t val)
+{
+ int new_fpstt;
+ union {
+ float64 f;
+ uint64_t i;
+ } u;
+ new_fpstt = (env->fpstt - 1) & 7;
+ u.i = val;
+ env->fpregs[new_fpstt].d = float64_to_floatx(u.f, &env->fp_status);
+ env->fpstt = new_fpstt;
+ env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildl_ST0(int32_t val)
+{
+ int new_fpstt;
+ new_fpstt = (env->fpstt - 1) & 7;
+ env->fpregs[new_fpstt].d = int32_to_floatx(val, &env->fp_status);
+ env->fpstt = new_fpstt;
+ env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildll_ST0(int64_t val)
+{
+ int new_fpstt;
+ new_fpstt = (env->fpstt - 1) & 7;
+ env->fpregs[new_fpstt].d = int64_to_floatx(val, &env->fp_status);
+ env->fpstt = new_fpstt;
+ env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+#ifndef VBOX
+uint32_t helper_fsts_ST0(void)
+#else
+RTCCUINTREG helper_fsts_ST0(void)
+#endif
+{
+ union {
+ float32 f;
+ uint32_t i;
+ } u;
+ u.f = floatx_to_float32(ST0, &env->fp_status);
+ return u.i;
+}
+
+uint64_t helper_fstl_ST0(void)
+{
+ union {
+ float64 f;
+ uint64_t i;
+ } u;
+ u.f = floatx_to_float64(ST0, &env->fp_status);
+ return u.i;
+}
+
+#ifndef VBOX
+int32_t helper_fist_ST0(void)
+#else
+RTCCINTREG helper_fist_ST0(void)
+#endif
+{
+ int32_t val;
+ val = floatx_to_int32(ST0, &env->fp_status);
+ if (val != (int16_t)val)
+ val = -32768;
+ return val;
+}
+
+#ifndef VBOX
+int32_t helper_fistl_ST0(void)
+#else
+RTCCINTREG helper_fistl_ST0(void)
+#endif
+{
+ int32_t val;
+ val = floatx_to_int32(ST0, &env->fp_status);
+ return val;
+}
+
+int64_t helper_fistll_ST0(void)
+{
+ int64_t val;
+ val = floatx_to_int64(ST0, &env->fp_status);
+ return val;
+}
+
+#ifndef VBOX
+int32_t helper_fistt_ST0(void)
+#else
+RTCCINTREG helper_fistt_ST0(void)
+#endif
+{
+ int32_t val;
+ val = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+ if (val != (int16_t)val)
+ val = -32768;
+ return val;
+}
+
+#ifndef VBOX
+int32_t helper_fisttl_ST0(void)
+#else
+RTCCINTREG helper_fisttl_ST0(void)
+#endif
+{
+ int32_t val;
+ val = floatx_to_int32_round_to_zero(ST0, &env->fp_status);
+ return val;
+}
+
+int64_t helper_fisttll_ST0(void)
+{
+ int64_t val;
+ val = floatx_to_int64_round_to_zero(ST0, &env->fp_status);
+ return val;
+}
+
+void helper_fldt_ST0(target_ulong ptr)
+{
+ int new_fpstt;
+ new_fpstt = (env->fpstt - 1) & 7;
+ env->fpregs[new_fpstt].d = helper_fldt(ptr);
+ env->fpstt = new_fpstt;
+ env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0(target_ulong ptr)
+{
+ helper_fstt(ST0, ptr);
+}
+
+void helper_fpush(void)
+{
+ fpush();
+}
+
+void helper_fpop(void)
+{
+ fpop();
+}
+
+void helper_fdecstp(void)
+{
+ env->fpstt = (env->fpstt - 1) & 7;
+ env->fpus &= (~0x4700);
+}
+
+void helper_fincstp(void)
+{
+ env->fpstt = (env->fpstt + 1) & 7;
+ env->fpus &= (~0x4700);
+}
+
+/* FPU move */
+
+void helper_ffree_STN(int st_index)
+{
+ env->fptags[(env->fpstt + st_index) & 7] = 1;
+}
+
+void helper_fmov_ST0_FT0(void)
+{
+ ST0 = FT0;
+}
+
+void helper_fmov_FT0_STN(int st_index)
+{
+ FT0 = ST(st_index);
+}
+
+void helper_fmov_ST0_STN(int st_index)
+{
+ ST0 = ST(st_index);
+}
+
+void helper_fmov_STN_ST0(int st_index)
+{
+ ST(st_index) = ST0;
+}
+
+void helper_fxchg_ST0_STN(int st_index)
+{
+ CPU86_LDouble tmp;
+ tmp = ST(st_index);
+ ST(st_index) = ST0;
+ ST0 = tmp;
+}
+
+/* FPU operations */
+
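+/* Maps floatx_compare() results (less/equal/greater/unordered = -1/0/1/2) to the
+ x87 C3,C2,C0 condition code bits; indexed by result + 1. */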
+static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
+
+void helper_fcom_ST0_FT0(void)
+{
+ int ret;
+
+ ret = floatx_compare(ST0, FT0, &env->fp_status);
+ env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+void helper_fucom_ST0_FT0(void)
+{
+ int ret;
+
+ ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+ env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+void helper_fcomi_ST0_FT0(void)
+{
+ int eflags;
+ int ret;
+
+ ret = floatx_compare(ST0, FT0, &env->fp_status);
+ eflags = helper_cc_compute_all(CC_OP);
+ eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+ CC_SRC = eflags;
+}
+
+void helper_fucomi_ST0_FT0(void)
+{
+ int eflags;
+ int ret;
+
+ ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
+ eflags = helper_cc_compute_all(CC_OP);
+ eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+ CC_SRC = eflags;
+}
+
+void helper_fadd_ST0_FT0(void)
+{
+ ST0 += FT0;
+}
+
+void helper_fmul_ST0_FT0(void)
+{
+ ST0 *= FT0;
+}
+
+void helper_fsub_ST0_FT0(void)
+{
+ ST0 -= FT0;
+}
+
+void helper_fsubr_ST0_FT0(void)
+{
+ ST0 = FT0 - ST0;
+}
+
+void helper_fdiv_ST0_FT0(void)
+{
+ ST0 = helper_fdiv(ST0, FT0);
+}
+
+void helper_fdivr_ST0_FT0(void)
+{
+ ST0 = helper_fdiv(FT0, ST0);
+}
+
+/* fp operations between STN and ST0 */
+
+void helper_fadd_STN_ST0(int st_index)
+{
+ ST(st_index) += ST0;
+}
+
+void helper_fmul_STN_ST0(int st_index)
+{
+ ST(st_index) *= ST0;
+}
+
+void helper_fsub_STN_ST0(int st_index)
+{
+ ST(st_index) -= ST0;
+}
+
+void helper_fsubr_STN_ST0(int st_index)
+{
+ CPU86_LDouble *p;
+ p = &ST(st_index);
+ *p = ST0 - *p;
+}
+
+void helper_fdiv_STN_ST0(int st_index)
+{
+ CPU86_LDouble *p;
+ p = &ST(st_index);
+ *p = helper_fdiv(*p, ST0);
+}
+
+void helper_fdivr_STN_ST0(int st_index)
+{
+ CPU86_LDouble *p;
+ p = &ST(st_index);
+ *p = helper_fdiv(ST0, *p);
+}
+
+/* misc FPU operations */
+void helper_fchs_ST0(void)
+{
+ ST0 = floatx_chs(ST0);
+}
+
+void helper_fabs_ST0(void)
+{
+ ST0 = floatx_abs(ST0);
+}
+
+void helper_fld1_ST0(void)
+{
+ ST0 = f15rk[1];
+}
+
+void helper_fldl2t_ST0(void)
+{
+ ST0 = f15rk[6];
+}
+
+void helper_fldl2e_ST0(void)
+{
+ ST0 = f15rk[5];
+}
+
+void helper_fldpi_ST0(void)
+{
+ ST0 = f15rk[2];
+}
+
+void helper_fldlg2_ST0(void)
+{
+ ST0 = f15rk[3];
+}
+
+void helper_fldln2_ST0(void)
+{
+ ST0 = f15rk[4];
+}
+
+void helper_fldz_ST0(void)
+{
+ ST0 = f15rk[0];
+}
+
+void helper_fldz_FT0(void)
+{
+ FT0 = f15rk[0];
+}
+
+#ifndef VBOX
+uint32_t helper_fnstsw(void)
+#else
+RTCCUINTREG helper_fnstsw(void)
+#endif
+{
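+ /* Return the status word with the current top-of-stack pointer merged into bits 11..13. */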
+ return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+}
+
+#ifndef VBOX
+uint32_t helper_fnstcw(void)
+#else
+RTCCUINTREG helper_fnstcw(void)
+#endif
+{
+ return env->fpuc;
+}
+
+static void update_fp_status(void)
+{
+ int rnd_type;
+
+ /* set rounding mode */
+ switch(env->fpuc & RC_MASK) {
+ default:
+ case RC_NEAR:
+ rnd_type = float_round_nearest_even;
+ break;
+ case RC_DOWN:
+ rnd_type = float_round_down;
+ break;
+ case RC_UP:
+ rnd_type = float_round_up;
+ break;
+ case RC_CHOP:
+ rnd_type = float_round_to_zero;
+ break;
+ }
+ set_float_rounding_mode(rnd_type, &env->fp_status);
+#ifdef FLOATX80
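+ /* Precision control (FPUC bits 8..9): 0 = single, 2 = double, 3 = extended;
+ the reserved value 1 falls through to extended. */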
+ switch((env->fpuc >> 8) & 3) {
+ case 0:
+ rnd_type = 32;
+ break;
+ case 2:
+ rnd_type = 64;
+ break;
+ case 3:
+ default:
+ rnd_type = 80;
+ break;
+ }
+ set_floatx80_rounding_precision(rnd_type, &env->fp_status);
+#endif
+}
+
+void helper_fldcw(uint32_t val)
+{
+ env->fpuc = val;
+ update_fp_status();
+}
+
+void helper_fclex(void)
+{
+ env->fpus &= 0x7f00;
+}
+
+void helper_fwait(void)
+{
+ if (env->fpus & FPUS_SE)
+ fpu_raise_exception();
+}
+
+void helper_fninit(void)
+{
+ env->fpus = 0;
+ env->fpstt = 0;
+ env->fpuc = 0x37f;
+ env->fptags[0] = 1;
+ env->fptags[1] = 1;
+ env->fptags[2] = 1;
+ env->fptags[3] = 1;
+ env->fptags[4] = 1;
+ env->fptags[5] = 1;
+ env->fptags[6] = 1;
+ env->fptags[7] = 1;
+}
+
+/* BCD ops */
+
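+/* Load a ten-byte packed BCD operand (18 decimal digits in bytes 0..8, sign in bit 7
+ of byte 9) and push it onto the FPU stack. */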
+void helper_fbld_ST0(target_ulong ptr)
+{
+ CPU86_LDouble tmp;
+ uint64_t val;
+ unsigned int v;
+ int i;
+
+ val = 0;
+ for(i = 8; i >= 0; i--) {
+ v = ldub(ptr + i);
+ val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
+ }
+ tmp = val;
+ if (ldub(ptr + 9) & 0x80)
+ tmp = -tmp;
+ fpush();
+ ST0 = tmp;
+}
+
+void helper_fbst_ST0(target_ulong ptr)
+{
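+ /* Store ST0 as a ten-byte packed BCD value: sign byte (0x00 or 0x80) at offset 9,
+ two decimal digits per byte at offsets 0..8. */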
+ int v;
+ target_ulong mem_ref, mem_end;
+ int64_t val;
+
+ val = floatx_to_int64(ST0, &env->fp_status);
+ mem_ref = ptr;
+ mem_end = mem_ref + 9;
+ if (val < 0) {
+ stb(mem_end, 0x80);
+ val = -val;
+ } else {
+ stb(mem_end, 0x00);
+ }
+ while (mem_ref < mem_end) {
+ if (val == 0)
+ break;
+ v = val % 100;
+ val = val / 100;
+ v = ((v / 10) << 4) | (v % 10);
+ stb(mem_ref++, v);
+ }
+ while (mem_ref < mem_end) {
+ stb(mem_ref++, 0);
+ }
+}
+
+void helper_f2xm1(void)
+{
+ ST0 = pow(2.0,ST0) - 1.0;
+}
+
+void helper_fyl2x(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if (fptemp>0.0){
+ fptemp = log(fptemp)/log(2.0); /* log2(ST) */
+ ST1 *= fptemp;
+ fpop();
+ } else {
+ env->fpus &= (~0x4700);
+ env->fpus |= 0x400;
+ }
+}
+
+void helper_fptan(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = tan(fptemp);
+ fpush();
+ ST0 = 1.0;
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**52 only */
+ }
+}
+
+void helper_fpatan(void)
+{
+ CPU86_LDouble fptemp, fpsrcop;
+
+ fpsrcop = ST1;
+ fptemp = ST0;
+ ST1 = atan2(fpsrcop,fptemp);
+ fpop();
+}
+
+void helper_fxtract(void)
+{
+ CPU86_LDoubleU temp;
+ unsigned int expdif;
+
+ temp.d = ST0;
+ expdif = EXPD(temp) - EXPBIAS;
+ /*DP exponent bias*/
+ ST0 = expdif;
+ fpush();
+ BIASEXPONENT(temp);
+ ST0 = temp.d;
+}
+
+void helper_fprem1(void)
+{
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ signed long long int q;
+
+#ifndef VBOX /* Unfortunately, we cannot handle isinf/isnan easily in wrapper */
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+#else
+ if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) {
+#endif
+ ST0 = 0.0 / 0.0; /* NaN */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ return;
+ }
+
+ fpsrcop = ST0;
+ fptemp = ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+ if (expdif < 0) {
+ /* optimisation? taken from the AMD docs */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* ST0 is unchanged */
+ return;
+ }
+
+ if (expdif < 53) {
+ dblq = fpsrcop / fptemp;
+ /* round dblq towards nearest integer */
+ dblq = rint(dblq);
+ ST0 = fpsrcop - fptemp * dblq;
+
+ /* convert dblq to q by truncating towards zero */
+ if (dblq < 0.0)
+ q = (signed long long int)(-dblq);
+ else
+ q = (signed long long int)dblq;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
+ env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
+ } else {
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, expdif - 50);
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0) ?
+ -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+void helper_fprem(void)
+{
+ CPU86_LDouble dblq, fpsrcop, fptemp;
+ CPU86_LDoubleU fpsrcop1, fptemp1;
+ int expdif;
+ signed long long int q;
+
+#ifndef VBOX /* Unfortunately, we cannot easily handle isinf/isnan in wrapper */
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
+#else
+ if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) {
+#endif
+ ST0 = 0.0 / 0.0; /* NaN */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ return;
+ }
+
+ fpsrcop = (CPU86_LDouble)ST0;
+ fptemp = (CPU86_LDouble)ST1;
+ fpsrcop1.d = fpsrcop;
+ fptemp1.d = fptemp;
+ expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+ if (expdif < 0) {
+ /* optimisation? taken from the AMD docs */
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* ST0 is unchanged */
+ return;
+ }
+
+ if ( expdif < 53 ) {
+ dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
+ /* round dblq towards zero */
+ dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
+ ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
+
+ /* convert dblq to q by truncating towards zero */
+ if (dblq < 0.0)
+ q = (signed long long int)(-dblq);
+ else
+ q = (signed long long int)dblq;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
+ env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
+ } else {
+ int N = 32 + (expdif % 32); /* as per AMD docs */
+ env->fpus |= 0x400; /* C2 <-- 1 */
+ fptemp = pow(2.0, (double)(expdif - N));
+ fpsrcop = (ST0 / ST1) / fptemp;
+ /* fpsrcop = integer obtained by chopping */
+ fpsrcop = (fpsrcop < 0.0) ?
+ -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+ ST0 -= (ST1 * fpsrcop * fptemp);
+ }
+}
+
+void helper_fyl2xp1(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp+1.0)>0.0) {
+ fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
+ ST1 *= fptemp;
+ fpop();
+ } else {
+ env->fpus &= (~0x4700);
+ env->fpus |= 0x400;
+ }
+}
+
+void helper_fsqrt(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if (fptemp<0.0) {
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ env->fpus |= 0x400;
+ }
+ ST0 = sqrt(fptemp);
+}
+
+void helper_fsincos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ fpush();
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+void helper_frndint(void)
+{
+ ST0 = floatx_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(void)
+{
+ ST0 = ldexp (ST0, (int)(ST1));
+}
+
+void helper_fsin(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = sin(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**53 only */
+ }
+}
+
+void helper_fcos(void)
+{
+ CPU86_LDouble fptemp;
+
+ fptemp = ST0;
+ if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
+ env->fpus |= 0x400;
+ } else {
+ ST0 = cos(fptemp);
+ env->fpus &= (~0x400); /* C2 <-- 0 */
+ /* the above code is for |arg| < 2**63 only */
+ }
+}
+
+void helper_fxam_ST0(void)
+{
+ CPU86_LDoubleU temp;
+ int expdif;
+
+ temp.d = ST0;
+
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
+ if (SIGND(temp))
+ env->fpus |= 0x200; /* C1 <-- 1 */
+
+ /* XXX: test fptags too */
+ expdif = EXPD(temp);
+ if (expdif == MAXEXPD) {
+#ifdef USE_X86LDOUBLE
+ if (MANTD(temp) == 0x8000000000000000ULL)
+#else
+ if (MANTD(temp) == 0)
+#endif
+ env->fpus |= 0x500 /*Infinity*/;
+ else
+ env->fpus |= 0x100 /*NaN*/;
+ } else if (expdif == 0) {
+ if (MANTD(temp) == 0)
+ env->fpus |= 0x4000 /*Zero*/;
+ else
+ env->fpus |= 0x4400 /*Denormal*/;
+ } else {
+ env->fpus |= 0x400;
+ }
+}
+
+void helper_fstenv(target_ulong ptr, int data32)
+{
+ int fpus, fptag, exp, i;
+ uint64_t mant;
+ CPU86_LDoubleU tmp;
+
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
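+ /* Rebuild the 2-bit-per-register tag word: 00 = valid, 01 = zero,
+ 10 = special (NaN/infinity/denormal), 11 = empty. */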
+ for (i=7; i>=0; i--) {
+ fptag <<= 2;
+ if (env->fptags[i]) {
+ fptag |= 3;
+ } else {
+ tmp.d = env->fpregs[i].d;
+ exp = EXPD(tmp);
+ mant = MANTD(tmp);
+ if (exp == 0 && mant == 0) {
+ /* zero */
+ fptag |= 1;
+ } else if (exp == 0 || exp == MAXEXPD
+#ifdef USE_X86LDOUBLE
+ || (mant & (1LL << 63)) == 0
+#endif
+ ) {
+ /* NaNs, infinity, denormal */
+ fptag |= 2;
+ }
+ }
+ }
+ if (data32) {
+ /* 32 bit */
+ stl(ptr, env->fpuc);
+ stl(ptr + 4, fpus);
+ stl(ptr + 8, fptag);
+ stl(ptr + 12, 0); /* fpip */
+ stl(ptr + 16, 0); /* fpcs */
+ stl(ptr + 20, 0); /* fpoo */
+ stl(ptr + 24, 0); /* fpos */
+ } else {
+ /* 16 bit */
+ stw(ptr, env->fpuc);
+ stw(ptr + 2, fpus);
+ stw(ptr + 4, fptag);
+ stw(ptr + 6, 0);
+ stw(ptr + 8, 0);
+ stw(ptr + 10, 0);
+ stw(ptr + 12, 0);
+ }
+}
+
+void helper_fldenv(target_ulong ptr, int data32)
+{
+ int i, fpus, fptag;
+
+ if (data32) {
+ env->fpuc = lduw(ptr);
+ fpus = lduw(ptr + 4);
+ fptag = lduw(ptr + 8);
+ }
+ else {
+ env->fpuc = lduw(ptr);
+ fpus = lduw(ptr + 2);
+ fptag = lduw(ptr + 4);
+ }
+ env->fpstt = (fpus >> 11) & 7;
+ env->fpus = fpus & ~0x3800;
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag & 3) == 3);
+ fptag >>= 2;
+ }
+}
+
+void helper_fsave(target_ulong ptr, int data32)
+{
+ CPU86_LDouble tmp;
+ int i;
+
+ helper_fstenv(ptr, data32);
+
+ ptr += (14 << data32);
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt(tmp, ptr);
+ ptr += 10;
+ }
+
+ /* fninit */
+ env->fpus = 0;
+ env->fpstt = 0;
+ env->fpuc = 0x37f;
+ env->fptags[0] = 1;
+ env->fptags[1] = 1;
+ env->fptags[2] = 1;
+ env->fptags[3] = 1;
+ env->fptags[4] = 1;
+ env->fptags[5] = 1;
+ env->fptags[6] = 1;
+ env->fptags[7] = 1;
+}
+
+void helper_frstor(target_ulong ptr, int data32)
+{
+ CPU86_LDouble tmp;
+ int i;
+
+ helper_fldenv(ptr, data32);
+ ptr += (14 << data32);
+
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt(ptr);
+ ST(i) = tmp;
+ ptr += 10;
+ }
+}
+
+void helper_fxsave(target_ulong ptr, int data64)
+{
+ int fpus, fptag, i, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ target_ulong addr;
+
+ /* The operand must be 16 byte aligned */
+ if (ptr & 0xf) {
+ raise_exception(EXCP0D_GPF);
+ }
+
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
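+ /* FXSAVE stores an abridged tag word, one bit per register with 1 = valid;
+ fptags[] uses 1 = empty, hence the XOR with 0xff below. */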
+ for(i = 0; i < 8; i++) {
+ fptag |= (env->fptags[i] << i);
+ }
+ stw(ptr, env->fpuc);
+ stw(ptr + 2, fpus);
+ stw(ptr + 4, fptag ^ 0xff);
+#ifdef TARGET_X86_64
+ if (data64) {
+ stq(ptr + 0x08, 0); /* rip */
+ stq(ptr + 0x10, 0); /* rdp */
+ } else
+#endif
+ {
+ stl(ptr + 0x08, 0); /* eip */
+ stl(ptr + 0x0c, 0); /* sel */
+ stl(ptr + 0x10, 0); /* dp */
+ stl(ptr + 0x14, 0); /* sel */
+ }
+
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt(tmp, addr);
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it */
+ stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+ stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+ if (env->hflags & HF_CS64_MASK)
+ nb_xmm_regs = 16;
+ else
+ nb_xmm_regs = 8;
+ addr = ptr + 0xa0;
+ /* Fast FXSAVE leaves out the XMM registers */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ for(i = 0; i < nb_xmm_regs; i++) {
+ stq(addr, env->xmm_regs[i].XMM_Q(0));
+ stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+ addr += 16;
+ }
+ }
+ }
+}
+
+void helper_fxrstor(target_ulong ptr, int data64)
+{
+ int i, fpus, fptag, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ target_ulong addr;
+
+ /* The operand must be 16 byte aligned */
+ if (ptr & 0xf) {
+ raise_exception(EXCP0D_GPF);
+ }
+
+ env->fpuc = lduw(ptr);
+ fpus = lduw(ptr + 2);
+ fptag = lduw(ptr + 4);
+ env->fpstt = (fpus >> 11) & 7;
+ env->fpus = fpus & ~0x3800;
+ fptag ^= 0xff;
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag >> i) & 1);
+ }
+
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt(addr);
+ ST(i) = tmp;
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it */
+ env->mxcsr = ldl(ptr + 0x18);
+ //ldl(ptr + 0x1c);
+ if (env->hflags & HF_CS64_MASK)
+ nb_xmm_regs = 16;
+ else
+ nb_xmm_regs = 8;
+ addr = ptr + 0xa0;
+ /* Fast FXRESTORE leaves out the XMM registers */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if !defined(VBOX) || __GNUC__ < 4
+ env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+ env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+# if 1
+ env->xmm_regs[i].XMM_L(0) = ldl(addr);
+ env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+ env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+ env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+# else
+ /* this works fine on Mac OS X, gcc 4.0.1 */
+ uint64_t u64 = ldq(addr);
+ env->xmm_regs[i].XMM_Q(0) = u64;
+ u64 = ldq(addr + 8);
+ env->xmm_regs[i].XMM_Q(1) = u64;
+# endif
+#endif
+ addr += 16;
+ }
+ }
+ }
+}
+
+#ifndef USE_X86LDOUBLE
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+ CPU86_LDoubleU temp;
+ int e;
+
+ temp.d = f;
+ /* mantissa */
+ *pmant = (MANTD(temp) << 11) | (1LL << 63);
+ /* exponent + sign */
+ e = EXPD(temp) - EXPBIAS + 16383;
+ e |= SIGND(temp) >> 16;
+ *pexp = e;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+ CPU86_LDoubleU temp;
+ int e;
+ uint64_t ll;
+
+ /* XXX: handle overflow ? */
+ e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */
+ e |= (upper >> 4) & 0x800; /* sign */
+ ll = (mant >> 11) & ((1LL << 52) - 1);
+#ifdef __arm__
+ temp.l.upper = (e << 20) | (ll >> 32);
+ temp.l.lower = ll;
+#else
+ temp.ll = ll | ((uint64_t)e << 52);
+#endif
+ return temp.d;
+}
+
+#else
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
+{
+ CPU86_LDoubleU temp;
+
+ temp.d = f;
+ *pmant = temp.l.lower;
+ *pexp = temp.l.upper;
+}
+
+CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+ CPU86_LDoubleU temp;
+
+ temp.l.upper = upper;
+ temp.l.lower = mant;
+ return temp.d;
+}
+#endif
+
+#ifdef TARGET_X86_64
+
+//#define DEBUG_MULDIV
+
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ *plow += a;
+ /* carry test */
+ if (*plow < a)
+ (*phigh)++;
+ *phigh += b;
+}
+
+static void neg128(uint64_t *plow, uint64_t *phigh)
+{
+ *plow = ~ *plow;
+ *phigh = ~ *phigh;
+ add128(plow, phigh, 1, 0);
+}
+
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
+{
+ uint64_t q, r, a1, a0;
+ int i, qb, ab;
+
+ a0 = *plow;
+ a1 = *phigh;
+ if (a1 == 0) {
+ q = a0 / b;
+ r = a0 % b;
+ *plow = q;
+ *phigh = r;
+ } else {
+ if (a1 >= b)
+ return 1;
+ /* XXX: use a better algorithm */
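+ /* Restoring long division: shift the 128-bit dividend left one bit at a time,
+ subtract b from the high half whenever possible, and collect quotient bits in a0. */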
+ for(i = 0; i < 64; i++) {
+ ab = a1 >> 63;
+ a1 = (a1 << 1) | (a0 >> 63);
+ if (ab || a1 >= b) {
+ a1 -= b;
+ qb = 1;
+ } else {
+ qb = 0;
+ }
+ a0 = (a0 << 1) | qb;
+ }
+#if defined(DEBUG_MULDIV)
+ printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+ *phigh, *plow, b, a0, a1);
+#endif
+ *plow = a0;
+ *phigh = a1;
+ }
+ return 0;
+}
+
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
+{
+ int sa, sb;
+ sa = ((int64_t)*phigh < 0);
+ if (sa)
+ neg128(plow, phigh);
+ sb = (b < 0);
+ if (sb)
+ b = -b;
+ if (div64(plow, phigh, b) != 0)
+ return 1;
+ if (sa ^ sb) {
+ if (*plow > (1ULL << 63))
+ return 1;
+ *plow = - *plow;
+ } else {
+ if (*plow >= (1ULL << 63))
+ return 1;
+ }
+ if (sa)
+ *phigh = - *phigh;
+ return 0;
+}
+
+void helper_mulq_EAX_T0(target_ulong t0)
+{
+ uint64_t r0, r1;
+
+ mulu64(&r0, &r1, EAX, t0);
+ EAX = r0;
+ EDX = r1;
+ CC_DST = r0;
+ CC_SRC = r1;
+}
+
+void helper_imulq_EAX_T0(target_ulong t0)
+{
+ uint64_t r0, r1;
+
+ muls64(&r0, &r1, EAX, t0);
+ EAX = r0;
+ EDX = r1;
+ CC_DST = r0;
+ CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+}
+
+target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1)
+{
+ uint64_t r0, r1;
+
+ muls64(&r0, &r1, t0, t1);
+ CC_DST = r0;
+ CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+ return r0;
+}
+
+void helper_divq_EAX(target_ulong t0)
+{
+ uint64_t r0, r1;
+ if (t0 == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ r0 = EAX;
+ r1 = EDX;
+ if (div64(&r0, &r1, t0))
+ raise_exception(EXCP00_DIVZ);
+ EAX = r0;
+ EDX = r1;
+}
+
+void helper_idivq_EAX(target_ulong t0)
+{
+ uint64_t r0, r1;
+ if (t0 == 0) {
+ raise_exception(EXCP00_DIVZ);
+ }
+ r0 = EAX;
+ r1 = EDX;
+ if (idiv64(&r0, &r1, t0))
+ raise_exception(EXCP00_DIVZ);
+ EAX = r0;
+ EDX = r1;
+}
+#endif
+
+static void do_hlt(void)
+{
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+ env->halted = 1;
+ env->exception_index = EXCP_HLT;
+ cpu_loop_exit();
+}
+
+void helper_hlt(int next_eip_addend)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
+ EIP += next_eip_addend;
+
+ do_hlt();
+}
+
+void helper_monitor(target_ulong ptr)
+{
+#ifdef VBOX
+ if ((uint32_t)ECX > 1)
+ raise_exception(EXCP0D_GPF);
+#else /* !VBOX */
+ if ((uint32_t)ECX != 0)
+ raise_exception(EXCP0D_GPF);
+#endif /* !VBOX */
+ /* XXX: store address ? */
+ helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
+}
+
+void helper_mwait(int next_eip_addend)
+{
+ if ((uint32_t)ECX != 0)
+ raise_exception(EXCP0D_GPF);
+#ifdef VBOX
+ helper_hlt(next_eip_addend);
+#else /* !VBOX */
+ helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
+ EIP += next_eip_addend;
+
+ /* XXX: not complete but not completely erroneous */
+ if (env->cpu_index != 0 || env->next_cpu != NULL) {
+ /* more than one CPU: do not sleep because another CPU may
+ wake this one */
+ } else {
+ do_hlt();
+ }
+#endif /* !VBOX */
+}
+
+void helper_debug(void)
+{
+ env->exception_index = EXCP_DEBUG;
+ cpu_loop_exit();
+}
+
+void helper_reset_rf(void)
+{
+ env->eflags &= ~RF_MASK;
+}
+
+void helper_raise_interrupt(int intno, int next_eip_addend)
+{
+ raise_interrupt(intno, 1, 0, next_eip_addend);
+}
+
+void helper_raise_exception(int exception_index)
+{
+ raise_exception(exception_index);
+}
+
+void helper_cli(void)
+{
+ env->eflags &= ~IF_MASK;
+}
+
+void helper_sti(void)
+{
+ env->eflags |= IF_MASK;
+}
+
+#ifdef VBOX
+void helper_cli_vme(void)
+{
+ env->eflags &= ~VIF_MASK;
+}
+
+void helper_sti_vme(void)
+{
+ /* First check, then change eflags according to the AMD manual */
+ if (env->eflags & VIP_MASK) {
+ raise_exception(EXCP0D_GPF);
+ }
+ env->eflags |= VIF_MASK;
+}
+#endif /* VBOX */
+
+#if 0
+/* vm86plus instructions */
+void helper_cli_vm(void)
+{
+ env->eflags &= ~VIF_MASK;
+}
+
+void helper_sti_vm(void)
+{
+ env->eflags |= VIF_MASK;
+ if (env->eflags & VIP_MASK) {
+ raise_exception(EXCP0D_GPF);
+ }
+}
+#endif
+
+void helper_set_inhibit_irq(void)
+{
+ env->hflags |= HF_INHIBIT_IRQ_MASK;
+}
+
+void helper_reset_inhibit_irq(void)
+{
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+}
+
+void helper_boundw(target_ulong a0, int v)
+{
+ int low, high;
+ low = ldsw(a0);
+ high = ldsw(a0 + 2);
+ v = (int16_t)v;
+ if (v < low || v > high) {
+ raise_exception(EXCP05_BOUND);
+ }
+}
+
+void helper_boundl(target_ulong a0, int v)
+{
+ int low, high;
+ low = ldl(a0);
+ high = ldl(a0 + 4);
+ if (v < low || v > high) {
+ raise_exception(EXCP05_BOUND);
+ }
+}
+
+static float approx_rsqrt(float a)
+{
+ return 1.0 / sqrt(a);
+}
+
+static float approx_rcp(float a)
+{
+ return 1.0 / a;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#endif
+
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+/* This code assumes that real physical addresses always fit into a host CPU register,
+ which is wrong in general, but true for our current use cases. */
+RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS8(addr);
+}
+RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU8(addr);
+}
+void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU8(addr, val);
+}
+RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS16(addr);
+}
+RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU16(addr);
+}
+void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU16(addr, val);
+}
+RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadS32(addr);
+}
+RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU32(addr);
+}
+void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val)
+{
+ remR3PhysWriteU32(addr, val);
+}
+uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr)
+{
+ return remR3PhysReadU64(addr);
+}
+void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val)
+{
+ remR3PhysWriteU64(addr, val);
+}
+#endif /* VBOX */
+
+#if !defined(CONFIG_USER_ONLY)
+/* try to fill the TLB and return an exception if error. If retaddr is
+ NULL, it means that the function was called in C code (i.e. not
+ from generated code or from helper.c) */
+/* XXX: fix it to restore all registers */
+void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+{
+ TranslationBlock *tb;
+ int ret;
+ uintptr_t pc;
+ CPUX86State *saved_env;
+
+ /* XXX: hack to restore env in all cases, even if not called from
+ generated code */
+ saved_env = env;
+ env = cpu_single_env;
+
+ ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+ if (ret) {
+ if (retaddr) {
+ /* now we have a real cpu fault */
+ pc = (uintptr_t)retaddr;
+ tb = tb_find_pc(pc);
+ if (tb) {
+ /* the PC is inside the translated code. It means that we have
+ a virtual CPU fault */
+ cpu_restore_state(tb, env, pc, NULL);
+ }
+ }
+ raise_exception_err(env->exception_index, env->error_code);
+ }
+ env = saved_env;
+}
+#endif
+
+#ifdef VBOX
+
+/**
+ * Correctly computes the eflags.
+ * @returns eflags.
+ * @param env1 CPU environment.
+ */
+uint32_t raw_compute_eflags(CPUX86State *env1)
+{
+ CPUX86State *savedenv = env;
+ uint32_t efl;
+ env = env1;
+ efl = compute_eflags();
+ env = savedenv;
+ return efl;
+}
+
+/**
+ * Reads byte from virtual address in guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns read data byte.
+ * @param env1 CPU environment.
+ * @param addr GC Virtual address.
+ */
+uint8_t read_byte(CPUX86State *env1, target_ulong addr)
+{
+ CPUX86State *savedenv = env;
+ uint8_t u8;
+ env = env1;
+ u8 = ldub_kernel(addr);
+ env = savedenv;
+ return u8;
+}
+
+/**
+ * Reads a 16-bit word from a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns read data word.
+ * @param env1 CPU environment.
+ * @param addr GC Virtual address.
+ */
+uint16_t read_word(CPUX86State *env1, target_ulong addr)
+{
+ CPUX86State *savedenv = env;
+ uint16_t u16;
+ env = env1;
+ u16 = lduw_kernel(addr);
+ env = savedenv;
+ return u16;
+}
+
+/**
+ * Reads a 32-bit dword from a virtual address in the guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @returns read data dword.
+ * @param env1 CPU environment.
+ * @param addr GC Virtual address.
+ */
+uint32_t read_dword(CPUX86State *env1, target_ulong addr)
+{
+ CPUX86State *savedenv = env;
+ uint32_t u32;
+ env = env1;
+ u32 = ldl_kernel(addr);
+ env = savedenv;
+ return u32;
+}
+
+/**
+ * Writes byte to virtual address in guest memory area.
+ * XXX: is it working for any addresses? swapped out pages?
+ * @param env1 CPU environment.
+ * @param addr GC Virtual address.
+ * @param val byte value
+ */
+void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val)
+{
+ CPUX86State *savedenv = env;
+ env = env1;
+ stb(addr, val);
+ env = savedenv;
+}
+
+void write_word(CPUX86State *env1, target_ulong addr, uint16_t val)
+{
+ CPUX86State *savedenv = env;
+ env = env1;
+ stw(addr, val);
+ env = savedenv;
+}
+
+void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val)
+{
+ CPUX86State *savedenv = env;
+ env = env1;
+ stl(addr, val);
+ env = savedenv;
+}
+
+/**
+ * Correctly loads selector into segment register with updating internal
+ * qemu data/caches.
+ * @param env1 CPU environment.
+ * @param seg_reg Segment register.
+ * @param selector Selector to load.
+ */
+void sync_seg(CPUX86State *env1, int seg_reg, int selector)
+{
+ CPUX86State *savedenv = env;
+#ifdef FORCE_SEGMENT_SYNC
+ jmp_buf old_buf;
+#endif
+
+ env = env1;
+
+ if ( env->eflags & X86_EFL_VM
+ || !(env->cr[0] & X86_CR0_PE))
+ {
+ load_seg_vm(seg_reg, selector);
+
+ env = savedenv;
+
+ /* Successful sync. */
+ Assert(env1->segs[seg_reg].newselector == 0);
+ }
+ else
+ {
+ /* For some reason this works even without saving/restoring the jump buffer; since this
+ code is time critical, we only do so when FORCE_SEGMENT_SYNC is defined. */
+#ifdef FORCE_SEGMENT_SYNC
+ memcpy(&old_buf, &env1->jmp_env, sizeof(old_buf));
+#endif
+ if (setjmp(env1->jmp_env) == 0)
+ {
+ if (seg_reg == R_CS)
+ {
+ uint32_t e1, e2;
+ e1 = e2 = 0;
+ load_segment(&e1, &e2, selector);
+ cpu_x86_load_seg_cache(env, R_CS, selector,
+ get_seg_base(e1, e2),
+ get_seg_limit(e1, e2),
+ e2);
+ }
+ else
+ helper_load_seg(seg_reg, selector);
+ /* We used to use tss_load_seg(seg_reg, selector) here, but it ignored loading of
+ 0 selectors, which in turn led to subtle problems like #3588. */
+
+ env = savedenv;
+
+ /* Successful sync. */
+ Assert(env1->segs[seg_reg].newselector == 0);
+ }
+ else
+ {
+ env = savedenv;
+
+ /* Postpone sync until the guest uses the selector. */
+ env1->segs[seg_reg].selector = selector; /* hidden values are now incorrect, but will be resynced when this register is accessed. */
+ env1->segs[seg_reg].newselector = selector;
+ Log(("sync_seg: out of sync seg_reg=%d selector=%#x\n", seg_reg, selector));
+ env1->exception_index = -1;
+ env1->error_code = 0;
+ env1->old_exception = -1;
+ }
+#ifdef FORCE_SEGMENT_SYNC
+ memcpy(&env1->jmp_env, &old_buf, sizeof(old_buf));
+#endif
+ }
+
+}
+
+DECLINLINE(void) tb_reset_jump(TranslationBlock *tb, int n)
+{
+ tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
+
+int emulate_single_instr(CPUX86State *env1)
+{
+ TranslationBlock *tb;
+ TranslationBlock *current;
+ int flags;
+ uint8_t *tc_ptr;
+ target_ulong old_eip;
+
+ /* ensures env is loaded! */
+ CPUX86State *savedenv = env;
+ env = env1;
+
+ RAWEx_ProfileStart(env, STATS_EMULATE_SINGLE_INSTR);
+
+ current = env->current_tb;
+ env->current_tb = NULL;
+ flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
+
+ /*
+ * Translate only one instruction.
+ */
+ ASMAtomicOrU32(&env->state, CPU_EMULATE_SINGLE_INSTR);
+ tb = tb_gen_code(env, env->eip + env->segs[R_CS].base,
+ env->segs[R_CS].base, flags, 0);
+
+ ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR);
+
+
+ /* tb_link_phys: */
+ tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2);
+ tb->jmp_next[0] = NULL;
+ tb->jmp_next[1] = NULL;
+ Assert(tb->jmp_next[0] == NULL);
+ Assert(tb->jmp_next[1] == NULL);
+ if (tb->tb_next_offset[0] != 0xffff)
+ tb_reset_jump(tb, 0);
+ if (tb->tb_next_offset[1] != 0xffff)
+ tb_reset_jump(tb, 1);
+
+ /*
+ * Execute it using emulation
+ */
+ old_eip = env->eip;
+ env->current_tb = tb;
+
+ /*
+ * eip remains the same for repeated instructions; it is unclear why qemu doesn't do a jump inside
+ * the generated code instead. This is perhaps not a very safe hack.
+ */
+ while (old_eip == env->eip)
+ {
+ tc_ptr = tb->tc_ptr;
+
+#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM)
+ int fake_ret;
+ tcg_qemu_tb_exec(tc_ptr, fake_ret);
+#else
+ tcg_qemu_tb_exec(tc_ptr);
+#endif
+
+ /*
+ * Exit once we detect an external interrupt and interrupts are enabled
+ */
+ if ( (env->interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER))
+ || ( (env->eflags & IF_MASK)
+ && !(env->hflags & HF_INHIBIT_IRQ_MASK)
+ && (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) )
+ )
+ {
+ break;
+ }
+ if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) {
+ tlb_flush(env, true);
+ }
+ }
+ env->current_tb = current;
+
+ tb_phys_invalidate(tb, -1);
+ tb_free(tb);
+/*
+ Assert(tb->tb_next_offset[0] == 0xffff);
+ Assert(tb->tb_next_offset[1] == 0xffff);
+ Assert(tb->tb_next[0] == 0xffff);
+ Assert(tb->tb_next[1] == 0xffff);
+ Assert(tb->jmp_next[0] == NULL);
+ Assert(tb->jmp_next[1] == NULL);
+ Assert(tb->jmp_first == NULL); */
+
+ RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR);
+
+ /*
+ * Execute the next instruction when we encounter instruction fusing.
+ */
+ if (env->hflags & HF_INHIBIT_IRQ_MASK)
+ {
+ Log(("REM: Emulating next instruction due to instruction fusing (HF_INHIBIT_IRQ_MASK) at %RGv\n", env->eip));
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ emulate_single_instr(env);
+ }
+
+ env = savedenv;
+ return 0;
+}
+
+/**
+ * Correctly loads a new ldtr selector.
+ *
+ * @param env1 CPU environment.
+ * @param selector Selector to load.
+ */
+void sync_ldtr(CPUX86State *env1, int selector)
+{
+ CPUX86State *saved_env = env;
+ if (setjmp(env1->jmp_env) == 0)
+ {
+ env = env1;
+ helper_lldt(selector);
+ env = saved_env;
+ }
+ else
+ {
+ env = saved_env;
+#ifdef VBOX_STRICT
+ cpu_abort(env1, "sync_ldtr: selector=%#x\n", selector);
+#endif
+ }
+}
+
+int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr,
+ uint32_t *esp_ptr, int dpl)
+{
+ int type, index, shift;
+
+ CPUX86State *savedenv = env;
+ env = env1;
+
+ if (!(env->tr.flags & DESC_P_MASK))
+ cpu_abort(env, "invalid tss");
+ type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf;
+ if ((type & 7) != 3)
+ cpu_abort(env, "invalid tss type %d", type);
+ shift = type >> 3;
+ index = (dpl * 4 + 2) << shift;
+ if (index + (4 << shift) - 1 > env->tr.limit)
+ {
+ env = savedenv;
+ return 0;
+ }
+ //raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
+
+ if (shift == 0) {
+ *esp_ptr = lduw_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 2);
+ } else {
+ *esp_ptr = ldl_kernel(env->tr.base + index);
+ *ss_ptr = lduw_kernel(env->tr.base + index + 4);
+ }
+
+ env = savedenv;
+ return 1;
+}
+
+//*****************************************************************************
+// Needs to be at the bottom of the file (overriding macros)
+
+static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr)
+{
+#ifdef USE_X86LDOUBLE
+ CPU86_LDoubleU tmp;
+ tmp.l.lower = *(uint64_t const *)ptr;
+ tmp.l.upper = *(uint16_t const *)(ptr + 8);
+ return tmp.d;
+#else
+# error "Busted FPU saving/restoring!"
+ return *(CPU86_LDouble *)ptr;
+#endif
+}
+
+static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr)
+{
+#ifdef USE_X86LDOUBLE
+ CPU86_LDoubleU tmp;
+ tmp.d = f;
+ *(uint64_t *)(ptr + 0) = tmp.l.lower;
+ *(uint16_t *)(ptr + 8) = tmp.l.upper;
+ *(uint16_t *)(ptr + 10) = 0;
+ *(uint32_t *)(ptr + 12) = 0;
+ AssertCompile(sizeof(long double) > 8);
+#else
+# error "Busted FPU saving/restoring!"
+ *(CPU86_LDouble *)ptr = f;
+#endif
+}
+
+#undef stw
+#undef stl
+#undef stq
+#define stw(a,b) *(uint16_t *)(a) = (uint16_t)(b)
+#define stl(a,b) *(uint32_t *)(a) = (uint32_t)(b)
+#define stq(a,b) *(uint64_t *)(a) = (uint64_t)(b)
+
+//*****************************************************************************
+void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+ int fpus, fptag, i, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ uint8_t *addr;
+ int data64 = !!(env->hflags & HF_LMA_MASK);
+
+ if (env->cpuid_features & CPUID_FXSR)
+ {
+ fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
+ for(i = 0; i < 8; i++) {
+ fptag |= (env->fptags[i] << i);
+ }
+ stw(ptr, env->fpuc);
+ stw(ptr + 2, fpus);
+ stw(ptr + 4, fptag ^ 0xff);
+
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt_raw(tmp, addr);
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it */
+ stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+ stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+ nb_xmm_regs = 8 << data64;
+ addr = ptr + 0xa0;
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if __GNUC__ < 4
+ stq(addr, env->xmm_regs[i].XMM_Q(0));
+ stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */
+ stl(addr, env->xmm_regs[i].XMM_L(0));
+ stl(addr + 4, env->xmm_regs[i].XMM_L(1));
+ stl(addr + 8, env->xmm_regs[i].XMM_L(2));
+ stl(addr + 12, env->xmm_regs[i].XMM_L(3));
+#endif
+ addr += 16;
+ }
+ }
+ }
+ else
+ {
+ PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+ int fptag;
+
+ fp->FCW = env->fpuc;
+ fp->FSW = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+ fptag = 0;
+ for (i=7; i>=0; i--) {
+ fptag <<= 2;
+ if (env->fptags[i]) {
+ fptag |= 3;
+ } else {
+ /* the FPU automatically computes it */
+ }
+ }
+ fp->FTW = fptag;
+
+ for(i = 0;i < 8; i++) {
+ tmp = ST(i);
+ helper_fstt_raw(tmp, &fp->regs[i].au8[0]);
+ }
+ }
+}
+
+//*****************************************************************************
+#undef lduw
+#undef ldl
+#undef ldq
+#define lduw(a) *(uint16_t *)(a)
+#define ldl(a) *(uint32_t *)(a)
+#define ldq(a) *(uint64_t *)(a)
+//*****************************************************************************
+void save_raw_fp_state(CPUX86State *env, uint8_t *ptr)
+{
+ int i, fpus, fptag, nb_xmm_regs;
+ CPU86_LDouble tmp;
+ uint8_t *addr;
+ int data64 = !!(env->hflags & HF_LMA_MASK); /* don't use HF_CS64_MASK here as cs hasn't been synced when this function is called. */
+
+ if (env->cpuid_features & CPUID_FXSR)
+ {
+ env->fpuc = lduw(ptr);
+ fpus = lduw(ptr + 2);
+ fptag = lduw(ptr + 4);
+ env->fpstt = (fpus >> 11) & 7;
+ env->fpus = fpus & ~0x3800;
+ fptag ^= 0xff;
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag >> i) & 1);
+ }
+
+ addr = ptr + 0x20;
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt_raw(addr);
+ ST(i) = tmp;
+ addr += 16;
+ }
+
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ /* XXX: finish it, endianness */
+ env->mxcsr = ldl(ptr + 0x18);
+ //ldl(ptr + 0x1c);
+ nb_xmm_regs = 8 << data64;
+ addr = ptr + 0xa0;
+ for(i = 0; i < nb_xmm_regs; i++) {
+#if HC_ARCH_BITS == 32
+ /* this is a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35135 */
+ env->xmm_regs[i].XMM_L(0) = ldl(addr);
+ env->xmm_regs[i].XMM_L(1) = ldl(addr + 4);
+ env->xmm_regs[i].XMM_L(2) = ldl(addr + 8);
+ env->xmm_regs[i].XMM_L(3) = ldl(addr + 12);
+#else
+ env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+ env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+#endif
+ addr += 16;
+ }
+ }
+ }
+ else
+ {
+ PX86FPUSTATE fp = (PX86FPUSTATE)ptr;
+ int fptag, j;
+
+ env->fpuc = fp->FCW;
+ env->fpstt = (fp->FSW >> 11) & 7;
+ env->fpus = fp->FSW & ~0x3800;
+ fptag = fp->FTW;
+ for(i = 0;i < 8; i++) {
+ env->fptags[i] = ((fptag & 3) == 3);
+ fptag >>= 2;
+ }
+ j = env->fpstt;
+ for(i = 0;i < 8; i++) {
+ tmp = helper_fldt_raw(&fp->regs[i].au8[0]);
+ ST(i) = tmp;
+ }
+ }
+}
+//*****************************************************************************
+//*****************************************************************************
+
+#endif /* VBOX */
+
+/* Secure Virtual Machine helpers */
+
+#if defined(CONFIG_USER_ONLY)
+
+void helper_vmrun(int aflag, int next_eip_addend)
+{
+}
+void helper_vmmcall(void)
+{
+}
+void helper_vmload(int aflag)
+{
+}
+void helper_vmsave(int aflag)
+{
+}
+void helper_stgi(void)
+{
+}
+void helper_clgi(void)
+{
+}
+void helper_skinit(void)
+{
+}
+void helper_invlpga(int aflag)
+{
+}
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
+}
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
+{
+}
+
+void helper_svm_check_io(uint32_t port, uint32_t param,
+ uint32_t next_eip_addend)
+{
+}
+#else
+
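+/* Store a qemu SegmentCache into a VMCB segment; the VMCB attrib field packs the
+ descriptor attribute bits (type/S/DPL/P in bits 0..7, AVL/L/DB/G in bits 8..11). */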
+static inline void svm_save_seg(target_phys_addr_t addr,
+ const SegmentCache *sc)
+{
+ stw_phys(addr + offsetof(struct vmcb_seg, selector),
+ sc->selector);
+ stq_phys(addr + offsetof(struct vmcb_seg, base),
+ sc->base);
+ stl_phys(addr + offsetof(struct vmcb_seg, limit),
+ sc->limit);
+ stw_phys(addr + offsetof(struct vmcb_seg, attrib),
+ ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00));
+}
+
+static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc)
+{
+ unsigned int flags;
+
+ sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector));
+ sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base));
+ sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit));
+ flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib));
+ sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12);
+}
+
+static inline void svm_load_seg_cache(target_phys_addr_t addr,
+ CPUState *env, int seg_reg)
+{
+ SegmentCache sc1, *sc = &sc1;
+ svm_load_seg(addr, sc);
+ cpu_x86_load_seg_cache(env, seg_reg, sc->selector,
+ sc->base, sc->limit, sc->flags);
+}
+
+void helper_vmrun(int aflag, int next_eip_addend)
+{
+ target_ulong addr;
+ uint32_t event_inj;
+ uint32_t int_ctl;
+
+ helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0);
+
+ if (aflag == 2)
+ addr = EAX;
+ else
+ addr = (uint32_t)EAX;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr);
+
+ env->vm_vmcb = addr;
+
+ /* save the current CPU state in the hsave page */
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+ stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+ stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags());
+
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es),
+ &env->segs[R_ES]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs),
+ &env->segs[R_CS]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss),
+ &env->segs[R_SS]);
+ svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds),
+ &env->segs[R_DS]);
+
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
+ EIP + next_eip_addend);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP);
+ stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX);
+
+ /* load the interception bitmaps so we do not need to access the
+ vmcb in svm mode */
+ env->intercept = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept));
+ env->intercept_cr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read));
+ env->intercept_cr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write));
+ env->intercept_dr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read));
+ env->intercept_dr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write));
+ env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions));
+
+ /* enable intercepts */
+ env->hflags |= HF_SVMI_MASK;
+
+ env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset));
+
+ env->gdt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base));
+ env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit));
+
+ env->idt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base));
+ env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit));
+
+ /* clear exit_info_2 so we behave like the real hardware */
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0);
+
+ cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0)));
+ cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4)));
+ cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3)));
+ env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2));
+ int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+ env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+ if (int_ctl & V_INTR_MASKING_MASK) {
+ env->v_tpr = int_ctl & V_TPR_MASK;
+ env->hflags2 |= HF2_VINTR_MASK;
+ if (env->eflags & IF_MASK)
+ env->hflags2 |= HF2_HIF_MASK;
+ }
+
+ cpu_load_efer(env,
+ ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
+ env->eflags = 0;
+ load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ CC_OP = CC_OP_EFLAGS;
+
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es),
+ env, R_ES);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+ env, R_CS);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+ env, R_SS);
+ svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+ env, R_DS);
+
+ EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip));
+ env->eip = EIP;
+ ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp));
+ EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax));
+ env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+ env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+ cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
+
+ /* FIXME: guest state consistency checks */
+
+ switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) {
+ case TLB_CONTROL_DO_NOTHING:
+ break;
+ case TLB_CONTROL_FLUSH_ALL_ASID:
+ /* FIXME: this is not 100% correct but should work for now */
+ tlb_flush(env, 1);
+ break;
+ }
+
+ env->hflags2 |= HF2_GIF_MASK;
+
+ if (int_ctl & V_IRQ_MASK) {
+ env->interrupt_request |= CPU_INTERRUPT_VIRQ;
+ }
+
+ /* maybe we need to inject an event */
+ event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+ if (event_inj & SVM_EVTINJ_VALID) {
+ uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
+ uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
+ uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
+ /* FIXME: need to implement valid_err */
+ switch (event_inj & SVM_EVTINJ_TYPE_MASK) {
+ case SVM_EVTINJ_TYPE_INTR:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = -1;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR");
+ /* XXX: is it always correct ? */
+ do_interrupt(vector, 0, 0, 0, 1);
+ break;
+ case SVM_EVTINJ_TYPE_NMI:
+ env->exception_index = EXCP02_NMI;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = EIP;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI");
+ cpu_loop_exit();
+ break;
+ case SVM_EVTINJ_TYPE_EXEPT:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 0;
+ env->exception_next_eip = -1;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT");
+ cpu_loop_exit();
+ break;
+ case SVM_EVTINJ_TYPE_SOFT:
+ env->exception_index = vector;
+ env->error_code = event_inj_err;
+ env->exception_is_int = 1;
+ env->exception_next_eip = EIP;
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT");
+ cpu_loop_exit();
+ break;
+ }
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code);
+ }
+}
+
+void helper_vmmcall(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0);
+ raise_exception(EXCP06_ILLOP);
+}
+
+void helper_vmload(int aflag)
+{
+ target_ulong addr;
+ helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0);
+
+ if (aflag == 2)
+ addr = EAX;
+ else
+ addr = (uint32_t)EAX;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+ addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+ env->segs[R_FS].base);
+
+ svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs),
+ env, R_FS);
+ svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs),
+ env, R_GS);
+ svm_load_seg(addr + offsetof(struct vmcb, save.tr),
+ &env->tr);
+ svm_load_seg(addr + offsetof(struct vmcb, save.ldtr),
+ &env->ldt);
+
+#ifdef TARGET_X86_64
+ env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base));
+ env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar));
+ env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar));
+ env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask));
+#endif
+ env->star = ldq_phys(addr + offsetof(struct vmcb, save.star));
+ env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs));
+ env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp));
+ env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip));
+}
+
+void helper_vmsave(int aflag)
+{
+ target_ulong addr;
+ helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0);
+
+ if (aflag == 2)
+ addr = EAX;
+ else
+ addr = (uint32_t)EAX;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n",
+ addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
+ env->segs[R_FS].base);
+
+ svm_save_seg(addr + offsetof(struct vmcb, save.fs),
+ &env->segs[R_FS]);
+ svm_save_seg(addr + offsetof(struct vmcb, save.gs),
+ &env->segs[R_GS]);
+ svm_save_seg(addr + offsetof(struct vmcb, save.tr),
+ &env->tr);
+ svm_save_seg(addr + offsetof(struct vmcb, save.ldtr),
+ &env->ldt);
+
+#ifdef TARGET_X86_64
+ stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase);
+ stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar);
+ stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar);
+ stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask);
+#endif
+ stq_phys(addr + offsetof(struct vmcb, save.star), env->star);
+ stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs);
+ stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp);
+ stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip);
+}
+
+void helper_stgi(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_STGI, 0);
+ env->hflags2 |= HF2_GIF_MASK;
+}
+
+void helper_clgi(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0);
+ env->hflags2 &= ~HF2_GIF_MASK;
+}
+
+void helper_skinit(void)
+{
+ helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0);
+ /* XXX: not implemented */
+ raise_exception(EXCP06_ILLOP);
+}
+
+void helper_invlpga(int aflag)
+{
+ target_ulong addr;
+ helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0);
+
+ if (aflag == 2)
+ addr = EAX;
+ else
+ addr = (uint32_t)EAX;
+
+ /* XXX: could use the ASID to see whether the flush is actually needed */
+ tlb_flush_page(env, addr);
+}
+
+void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
+{
+ if (likely(!(env->hflags & HF_SVMI_MASK)))
+ return;
+#ifndef VBOX
+ switch(type) {
+ case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8:
+ if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8:
+ if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7:
+ if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7:
+ if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31:
+ if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ case SVM_EXIT_MSR:
+ if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) {
+ /* FIXME: this should be read in at vmrun (faster this way?) */
+ uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa));
+ uint32_t t0, t1;
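+ /* Each MSR has two adjacent intercept bits in the permission map (read,
+ then write); t1 below is the byte offset of that pair in the map and
+ t0 the bit offset within the byte, while param (0 for RDMSR, 1 for
+ WRMSR) selects which of the two bits is tested. */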
+ switch((uint32_t)ECX) {
+ case 0 ... 0x1fff:
+ t0 = (ECX * 2) % 8;
+ t1 = (ECX * 2) / 8;
+ break;
+ case 0xc0000000 ... 0xc0001fff:
+ t0 = (8192 + ECX - 0xc0000000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ case 0xc0010000 ... 0xc0011fff:
+ t0 = (16384 + ECX - 0xc0010000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ default:
+ helper_vmexit(type, param);
+ t0 = 0;
+ t1 = 0;
+ break;
+ }
+ if (ldub_phys(addr + t1) & ((1 << param) << t0))
+ helper_vmexit(type, param);
+ }
+ break;
+ default:
+ if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) {
+ helper_vmexit(type, param);
+ }
+ break;
+ }
+#else /* VBOX */
+ AssertMsgFailed(("We shouldn't be here, HM supported differently!"));
+#endif /* VBOX */
+}
+
+void helper_svm_check_io(uint32_t port, uint32_t param,
+ uint32_t next_eip_addend)
+{
+ if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) {
+ /* FIXME: this should be read in at vmrun (faster this way?) */
+ uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa));
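+ /* The I/O permission map has one intercept bit per port; bits 4-6 of
+ param carry the SZ8/SZ16/SZ32 flags, whose value equals the access
+ size in bytes, so mask covers every port touched by the access. */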
+ uint16_t mask = (1 << ((param >> 4) & 7)) - 1;
+ if(lduw_phys(addr + port / 8) & (mask << (port & 7))) {
+ /* next EIP */
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
+ env->eip + next_eip_addend);
+ helper_vmexit(SVM_EXIT_IOIO, param | (port << 16));
+ }
+ }
+}
+
+/* Note: currently only 32 bits of exit_code are used */
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
+ uint32_t int_ctl;
+
+ qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n",
+ exit_code, exit_info_1,
+ ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)),
+ EIP);
+
+ if(env->hflags & HF_INHIBIT_IRQ_MASK) {
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ } else {
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
+ }
+
+ /* Save the VM state in the vmcb */
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es),
+ &env->segs[R_ES]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs),
+ &env->segs[R_CS]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss),
+ &env->segs[R_SS]);
+ svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds),
+ &env->segs[R_DS]);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base);
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]);
+
+ int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl));
+ int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK);
+ int_ctl |= env->v_tpr & V_TPR_MASK;
+ if (env->interrupt_request & CPU_INTERRUPT_VIRQ)
+ int_ctl |= V_IRQ_MASK;
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl);
+
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags());
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]);
+ stb_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK);
+
+ /* Reload the host state from vm_hsave */
+ env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK);
+ env->hflags &= ~HF_SVMI_MASK;
+ env->intercept = 0;
+ env->intercept_exceptions = 0;
+ env->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+ env->tsc_offset = 0;
+
+ env->gdt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base));
+ env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit));
+
+ env->idt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base));
+ env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit));
+
+ cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK);
+ cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4)));
+ cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
+ /* we need to set the efer after the crs so the hidden flags get
+ set properly */
+ cpu_load_efer(env,
+ ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
+ env->eflags = 0;
+ load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
+ ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+ CC_OP = CC_OP_EFLAGS;
+
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es),
+ env, R_ES);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs),
+ env, R_CS);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss),
+ env, R_SS);
+ svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds),
+ env, R_DS);
+
+ EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip));
+ ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp));
+ EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax));
+
+ env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6));
+ env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+ /* other setups */
+ cpu_x86_set_cpl(env, 0);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
+ stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
+
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info),
+ ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err),
+ ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
+ stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0);
+
+ env->hflags2 &= ~HF2_GIF_MASK;
+ /* FIXME: Resets the current ASID register to zero (host ASID). */
+
+ /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
+
+ /* Clears the TSC_OFFSET inside the processor. */
+
+ /* If the host is in PAE mode, the processor reloads the host's PDPEs
+ from the page table indicated by the host's CR3. If the PDPEs contain
+ illegal state, the processor causes a shutdown. */
+
+ /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
+ env->cr[0] |= CR0_PE_MASK;
+ env->eflags &= ~VM_MASK;
+
+ /* Disables all breakpoints in the host DR7 register. */
+
+ /* Checks the reloaded host state for consistency. */
+
+ /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
+ host's code segment or non-canonical (in the case of long mode), a
+ #GP fault is delivered inside the host. */
+
+ /* remove any pending exception */
+ env->exception_index = -1;
+ env->error_code = 0;
+ env->old_exception = -1;
+
+ cpu_loop_exit();
+}
+
+#endif
+
+/* MMX/SSE */
+/* XXX: optimize by storing fptt and fptags in the static cpu state */
+void helper_enter_mmx(void)
+{
+ env->fpstt = 0;
+ *(uint32_t *)(env->fptags) = 0;
+ *(uint32_t *)(env->fptags + 4) = 0;
+}
+
+void helper_emms(void)
+{
+ /* set to empty state */
+ *(uint32_t *)(env->fptags) = 0x01010101;
+ *(uint32_t *)(env->fptags + 4) = 0x01010101;
+}
+
+/* XXX: suppress */
+void helper_movq(void *d, void *s)
+{
+ *(uint64_t *)d = *(uint64_t *)s;
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
+
+#define SHIFT 0
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 1
+#include "helper_template.h"
+#undef SHIFT
+
+#define SHIFT 2
+#include "helper_template.h"
+#undef SHIFT
+
+#ifdef TARGET_X86_64
+
+#define SHIFT 3
+#include "helper_template.h"
+#undef SHIFT
+
+#endif
+
+/* bit operations */
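+/* Note: helper_bsf and helper_bsr expect a non-zero operand (BSF/BSR leave
+ the destination undefined for a zero source, and the zero case is
+ handled before these helpers are called); a zero operand would make
+ the loops below spin forever. */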
+target_ulong helper_bsf(target_ulong t0)
+{
+ int count;
+ target_ulong res;
+
+ res = t0;
+ count = 0;
+ while ((res & 1) == 0) {
+ count++;
+ res >>= 1;
+ }
+ return count;
+}
+
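+/* With wordsize == 0 this returns the bit index of the most significant set
+ bit (BSR semantics, see helper_bsr below); with a non-zero wordsize it
+ returns the LZCNT result for a wordsize-bit operand. */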
+target_ulong helper_lzcnt(target_ulong t0, int wordsize)
+{
+ int count;
+ target_ulong res, mask;
+
+ if (wordsize > 0 && t0 == 0) {
+ return wordsize;
+ }
+ res = t0;
+ count = TARGET_LONG_BITS - 1;
+ mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
+ while ((res & mask) == 0) {
+ count--;
+ res <<= 1;
+ }
+ if (wordsize > 0) {
+ return wordsize - 1 - count;
+ }
+ return count;
+}
+
+target_ulong helper_bsr(target_ulong t0)
+{
+ return helper_lzcnt(t0, 0);
+}
+
+static int compute_all_eflags(void)
+{
+ return CC_SRC;
+}
+
+static int compute_c_eflags(void)
+{
+ return CC_SRC & CC_C;
+}
+
+uint32_t helper_cc_compute_all(int op)
+{
+ switch (op) {
+ default: /* should never happen */ return 0;
+
+ case CC_OP_EFLAGS: return compute_all_eflags();
+
+ case CC_OP_MULB: return compute_all_mulb();
+ case CC_OP_MULW: return compute_all_mulw();
+ case CC_OP_MULL: return compute_all_mull();
+
+ case CC_OP_ADDB: return compute_all_addb();
+ case CC_OP_ADDW: return compute_all_addw();
+ case CC_OP_ADDL: return compute_all_addl();
+
+ case CC_OP_ADCB: return compute_all_adcb();
+ case CC_OP_ADCW: return compute_all_adcw();
+ case CC_OP_ADCL: return compute_all_adcl();
+
+ case CC_OP_SUBB: return compute_all_subb();
+ case CC_OP_SUBW: return compute_all_subw();
+ case CC_OP_SUBL: return compute_all_subl();
+
+ case CC_OP_SBBB: return compute_all_sbbb();
+ case CC_OP_SBBW: return compute_all_sbbw();
+ case CC_OP_SBBL: return compute_all_sbbl();
+
+ case CC_OP_LOGICB: return compute_all_logicb();
+ case CC_OP_LOGICW: return compute_all_logicw();
+ case CC_OP_LOGICL: return compute_all_logicl();
+
+ case CC_OP_INCB: return compute_all_incb();
+ case CC_OP_INCW: return compute_all_incw();
+ case CC_OP_INCL: return compute_all_incl();
+
+ case CC_OP_DECB: return compute_all_decb();
+ case CC_OP_DECW: return compute_all_decw();
+ case CC_OP_DECL: return compute_all_decl();
+
+ case CC_OP_SHLB: return compute_all_shlb();
+ case CC_OP_SHLW: return compute_all_shlw();
+ case CC_OP_SHLL: return compute_all_shll();
+
+ case CC_OP_SARB: return compute_all_sarb();
+ case CC_OP_SARW: return compute_all_sarw();
+ case CC_OP_SARL: return compute_all_sarl();
+
+#ifdef TARGET_X86_64
+ case CC_OP_MULQ: return compute_all_mulq();
+
+ case CC_OP_ADDQ: return compute_all_addq();
+
+ case CC_OP_ADCQ: return compute_all_adcq();
+
+ case CC_OP_SUBQ: return compute_all_subq();
+
+ case CC_OP_SBBQ: return compute_all_sbbq();
+
+ case CC_OP_LOGICQ: return compute_all_logicq();
+
+ case CC_OP_INCQ: return compute_all_incq();
+
+ case CC_OP_DECQ: return compute_all_decq();
+
+ case CC_OP_SHLQ: return compute_all_shlq();
+
+ case CC_OP_SARQ: return compute_all_sarq();
+#endif
+ }
+}
+
+uint32_t helper_cc_compute_c(int op)
+{
+ switch (op) {
+ default: /* should never happen */ return 0;
+
+ case CC_OP_EFLAGS: return compute_c_eflags();
+
+ case CC_OP_MULB: return compute_c_mull();
+ case CC_OP_MULW: return compute_c_mull();
+ case CC_OP_MULL: return compute_c_mull();
+
+ case CC_OP_ADDB: return compute_c_addb();
+ case CC_OP_ADDW: return compute_c_addw();
+ case CC_OP_ADDL: return compute_c_addl();
+
+ case CC_OP_ADCB: return compute_c_adcb();
+ case CC_OP_ADCW: return compute_c_adcw();
+ case CC_OP_ADCL: return compute_c_adcl();
+
+ case CC_OP_SUBB: return compute_c_subb();
+ case CC_OP_SUBW: return compute_c_subw();
+ case CC_OP_SUBL: return compute_c_subl();
+
+ case CC_OP_SBBB: return compute_c_sbbb();
+ case CC_OP_SBBW: return compute_c_sbbw();
+ case CC_OP_SBBL: return compute_c_sbbl();
+
+ case CC_OP_LOGICB: return compute_c_logicb();
+ case CC_OP_LOGICW: return compute_c_logicw();
+ case CC_OP_LOGICL: return compute_c_logicl();
+
+ case CC_OP_INCB: return compute_c_incl();
+ case CC_OP_INCW: return compute_c_incl();
+ case CC_OP_INCL: return compute_c_incl();
+
+ case CC_OP_DECB: return compute_c_incl();
+ case CC_OP_DECW: return compute_c_incl();
+ case CC_OP_DECL: return compute_c_incl();
+
+ case CC_OP_SHLB: return compute_c_shlb();
+ case CC_OP_SHLW: return compute_c_shlw();
+ case CC_OP_SHLL: return compute_c_shll();
+
+ case CC_OP_SARB: return compute_c_sarl();
+ case CC_OP_SARW: return compute_c_sarl();
+ case CC_OP_SARL: return compute_c_sarl();
+
+#ifdef TARGET_X86_64
+ case CC_OP_MULQ: return compute_c_mull();
+
+ case CC_OP_ADDQ: return compute_c_addq();
+
+ case CC_OP_ADCQ: return compute_c_adcq();
+
+ case CC_OP_SUBQ: return compute_c_subq();
+
+ case CC_OP_SBBQ: return compute_c_sbbq();
+
+ case CC_OP_LOGICQ: return compute_c_logicq();
+
+ case CC_OP_INCQ: return compute_c_incl();
+
+ case CC_OP_DECQ: return compute_c_incl();
+
+ case CC_OP_SHLQ: return compute_c_shlq();
+
+ case CC_OP_SARQ: return compute_c_sarl();
+#endif
+ }
+}
diff --git a/src/recompiler/target-i386/ops_sse.h b/src/recompiler/target-i386/ops_sse.h
new file mode 100644
index 00000000..4c8b89e2
--- /dev/null
+++ b/src/recompiler/target-i386/ops_sse.h
@@ -0,0 +1,2111 @@
+/*
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ * Copyright (c) 2008 Intel Corporation <andrew.zaborowski@intel.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#if SHIFT == 0
+#define Reg MMXReg
+#define XMM_ONLY(...)
+#define B(n) MMX_B(n)
+#define W(n) MMX_W(n)
+#define L(n) MMX_L(n)
+#define Q(n) q
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define XMM_ONLY(...) __VA_ARGS__
+#define B(n) XMM_B(n)
+#define W(n) XMM_W(n)
+#define L(n) XMM_L(n)
+#define Q(n) XMM_Q(n)
+#define SUFFIX _xmm
+#endif
+
+void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 15) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->W(0) >>= shift;
+ d->W(1) >>= shift;
+ d->W(2) >>= shift;
+ d->W(3) >>= shift;
+#if SHIFT == 1
+ d->W(4) >>= shift;
+ d->W(5) >>= shift;
+ d->W(6) >>= shift;
+ d->W(7) >>= shift;
+#endif
+ }
+}
+
+void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 15) {
+ shift = 15;
+ } else {
+ shift = s->B(0);
+ }
+ d->W(0) = (int16_t)d->W(0) >> shift;
+ d->W(1) = (int16_t)d->W(1) >> shift;
+ d->W(2) = (int16_t)d->W(2) >> shift;
+ d->W(3) = (int16_t)d->W(3) >> shift;
+#if SHIFT == 1
+ d->W(4) = (int16_t)d->W(4) >> shift;
+ d->W(5) = (int16_t)d->W(5) >> shift;
+ d->W(6) = (int16_t)d->W(6) >> shift;
+ d->W(7) = (int16_t)d->W(7) >> shift;
+#endif
+}
+
+void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 15) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->W(0) <<= shift;
+ d->W(1) <<= shift;
+ d->W(2) <<= shift;
+ d->W(3) <<= shift;
+#if SHIFT == 1
+ d->W(4) <<= shift;
+ d->W(5) <<= shift;
+ d->W(6) <<= shift;
+ d->W(7) <<= shift;
+#endif
+ }
+}
+
+void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 31) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->L(0) >>= shift;
+ d->L(1) >>= shift;
+#if SHIFT == 1
+ d->L(2) >>= shift;
+ d->L(3) >>= shift;
+#endif
+ }
+}
+
+void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 31) {
+ shift = 31;
+ } else {
+ shift = s->B(0);
+ }
+ d->L(0) = (int32_t)d->L(0) >> shift;
+ d->L(1) = (int32_t)d->L(1) >> shift;
+#if SHIFT == 1
+ d->L(2) = (int32_t)d->L(2) >> shift;
+ d->L(3) = (int32_t)d->L(3) >> shift;
+#endif
+}
+
+void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 31) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->L(0) <<= shift;
+ d->L(1) <<= shift;
+#if SHIFT == 1
+ d->L(2) <<= shift;
+ d->L(3) <<= shift;
+#endif
+ }
+}
+
+void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 63) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->Q(0) >>= shift;
+#if SHIFT == 1
+ d->Q(1) >>= shift;
+#endif
+ }
+}
+
+void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift;
+
+ if (s->Q(0) > 63) {
+ d->Q(0) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+ } else {
+ shift = s->B(0);
+ d->Q(0) <<= shift;
+#if SHIFT == 1
+ d->Q(1) <<= shift;
+#endif
+ }
+}
+
+#if SHIFT == 1
+void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift, i;
+
+ shift = s->L(0);
+ if (shift > 16)
+ shift = 16;
+ for(i = 0; i < 16 - shift; i++)
+ d->B(i) = d->B(i + shift);
+ for(i = 16 - shift; i < 16; i++)
+ d->B(i) = 0;
+}
+
+void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s)
+{
+ int shift, i;
+
+ shift = s->L(0);
+ if (shift > 16)
+ shift = 16;
+ for(i = 15; i >= shift; i--)
+ d->B(i) = d->B(i - shift);
+ for(i = 0; i < shift; i++)
+ d->B(i) = 0;
+}
+#endif
+
+#define SSE_HELPER_B(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->B(0) = F(d->B(0), s->B(0));\
+ d->B(1) = F(d->B(1), s->B(1));\
+ d->B(2) = F(d->B(2), s->B(2));\
+ d->B(3) = F(d->B(3), s->B(3));\
+ d->B(4) = F(d->B(4), s->B(4));\
+ d->B(5) = F(d->B(5), s->B(5));\
+ d->B(6) = F(d->B(6), s->B(6));\
+ d->B(7) = F(d->B(7), s->B(7));\
+ XMM_ONLY(\
+ d->B(8) = F(d->B(8), s->B(8));\
+ d->B(9) = F(d->B(9), s->B(9));\
+ d->B(10) = F(d->B(10), s->B(10));\
+ d->B(11) = F(d->B(11), s->B(11));\
+ d->B(12) = F(d->B(12), s->B(12));\
+ d->B(13) = F(d->B(13), s->B(13));\
+ d->B(14) = F(d->B(14), s->B(14));\
+ d->B(15) = F(d->B(15), s->B(15));\
+ )\
+}
+
+#define SSE_HELPER_W(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->W(0) = F(d->W(0), s->W(0));\
+ d->W(1) = F(d->W(1), s->W(1));\
+ d->W(2) = F(d->W(2), s->W(2));\
+ d->W(3) = F(d->W(3), s->W(3));\
+ XMM_ONLY(\
+ d->W(4) = F(d->W(4), s->W(4));\
+ d->W(5) = F(d->W(5), s->W(5));\
+ d->W(6) = F(d->W(6), s->W(6));\
+ d->W(7) = F(d->W(7), s->W(7));\
+ )\
+}
+
+#define SSE_HELPER_L(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->L(0) = F(d->L(0), s->L(0));\
+ d->L(1) = F(d->L(1), s->L(1));\
+ XMM_ONLY(\
+ d->L(2) = F(d->L(2), s->L(2));\
+ d->L(3) = F(d->L(3), s->L(3));\
+ )\
+}
+
+#define SSE_HELPER_Q(name, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->Q(0) = F(d->Q(0), s->Q(0));\
+ XMM_ONLY(\
+ d->Q(1) = F(d->Q(1), s->Q(1));\
+ )\
+}
+
+#if SHIFT == 0
+static inline int satub(int x)
+{
+ if (x < 0)
+ return 0;
+ else if (x > 255)
+ return 255;
+ else
+ return x;
+}
+
+static inline int satuw(int x)
+{
+ if (x < 0)
+ return 0;
+ else if (x > 65535)
+ return 65535;
+ else
+ return x;
+}
+
+static inline int satsb(int x)
+{
+ if (x < -128)
+ return -128;
+ else if (x > 127)
+ return 127;
+ else
+ return x;
+}
+
+static inline int satsw(int x)
+{
+ if (x < -32768)
+ return -32768;
+ else if (x > 32767)
+ return 32767;
+ else
+ return x;
+}
+
+#define FADD(a, b) ((a) + (b))
+#define FADDUB(a, b) satub((a) + (b))
+#define FADDUW(a, b) satuw((a) + (b))
+#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
+#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))
+
+#define FSUB(a, b) ((a) - (b))
+#define FSUBUB(a, b) satub((a) - (b))
+#define FSUBUW(a, b) satuw((a) - (b))
+#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
+#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
+#define FMINUB(a, b) ((a) < (b)) ? (a) : (b)
+#define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b)
+#define FMAXUB(a, b) ((a) > (b)) ? (a) : (b)
+#define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b)
+
+#define FAND(a, b) (a) & (b)
+#define FANDN(a, b) ((~(a)) & (b))
+#define FOR(a, b) (a) | (b)
+#define FXOR(a, b) (a) ^ (b)
+
+#define FCMPGTB(a, b) (int8_t)(a) > (int8_t)(b) ? -1 : 0
+#define FCMPGTW(a, b) (int16_t)(a) > (int16_t)(b) ? -1 : 0
+#define FCMPGTL(a, b) (int32_t)(a) > (int32_t)(b) ? -1 : 0
+#define FCMPEQ(a, b) (a) == (b) ? -1 : 0
+
+#define FMULLW(a, b) (a) * (b)
+#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16
+#define FMULHUW(a, b) (a) * (b) >> 16
+#define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16
+
+#define FAVG(a, b) ((a) + (b) + 1) >> 1
+#endif
+
+SSE_HELPER_B(helper_paddb, FADD)
+SSE_HELPER_W(helper_paddw, FADD)
+SSE_HELPER_L(helper_paddl, FADD)
+SSE_HELPER_Q(helper_paddq, FADD)
+
+SSE_HELPER_B(helper_psubb, FSUB)
+SSE_HELPER_W(helper_psubw, FSUB)
+SSE_HELPER_L(helper_psubl, FSUB)
+SSE_HELPER_Q(helper_psubq, FSUB)
+
+SSE_HELPER_B(helper_paddusb, FADDUB)
+SSE_HELPER_B(helper_paddsb, FADDSB)
+SSE_HELPER_B(helper_psubusb, FSUBUB)
+SSE_HELPER_B(helper_psubsb, FSUBSB)
+
+SSE_HELPER_W(helper_paddusw, FADDUW)
+SSE_HELPER_W(helper_paddsw, FADDSW)
+SSE_HELPER_W(helper_psubusw, FSUBUW)
+SSE_HELPER_W(helper_psubsw, FSUBSW)
+
+SSE_HELPER_B(helper_pminub, FMINUB)
+SSE_HELPER_B(helper_pmaxub, FMAXUB)
+
+SSE_HELPER_W(helper_pminsw, FMINSW)
+SSE_HELPER_W(helper_pmaxsw, FMAXSW)
+
+SSE_HELPER_Q(helper_pand, FAND)
+SSE_HELPER_Q(helper_pandn, FANDN)
+SSE_HELPER_Q(helper_por, FOR)
+SSE_HELPER_Q(helper_pxor, FXOR)
+
+SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
+SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
+SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)
+
+SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
+SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
+SSE_HELPER_L(helper_pcmpeql, FCMPEQ)
+
+SSE_HELPER_W(helper_pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(helper_pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(helper_pmulhuw, FMULHUW)
+SSE_HELPER_W(helper_pmulhw, FMULHW)
+
+SSE_HELPER_B(helper_pavgb, FAVG)
+SSE_HELPER_W(helper_pavgw, FAVG)
+
+void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s)
+{
+ d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
+#if SHIFT == 1
+ d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
+#endif
+}
+
+void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s)
+{
+ int i;
+
+ for(i = 0; i < (2 << SHIFT); i++) {
+ d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) +
+ (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1);
+ }
+}
+
+#if SHIFT == 0
+static inline int abs1(int a)
+{
+ if (a < 0)
+ return -a;
+ else
+ return a;
+}
+#endif
+void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s)
+{
+ unsigned int val;
+
+ val = 0;
+ val += abs1(d->B(0) - s->B(0));
+ val += abs1(d->B(1) - s->B(1));
+ val += abs1(d->B(2) - s->B(2));
+ val += abs1(d->B(3) - s->B(3));
+ val += abs1(d->B(4) - s->B(4));
+ val += abs1(d->B(5) - s->B(5));
+ val += abs1(d->B(6) - s->B(6));
+ val += abs1(d->B(7) - s->B(7));
+ d->Q(0) = val;
+#if SHIFT == 1
+ val = 0;
+ val += abs1(d->B(8) - s->B(8));
+ val += abs1(d->B(9) - s->B(9));
+ val += abs1(d->B(10) - s->B(10));
+ val += abs1(d->B(11) - s->B(11));
+ val += abs1(d->B(12) - s->B(12));
+ val += abs1(d->B(13) - s->B(13));
+ val += abs1(d->B(14) - s->B(14));
+ val += abs1(d->B(15) - s->B(15));
+ d->Q(1) = val;
+#endif
+}
+
+void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0)
+{
+ int i;
+ for(i = 0; i < (8 << SHIFT); i++) {
+ if (s->B(i) & 0x80)
+ stb(a0 + i, d->B(i));
+ }
+}
+
+void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val)
+{
+ d->L(0) = val;
+ d->L(1) = 0;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+}
+
+#ifdef TARGET_X86_64
+void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val)
+{
+ d->Q(0) = val;
+#if SHIFT == 1
+ d->Q(1) = 0;
+#endif
+}
+#endif
+
+#if SHIFT == 0
+void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.W(0) = s->W(order & 3);
+ r.W(1) = s->W((order >> 2) & 3);
+ r.W(2) = s->W((order >> 4) & 3);
+ r.W(3) = s->W((order >> 6) & 3);
+ *d = r;
+}
+#else
+void helper_shufps(Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.L(0) = d->L(order & 3);
+ r.L(1) = d->L((order >> 2) & 3);
+ r.L(2) = s->L((order >> 4) & 3);
+ r.L(3) = s->L((order >> 6) & 3);
+ *d = r;
+}
+
+void helper_shufpd(Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.Q(0) = d->Q(order & 1);
+ r.Q(1) = s->Q((order >> 1) & 1);
+ *d = r;
+}
+
+void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.L(0) = s->L(order & 3);
+ r.L(1) = s->L((order >> 2) & 3);
+ r.L(2) = s->L((order >> 4) & 3);
+ r.L(3) = s->L((order >> 6) & 3);
+ *d = r;
+}
+
+void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.W(0) = s->W(order & 3);
+ r.W(1) = s->W((order >> 2) & 3);
+ r.W(2) = s->W((order >> 4) & 3);
+ r.W(3) = s->W((order >> 6) & 3);
+ r.Q(1) = s->Q(1);
+ *d = r;
+}
+
+void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order)
+{
+ Reg r;
+ r.Q(0) = s->Q(0);
+ r.W(4) = s->W(4 + (order & 3));
+ r.W(5) = s->W(4 + ((order >> 2) & 3));
+ r.W(6) = s->W(4 + ((order >> 4) & 3));
+ r.W(7) = s->W(4 + ((order >> 6) & 3));
+ *d = r;
+}
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+#define SSE_HELPER_S(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+ d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+ d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+ d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+ d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+ d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+ d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
+#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
+#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
+#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
+#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)
+#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)
+#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+/* float to float conversions */
+void helper_cvtps2pd(Reg *d, Reg *s)
+{
+ float32 s0, s1;
+ s0 = s->XMM_S(0);
+ s1 = s->XMM_S(1);
+ d->XMM_D(0) = float32_to_float64(s0, &env->sse_status);
+ d->XMM_D(1) = float32_to_float64(s1, &env->sse_status);
+}
+
+void helper_cvtpd2ps(Reg *d, Reg *s)
+{
+ d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+ d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status);
+ d->Q(1) = 0;
+}
+
+void helper_cvtss2sd(Reg *d, Reg *s)
+{
+ d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status);
+}
+
+void helper_cvtsd2ss(Reg *d, Reg *s)
+{
+ d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status);
+}
+
+/* integer to float */
+void helper_cvtdq2ps(Reg *d, Reg *s)
+{
+ d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status);
+ d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status);
+ d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status);
+ d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status);
+}
+
+void helper_cvtdq2pd(Reg *d, Reg *s)
+{
+ int32_t l0, l1;
+ l0 = (int32_t)s->XMM_L(0);
+ l1 = (int32_t)s->XMM_L(1);
+ d->XMM_D(0) = int32_to_float64(l0, &env->sse_status);
+ d->XMM_D(1) = int32_to_float64(l1, &env->sse_status);
+}
+
+void helper_cvtpi2ps(XMMReg *d, MMXReg *s)
+{
+ d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
+ d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
+}
+
+void helper_cvtpi2pd(XMMReg *d, MMXReg *s)
+{
+ d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
+ d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
+}
+
+void helper_cvtsi2ss(XMMReg *d, uint32_t val)
+{
+ d->XMM_S(0) = int32_to_float32(val, &env->sse_status);
+}
+
+void helper_cvtsi2sd(XMMReg *d, uint32_t val)
+{
+ d->XMM_D(0) = int32_to_float64(val, &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+void helper_cvtsq2ss(XMMReg *d, uint64_t val)
+{
+ d->XMM_S(0) = int64_to_float32(val, &env->sse_status);
+}
+
+void helper_cvtsq2sd(XMMReg *d, uint64_t val)
+{
+ d->XMM_D(0) = int64_to_float64(val, &env->sse_status);
+}
+#endif
+
+/* float to integer */
+void helper_cvtps2dq(XMMReg *d, XMMReg *s)
+{
+ d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+ d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+ d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status);
+ d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status);
+}
+
+void helper_cvtpd2dq(XMMReg *d, XMMReg *s)
+{
+ d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+ d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+ d->XMM_Q(1) = 0;
+}
+
+void helper_cvtps2pi(MMXReg *d, XMMReg *s)
+{
+ d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status);
+ d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status);
+}
+
+void helper_cvtpd2pi(MMXReg *d, XMMReg *s)
+{
+ d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status);
+ d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status);
+}
+
+int32_t helper_cvtss2si(XMMReg *s)
+{
+ return float32_to_int32(s->XMM_S(0), &env->sse_status);
+}
+
+int32_t helper_cvtsd2si(XMMReg *s)
+{
+ return float64_to_int32(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+int64_t helper_cvtss2sq(XMMReg *s)
+{
+ return float32_to_int64(s->XMM_S(0), &env->sse_status);
+}
+
+int64_t helper_cvtsd2sq(XMMReg *s)
+{
+ return float64_to_int64(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+/* float to integer truncated */
+void helper_cvttps2dq(XMMReg *d, XMMReg *s)
+{
+ d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+ d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+ d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status);
+ d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status);
+}
+
+void helper_cvttpd2dq(XMMReg *d, XMMReg *s)
+{
+ d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+ d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+ d->XMM_Q(1) = 0;
+}
+
+void helper_cvttps2pi(MMXReg *d, XMMReg *s)
+{
+ d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+ d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status);
+}
+
+void helper_cvttpd2pi(MMXReg *d, XMMReg *s)
+{
+ d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+ d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status);
+}
+
+int32_t helper_cvttss2si(XMMReg *s)
+{
+ return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+int32_t helper_cvttsd2si(XMMReg *s)
+{
+ return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+
+#ifdef TARGET_X86_64
+int64_t helper_cvttss2sq(XMMReg *s)
+{
+ return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status);
+}
+
+int64_t helper_cvttsd2sq(XMMReg *s)
+{
+ return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status);
+}
+#endif
+
+void helper_rsqrtps(XMMReg *d, XMMReg *s)
+{
+ d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+ d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
+ d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
+ d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
+}
+
+void helper_rsqrtss(XMMReg *d, XMMReg *s)
+{
+ d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+}
+
+void helper_rcpps(XMMReg *d, XMMReg *s)
+{
+ d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+ d->XMM_S(1) = approx_rcp(s->XMM_S(1));
+ d->XMM_S(2) = approx_rcp(s->XMM_S(2));
+ d->XMM_S(3) = approx_rcp(s->XMM_S(3));
+}
+
+void helper_rcpss(XMMReg *d, XMMReg *s)
+{
+ d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+}
+
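+/* SSE4A EXTRQ/INSERTQ helpers: a length field of 0 selects all 64 bits. */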
+static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
+{
+ uint64_t mask;
+
+ if (len == 0) {
+ mask = ~0LL;
+ } else {
+ mask = (1ULL << len) - 1;
+ }
+ return (src >> shift) & mask;
+}
+
+void helper_extrq_r(XMMReg *d, XMMReg *s)
+{
+ d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0));
+}
+
+void helper_extrq_i(XMMReg *d, int index, int length)
+{
+ d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length);
+}
+
+static inline uint64_t helper_insertq(uint64_t src, int shift, int len)
+{
+ uint64_t mask;
+
+ if (len == 0) {
+ mask = ~0ULL;
+ } else {
+ mask = (1ULL << len) - 1;
+ }
+ return (src & ~(mask << shift)) | ((src & mask) << shift);
+}
+
+void helper_insertq_r(XMMReg *d, XMMReg *s)
+{
+ d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8));
+}
+
+void helper_insertq_i(XMMReg *d, int index, int length)
+{
+ d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length);
+}
+
+void helper_haddps(XMMReg *d, XMMReg *s)
+{
+ XMMReg r;
+ r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1);
+ r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3);
+ r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1);
+ r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3);
+ *d = r;
+}
+
+void helper_haddpd(XMMReg *d, XMMReg *s)
+{
+ XMMReg r;
+ r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1);
+ r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1);
+ *d = r;
+}
+
+void helper_hsubps(XMMReg *d, XMMReg *s)
+{
+ XMMReg r;
+ r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1);
+ r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3);
+ r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1);
+ r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3);
+ *d = r;
+}
+
+void helper_hsubpd(XMMReg *d, XMMReg *s)
+{
+ XMMReg r;
+ r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1);
+ r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1);
+ *d = r;
+}
+
+void helper_addsubps(XMMReg *d, XMMReg *s)
+{
+ d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0);
+ d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1);
+ d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2);
+ d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3);
+}
+
+void helper_addsubpd(XMMReg *d, XMMReg *s)
+{
+ d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0);
+ d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1);
+}
+
+/* XXX: unordered */
+#define SSE_HELPER_CMP(name, F)\
+void helper_ ## name ## ps (Reg *d, Reg *s)\
+{\
+ d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+ d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\
+ d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\
+ d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\
+}\
+\
+void helper_ ## name ## ss (Reg *d, Reg *s)\
+{\
+ d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\
+}\
+void helper_ ## name ## pd (Reg *d, Reg *s)\
+{\
+ d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+ d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\
+}\
+\
+void helper_ ## name ## sd (Reg *d, Reg *s)\
+{\
+ d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
+}
+
+#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
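+/* float32/64_compare*() return -1 (less), 0 (equal), 1 (greater) or
+ 2 (unordered); indexing with ret + 1 maps each outcome onto the
+ ZF/PF/CF pattern defined for (U)COMISS/(U)COMISD. */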
+static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+void helper_ucomiss(Reg *d, Reg *s)
+{
+ int ret;
+ float32 s0, s1;
+
+ s0 = d->XMM_S(0);
+ s1 = s->XMM_S(0);
+ ret = float32_compare_quiet(s0, s1, &env->sse_status);
+ CC_SRC = comis_eflags[ret + 1];
+}
+
+void helper_comiss(Reg *d, Reg *s)
+{
+ int ret;
+ float32 s0, s1;
+
+ s0 = d->XMM_S(0);
+ s1 = s->XMM_S(0);
+ ret = float32_compare(s0, s1, &env->sse_status);
+ CC_SRC = comis_eflags[ret + 1];
+}
+
+void helper_ucomisd(Reg *d, Reg *s)
+{
+ int ret;
+ float64 d0, d1;
+
+ d0 = d->XMM_D(0);
+ d1 = s->XMM_D(0);
+ ret = float64_compare_quiet(d0, d1, &env->sse_status);
+ CC_SRC = comis_eflags[ret + 1];
+}
+
+void helper_comisd(Reg *d, Reg *s)
+{
+ int ret;
+ float64 d0, d1;
+
+ d0 = d->XMM_D(0);
+ d1 = s->XMM_D(0);
+ ret = float64_compare(d0, d1, &env->sse_status);
+ CC_SRC = comis_eflags[ret + 1];
+}
+
+uint32_t helper_movmskps(Reg *s)
+{
+ int b0, b1, b2, b3;
+ b0 = s->XMM_L(0) >> 31;
+ b1 = s->XMM_L(1) >> 31;
+ b2 = s->XMM_L(2) >> 31;
+ b3 = s->XMM_L(3) >> 31;
+ return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
+}
+
+uint32_t helper_movmskpd(Reg *s)
+{
+ int b0, b1;
+ b0 = s->XMM_L(1) >> 31;
+ b1 = s->XMM_L(3) >> 31;
+ return b0 | (b1 << 1);
+}
+
+#endif
+
+uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s)
+{
+ uint32_t val;
+ val = 0;
+ val |= (s->B(0) >> 7);
+ val |= (s->B(1) >> 6) & 0x02;
+ val |= (s->B(2) >> 5) & 0x04;
+ val |= (s->B(3) >> 4) & 0x08;
+ val |= (s->B(4) >> 3) & 0x10;
+ val |= (s->B(5) >> 2) & 0x20;
+ val |= (s->B(6) >> 1) & 0x40;
+ val |= (s->B(7)) & 0x80;
+#if SHIFT == 1
+ val |= (s->B(8) << 1) & 0x0100;
+ val |= (s->B(9) << 2) & 0x0200;
+ val |= (s->B(10) << 3) & 0x0400;
+ val |= (s->B(11) << 4) & 0x0800;
+ val |= (s->B(12) << 5) & 0x1000;
+ val |= (s->B(13) << 6) & 0x2000;
+ val |= (s->B(14) << 7) & 0x4000;
+ val |= (s->B(15) << 8) & 0x8000;
+#endif
+ return val;
+}
+
+void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s)
+{
+ Reg r;
+
+ r.B(0) = satsb((int16_t)d->W(0));
+ r.B(1) = satsb((int16_t)d->W(1));
+ r.B(2) = satsb((int16_t)d->W(2));
+ r.B(3) = satsb((int16_t)d->W(3));
+#if SHIFT == 1
+ r.B(4) = satsb((int16_t)d->W(4));
+ r.B(5) = satsb((int16_t)d->W(5));
+ r.B(6) = satsb((int16_t)d->W(6));
+ r.B(7) = satsb((int16_t)d->W(7));
+#endif
+ r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));
+ r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
+ r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
+ r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
+#if SHIFT == 1
+ r.B(12) = satsb((int16_t)s->W(4));
+ r.B(13) = satsb((int16_t)s->W(5));
+ r.B(14) = satsb((int16_t)s->W(6));
+ r.B(15) = satsb((int16_t)s->W(7));
+#endif
+ *d = r;
+}
+
+void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s)
+{
+ Reg r;
+
+ r.B(0) = satub((int16_t)d->W(0));
+ r.B(1) = satub((int16_t)d->W(1));
+ r.B(2) = satub((int16_t)d->W(2));
+ r.B(3) = satub((int16_t)d->W(3));
+#if SHIFT == 1
+ r.B(4) = satub((int16_t)d->W(4));
+ r.B(5) = satub((int16_t)d->W(5));
+ r.B(6) = satub((int16_t)d->W(6));
+ r.B(7) = satub((int16_t)d->W(7));
+#endif
+ r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0));
+ r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1));
+ r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2));
+ r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3));
+#if SHIFT == 1
+ r.B(12) = satub((int16_t)s->W(4));
+ r.B(13) = satub((int16_t)s->W(5));
+ r.B(14) = satub((int16_t)s->W(6));
+ r.B(15) = satub((int16_t)s->W(7));
+#endif
+ *d = r;
+}
+
+void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s)
+{
+ Reg r;
+
+ r.W(0) = satsw(d->L(0));
+ r.W(1) = satsw(d->L(1));
+#if SHIFT == 1
+ r.W(2) = satsw(d->L(2));
+ r.W(3) = satsw(d->L(3));
+#endif
+ r.W((2 << SHIFT) + 0) = satsw(s->L(0));
+ r.W((2 << SHIFT) + 1) = satsw(s->L(1));
+#if SHIFT == 1
+ r.W(6) = satsw(s->L(2));
+ r.W(7) = satsw(s->L(3));
+#endif
+ *d = r;
+}
+
+#define UNPCK_OP(base_name, base) \
+ \
+void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.B(0) = d->B((base << (SHIFT + 2)) + 0); \
+ r.B(1) = s->B((base << (SHIFT + 2)) + 0); \
+ r.B(2) = d->B((base << (SHIFT + 2)) + 1); \
+ r.B(3) = s->B((base << (SHIFT + 2)) + 1); \
+ r.B(4) = d->B((base << (SHIFT + 2)) + 2); \
+ r.B(5) = s->B((base << (SHIFT + 2)) + 2); \
+ r.B(6) = d->B((base << (SHIFT + 2)) + 3); \
+ r.B(7) = s->B((base << (SHIFT + 2)) + 3); \
+XMM_ONLY( \
+ r.B(8) = d->B((base << (SHIFT + 2)) + 4); \
+ r.B(9) = s->B((base << (SHIFT + 2)) + 4); \
+ r.B(10) = d->B((base << (SHIFT + 2)) + 5); \
+ r.B(11) = s->B((base << (SHIFT + 2)) + 5); \
+ r.B(12) = d->B((base << (SHIFT + 2)) + 6); \
+ r.B(13) = s->B((base << (SHIFT + 2)) + 6); \
+ r.B(14) = d->B((base << (SHIFT + 2)) + 7); \
+ r.B(15) = s->B((base << (SHIFT + 2)) + 7); \
+) \
+ *d = r; \
+} \
+ \
+void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.W(0) = d->W((base << (SHIFT + 1)) + 0); \
+ r.W(1) = s->W((base << (SHIFT + 1)) + 0); \
+ r.W(2) = d->W((base << (SHIFT + 1)) + 1); \
+ r.W(3) = s->W((base << (SHIFT + 1)) + 1); \
+XMM_ONLY( \
+ r.W(4) = d->W((base << (SHIFT + 1)) + 2); \
+ r.W(5) = s->W((base << (SHIFT + 1)) + 2); \
+ r.W(6) = d->W((base << (SHIFT + 1)) + 3); \
+ r.W(7) = s->W((base << (SHIFT + 1)) + 3); \
+) \
+ *d = r; \
+} \
+ \
+void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.L(0) = d->L((base << SHIFT) + 0); \
+ r.L(1) = s->L((base << SHIFT) + 0); \
+XMM_ONLY( \
+ r.L(2) = d->L((base << SHIFT) + 1); \
+ r.L(3) = s->L((base << SHIFT) + 1); \
+) \
+ *d = r; \
+} \
+ \
+XMM_ONLY( \
+void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \
+{ \
+ Reg r; \
+ \
+ r.Q(0) = d->Q(base); \
+ r.Q(1) = s->Q(base); \
+ *d = r; \
+} \
+)
+
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+void helper_pi2fd(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
+ d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
+}
+
+void helper_pi2fw(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
+ d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
+}
+
+void helper_pf2id(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
+ d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pf2iw(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
+ d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
+}
+
+void helper_pfacc(MMXReg *d, MMXReg *s)
+{
+ MMXReg r;
+ r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+ r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+ *d = r;
+}
+
+void helper_pfadd(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+ d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfcmpeq(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
+ d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfcmpge(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
+ d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfcmpgt(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
+ d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
+}
+
+void helper_pfmax(MMXReg *d, MMXReg *s)
+{
+ if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
+ d->MMX_S(0) = s->MMX_S(0);
+ if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
+ d->MMX_S(1) = s->MMX_S(1);
+}
+
+void helper_pfmin(MMXReg *d, MMXReg *s)
+{
+ if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
+ d->MMX_S(0) = s->MMX_S(0);
+ if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
+ d->MMX_S(1) = s->MMX_S(1);
+}
+
+void helper_pfmul(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+ d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfnacc(MMXReg *d, MMXReg *s)
+{
+ MMXReg r;
+ r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+ r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+ *d = r;
+}
+
+void helper_pfpnacc(MMXReg *d, MMXReg *s)
+{
+ MMXReg r;
+ r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+ r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+ *d = r;
+}
+
+void helper_pfrcp(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = approx_rcp(s->MMX_S(0));
+ d->MMX_S(1) = d->MMX_S(0);
+}
+
+void helper_pfrsqrt(MMXReg *d, MMXReg *s)
+{
+ d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
+ d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
+ d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
+ d->MMX_L(0) = d->MMX_L(1);
+}
+
+void helper_pfsub(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
+ d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pfsubr(MMXReg *d, MMXReg *s)
+{
+ d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
+ d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
+}
+
+void helper_pswapd(MMXReg *d, MMXReg *s)
+{
+ MMXReg r;
+ r.MMX_L(0) = s->MMX_L(1);
+ r.MMX_L(1) = s->MMX_L(0);
+ *d = r;
+}
+#endif
+
+/* SSSE3 op helpers */
+void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s)
+{
+ int i;
+ Reg r;
+
+ for (i = 0; i < (8 << SHIFT); i++)
+ r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
+
+ *d = r;
+}
+
+void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
+ d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
+ XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
+ XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
+ d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
+ d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
+ XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
+ XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
+}
+
+void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s)
+{
+ d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
+ XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
+ d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
+ XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
+}
+
+void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
+ d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
+ XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
+ XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
+ d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
+ d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
+ XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
+ XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
+}
+
+void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) +
+ (int8_t)s->B( 1) * (uint8_t)d->B( 1));
+ d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) +
+ (int8_t)s->B( 3) * (uint8_t)d->B( 3));
+ d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) +
+ (int8_t)s->B( 5) * (uint8_t)d->B( 5));
+ d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) +
+ (int8_t)s->B( 7) * (uint8_t)d->B( 7));
+#if SHIFT == 1
+ d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) +
+ (int8_t)s->B( 9) * (uint8_t)d->B( 9));
+ d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) +
+ (int8_t)s->B(11) * (uint8_t)d->B(11));
+ d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) +
+ (int8_t)s->B(13) * (uint8_t)d->B(13));
+ d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) +
+ (int8_t)s->B(15) * (uint8_t)d->B(15));
+#endif
+}
+
+void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1);
+ d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3);
+ XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5));
+ XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7));
+ d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1);
+ d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3);
+ XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5));
+ XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7));
+}
+
+void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s)
+{
+ d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1);
+ XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3));
+ d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1);
+ XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3));
+}
+
+void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1));
+ d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3));
+ XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5)));
+ XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7)));
+ d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1));
+ d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3));
+ XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5)));
+ XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7)));
+}
+
+#define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x
+#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x
+#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x
+SSE_HELPER_B(helper_pabsb, FABSB)
+SSE_HELPER_W(helper_pabsw, FABSW)
+SSE_HELPER_L(helper_pabsd, FABSL)
+
+#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15
+SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
+
+#define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d
+#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d
+#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d
+SSE_HELPER_B(helper_psignb, FSIGNB)
+SSE_HELPER_W(helper_psignw, FSIGNW)
+SSE_HELPER_L(helper_psignd, FSIGNL)
+
+void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift)
+{
+ Reg r;
+
+ /* XXX could be checked during translation */
+ if (shift >= (16 << SHIFT)) {
+ r.Q(0) = 0;
+ XMM_ONLY(r.Q(1) = 0);
+ } else {
+ shift <<= 3;
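+ /* SHR(v, i) shifts right for positive i and left for negative i, and
+ yields 0 once |i| reaches 64, so the byte-wise PALIGNR concatenation
+ can be written as a combination of 64-bit shifts. */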
+#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0)
+#if SHIFT == 0
+ r.Q(0) = SHR(s->Q(0), shift - 0) |
+ SHR(d->Q(0), shift - 64);
+#else
+ r.Q(0) = SHR(s->Q(0), shift - 0) |
+ SHR(s->Q(1), shift - 64) |
+ SHR(d->Q(0), shift - 128) |
+ SHR(d->Q(1), shift - 192);
+ r.Q(1) = SHR(s->Q(0), shift + 64) |
+ SHR(s->Q(1), shift - 0) |
+ SHR(d->Q(0), shift - 64) |
+ SHR(d->Q(1), shift - 128);
+#endif
+#undef SHR
+ }
+
+ *d = r;
+}
+
+#define XMM0 env->xmm_regs[0]
+
+#if SHIFT == 1
+#define SSE_HELPER_V(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\
+ d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\
+ if (num > 2) {\
+ d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\
+ d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\
+ if (num > 4) {\
+ d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\
+ d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\
+ d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\
+ d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\
+ if (num > 8) {\
+ d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\
+ d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\
+ d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\
+ d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\
+ d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\
+ d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\
+ d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\
+ d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\
+ }\
+ }\
+ }\
+}
+
+#define SSE_HELPER_I(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\
+{\
+ d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\
+ d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\
+ if (num > 2) {\
+ d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\
+ d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\
+ if (num > 4) {\
+ d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\
+ d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\
+ d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\
+ d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\
+ if (num > 8) {\
+ d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\
+ d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\
+ d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\
+ d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 11) & 1));\
+ d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\
+ d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\
+ d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\
+ d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\
+ }\
+ }\
+ }\
+}
+
+/* SSE4.1 op helpers */
+#define FBLENDVB(d, s, m) (m & 0x80) ? s : d
+#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d
+#define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d
+SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)
+SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)
+SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
+
+void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s)
+{
+ uint64_t zf = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1));
+ uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
+
+ CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C);
+}
+
+#define SSE_HELPER_F(name, elem, num, F)\
+void glue(name, SUFFIX) (Reg *d, Reg *s)\
+{\
+ d->elem(0) = F(0);\
+ d->elem(1) = F(1);\
+ if (num > 2) {\
+ d->elem(2) = F(2);\
+ d->elem(3) = F(3);\
+ if (num > 4) {\
+ d->elem(4) = F(4);\
+ d->elem(5) = F(5);\
+ d->elem(6) = F(6);\
+ d->elem(7) = F(7);\
+ }\
+ }\
+}
+
+SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B)
+SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W)
+SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L)
+SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B)
+SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B)
+SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B)
+SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W)
+SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
+SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
+
+void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s)
+{
+ d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0);
+ d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2);
+}
+
+#define FCMPEQQ(d, s) d == s ? -1 : 0
+SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
+
+void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s)
+{
+ d->W(0) = satuw((int32_t) d->L(0));
+ d->W(1) = satuw((int32_t) d->L(1));
+ d->W(2) = satuw((int32_t) d->L(2));
+ d->W(3) = satuw((int32_t) d->L(3));
+ d->W(4) = satuw((int32_t) s->L(0));
+ d->W(5) = satuw((int32_t) s->L(1));
+ d->W(6) = satuw((int32_t) s->L(2));
+ d->W(7) = satuw((int32_t) s->L(3));
+}
+
+#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s)
+#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s)
+#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s)
+#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s)
+SSE_HELPER_B(helper_pminsb, FMINSB)
+SSE_HELPER_L(helper_pminsd, FMINSD)
+SSE_HELPER_W(helper_pminuw, MIN)
+SSE_HELPER_L(helper_pminud, MIN)
+SSE_HELPER_B(helper_pmaxsb, FMAXSB)
+SSE_HELPER_L(helper_pmaxsd, FMAXSD)
+SSE_HELPER_W(helper_pmaxuw, MAX)
+SSE_HELPER_L(helper_pmaxud, MAX)
+
+#define FMULLD(d, s) (int32_t) d * (int32_t) s
+SSE_HELPER_L(helper_pmulld, FMULLD)
+
+void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s)
+{
+ int idx = 0;
+
+ if (s->W(1) < s->W(idx))
+ idx = 1;
+ if (s->W(2) < s->W(idx))
+ idx = 2;
+ if (s->W(3) < s->W(idx))
+ idx = 3;
+ if (s->W(4) < s->W(idx))
+ idx = 4;
+ if (s->W(5) < s->W(idx))
+ idx = 5;
+ if (s->W(6) < s->W(idx))
+ idx = 6;
+ if (s->W(7) < s->W(idx))
+ idx = 7;
+
+ d->Q(1) = 0;
+ d->L(1) = 0;
+ d->W(1) = idx;
+ d->W(0) = s->W(idx);
+}
+
+void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+ signed char prev_rounding_mode;
+
+ prev_rounding_mode = env->sse_status.float_rounding_mode;
+ if (!(mode & (1 << 2)))
+ switch (mode & 3) {
+ case 0:
+ set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+ break;
+ case 1:
+ set_float_rounding_mode(float_round_down, &env->sse_status);
+ break;
+ case 2:
+ set_float_rounding_mode(float_round_up, &env->sse_status);
+ break;
+ case 3:
+ set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+ break;
+ }
+
+ d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
+ d->L(1) = float64_round_to_int(s->L(1), &env->sse_status);
+ d->L(2) = float64_round_to_int(s->L(2), &env->sse_status);
+ d->L(3) = float64_round_to_int(s->L(3), &env->sse_status);
+
+#if 0 /* TODO */
+ if (mode & (1 << 3))
+ set_float_exception_flags(
+ get_float_exception_flags(&env->sse_status) &
+ ~float_flag_inexact,
+ &env->sse_status);
+#endif
+ env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+ signed char prev_rounding_mode;
+
+ prev_rounding_mode = env->sse_status.float_rounding_mode;
+ if (!(mode & (1 << 2)))
+ switch (mode & 3) {
+ case 0:
+ set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+ break;
+ case 1:
+ set_float_rounding_mode(float_round_down, &env->sse_status);
+ break;
+ case 2:
+ set_float_rounding_mode(float_round_up, &env->sse_status);
+ break;
+ case 3:
+ set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+ break;
+ }
+
+ d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+ d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);
+
+#if 0 /* TODO */
+ if (mode & (1 << 3))
+ set_float_exception_flags(
+ get_float_exception_flags(&env->sse_status) &
+ ~float_flag_inexact,
+ &env->sse_status);
+#endif
+ env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+ signed char prev_rounding_mode;
+
+ prev_rounding_mode = env->sse_status.float_rounding_mode;
+ if (!(mode & (1 << 2)))
+ switch (mode & 3) {
+ case 0:
+ set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+ break;
+ case 1:
+ set_float_rounding_mode(float_round_down, &env->sse_status);
+ break;
+ case 2:
+ set_float_rounding_mode(float_round_up, &env->sse_status);
+ break;
+ case 3:
+ set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+ break;
+ }
+
+ d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
+
+#if 0 /* TODO */
+ if (mode & (1 << 3))
+ set_float_exception_flags(
+ get_float_exception_flags(&env->sse_status) &
+ ~float_flag_inexact,
+ &env->sse_status);
+#endif
+ env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
+
+void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode)
+{
+ signed char prev_rounding_mode;
+
+ prev_rounding_mode = env->sse_status.float_rounding_mode;
+ if (!(mode & (1 << 2)))
+ switch (mode & 3) {
+ case 0:
+ set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
+ break;
+ case 1:
+ set_float_rounding_mode(float_round_down, &env->sse_status);
+ break;
+ case 2:
+ set_float_rounding_mode(float_round_up, &env->sse_status);
+ break;
+ case 3:
+ set_float_rounding_mode(float_round_to_zero, &env->sse_status);
+ break;
+ }
+
+ d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+
+#if 0 /* TODO */
+ if (mode & (1 << 3))
+ set_float_exception_flags(
+ get_float_exception_flags(&env->sse_status) &
+ ~float_flag_inexact,
+ &env->sse_status);
+#endif
+ env->sse_status.float_rounding_mode = prev_rounding_mode;
+}
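The four helpers above share the same immediate decoding (Intel ROUNDxx imm8): bit 2 selects the current MXCSR rounding mode, otherwise bits 1..0 pick the mode explicitly. A tiny illustrative decoder, not part of this diff:

/* imm8 bit 2 set -> keep the current MXCSR rounding mode;
 * imm8 bits 1..0 -> 00 nearest-even, 01 down, 10 up, 11 toward zero. */
static const char *round_mode_name(unsigned imm8)
{
    static const char *names[4] = { "nearest-even", "down", "up", "toward-zero" };
    return (imm8 & 4) ? "use-MXCSR" : names[imm8 & 3];
}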
+
+#define FBLENDP(d, s, m) m ? s : d
+SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
+SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
+SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
+
+void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+ float32 iresult = 0 /*float32_zero*/;
+
+ if (mask & (1 << 4))
+ iresult = float32_add(iresult,
+ float32_mul(d->L(0), s->L(0), &env->sse_status),
+ &env->sse_status);
+ if (mask & (1 << 5))
+ iresult = float32_add(iresult,
+ float32_mul(d->L(1), s->L(1), &env->sse_status),
+ &env->sse_status);
+ if (mask & (1 << 6))
+ iresult = float32_add(iresult,
+ float32_mul(d->L(2), s->L(2), &env->sse_status),
+ &env->sse_status);
+ if (mask & (1 << 7))
+ iresult = float32_add(iresult,
+ float32_mul(d->L(3), s->L(3), &env->sse_status),
+ &env->sse_status);
+ d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/;
+ d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/;
+ d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/;
+ d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/;
+}
+
+void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
+{
+ float64 iresult = 0 /*float64_zero*/;
+
+ if (mask & (1 << 4))
+ iresult = float64_add(iresult,
+ float64_mul(d->Q(0), s->Q(0), &env->sse_status),
+ &env->sse_status);
+ if (mask & (1 << 5))
+ iresult = float64_add(iresult,
+ float64_mul(d->Q(1), s->Q(1), &env->sse_status),
+ &env->sse_status);
+ d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/;
+ d->Q(1) = (mask & (1 << 1)) ? iresult : 0 /*float64_zero*/;
+}
+
+void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
+{
+ int s0 = (offset & 3) << 2;
+ int d0 = (offset & 4) << 0;
+ int i;
+ Reg r;
+
+ for (i = 0; i < 8; i++, d0++) {
+ r.W(i) = 0;
+ r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0));
+ r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1));
+ r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2));
+ r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3));
+ }
+
+ *d = r;
+}
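As a cross-check of the imm8 decoding above (source block at imm[1:0]*4, destination start at imm[2]*4, then eight sliding 4-byte sums of absolute differences), a standalone scalar sketch with hypothetical names, not part of this diff:

#include <stdint.h>
#include <stdlib.h>

/* Scalar reference for MPSADBW: eight SADs of 4-byte groups, the
 * destination window sliding by one byte per result. */
static void mpsadbw_ref(uint16_t out[8], const uint8_t dst[16],
                        const uint8_t src[16], unsigned imm8)
{
    unsigned s0 = (imm8 & 3) * 4;   /* source block:      imm8[1:0] * 4 */
    unsigned d0 = (imm8 & 4);       /* destination start: imm8[2]   * 4 */

    for (unsigned i = 0; i < 8; i++) {
        unsigned sum = 0;
        for (unsigned j = 0; j < 4; j++)
            sum += abs((int)dst[d0 + i + j] - (int)src[s0 + j]);
        out[i] = (uint16_t)sum;
    }
}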
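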
+
+/* SSE4.2 op helpers */
+/* the Intel SDM defines pcmpgtq as a signed 64-bit compare */
+#define FCMPGTQ(d, s) ((int64_t)(d) > (int64_t)(s) ? -1 : 0)
+SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
+
+static inline int pcmp_elen(int reg, uint32_t ctrl)
+{
+ int val;
+
+ /* Presence of REX.W is indicated by a bit higher than 7 set */
+ if (ctrl >> 8)
+ val = abs1((int64_t) env->regs[reg]);
+ else
+ val = abs1((int32_t) env->regs[reg]);
+
+ if (ctrl & 1) {
+ if (val > 8)
+ return 8;
+ } else
+ if (val > 16)
+ return 16;
+
+ return val;
+}
+
+static inline int pcmp_ilen(Reg *r, uint8_t ctrl)
+{
+ int val = 0;
+
+ if (ctrl & 1) {
+ while (val < 8 && r->W(val))
+ val++;
+ } else
+ while (val < 16 && r->B(val))
+ val++;
+
+ return val;
+}
+
+static inline int pcmp_val(Reg *r, uint8_t ctrl, int i)
+{
+ switch ((ctrl >> 0) & 3) {
+ case 0:
+ return r->B(i);
+ case 1:
+ return r->W(i);
+ case 2:
+ return (int8_t) r->B(i);
+ case 3:
+ default:
+ return (int16_t) r->W(i);
+ }
+}
+
+static inline unsigned pcmpxstrx(Reg *d, Reg *s,
+ int8_t ctrl, int valids, int validd)
+{
+ unsigned int res = 0;
+ int v;
+ int j, i;
+ int upper = (ctrl & 1) ? 7 : 15;
+
+ valids--;
+ validd--;
+
+ CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0);
+
+ switch ((ctrl >> 2) & 3) {
+ case 0:
+ for (j = valids; j >= 0; j--) {
+ res <<= 1;
+ v = pcmp_val(s, ctrl, j);
+ for (i = validd; i >= 0; i--)
+ res |= (v == pcmp_val(d, ctrl, i));
+ }
+ break;
+ case 1:
+ for (j = valids; j >= 0; j--) {
+ res <<= 1;
+ v = pcmp_val(s, ctrl, j);
+ for (i = ((validd - 1) | 1); i >= 0; i -= 2)
+ res |= (pcmp_val(d, ctrl, i - 0) <= v &&
+ pcmp_val(d, ctrl, i - 1) >= v);
+ }
+ break;
+ case 2:
+ res = (2 << (upper - MAX(valids, validd))) - 1;
+ res <<= MAX(valids, validd) - MIN(valids, validd);
+ for (i = MIN(valids, validd); i >= 0; i--) {
+ res <<= 1;
+ v = pcmp_val(s, ctrl, i);
+ res |= (v == pcmp_val(d, ctrl, i));
+ }
+ break;
+ case 3:
+ for (j = valids - validd; j >= 0; j--) {
+ res <<= 1;
+ res |= 1;
+ for (i = MIN(upper - j, validd); i >= 0; i--)
+ res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
+ }
+ break;
+ }
+
+ switch ((ctrl >> 4) & 3) {
+ case 1:
+ res ^= (2 << upper) - 1;
+ break;
+ case 3:
+ res ^= (2 << valids) - 1;
+ break;
+ }
+
+ if (res)
+ CC_SRC |= CC_C;
+ if (res & 1)
+ CC_SRC |= CC_O;
+
+ return res;
+}
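The densest part of the helper above is the aggregation switch. As a reference point, a standalone sketch (bytes only, hypothetical names) of mode 0, "equal any", where result bit j is set when source element j matches any valid destination element:

#include <stdint.h>

/* Mode 0 ("equal any"), scalar byte version: valids/validd are element
 * counts here, whereas the helper above works with last-valid indices. */
static unsigned equal_any_bytes(const uint8_t *d, int validd,
                                const uint8_t *s, int valids)
{
    unsigned res = 0;
    for (int j = valids - 1; j >= 0; j--) {
        res <<= 1;
        for (int i = validd - 1; i >= 0; i--)
            res |= (unsigned)(s[j] == d[i]);
    }
    return res;
}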
+
+static inline int rffs1(unsigned int val)
+{
+ int ret = 1, hi;
+
+ for (hi = sizeof(val) * 4; hi; hi /= 2)
+ if (val >> hi) {
+ val >>= hi;
+ ret += hi;
+ }
+
+ return ret;
+}
+
+static inline int ffs1(unsigned int val)
+{
+ int ret = 1, hi;
+
+ for (hi = sizeof(val) * 4; hi; hi /= 2)
+ if (val << hi) {
+ val <<= hi;
+ ret += hi;
+ }
+
+ return ret;
+}
+
+void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_elen(R_EDX, ctrl),
+ pcmp_elen(R_EAX, ctrl));
+
+ if (res)
+#ifndef VBOX
+ env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1;
+#else
+ env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1(res) : ffs1(res)) - 1;
+#endif
+ else
+ env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
+}
+
+void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ int i;
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_elen(R_EDX, ctrl),
+ pcmp_elen(R_EAX, ctrl));
+
+ if ((ctrl >> 6) & 1) {
+#ifndef VBOX
+ if (ctrl & 1)
+ for (i = 0; i <= 8; i--, res >>= 1)
+ d->W(i) = (res & 1) ? ~0 : 0;
+ else
+ for (i = 0; i <= 16; i--, res >>= 1)
+ d->B(i) = (res & 1) ? ~0 : 0;
+#else
+ if (ctrl & 1)
+ for (i = 0; i < 8; i++, res >>= 1) {
+ d->W(i) = (res & 1) ? ~0 : 0;
+ }
+ else
+ for (i = 0; i < 16; i++, res >>= 1) {
+ d->B(i) = (res & 1) ? ~0 : 0;
+ }
+#endif
+ } else {
+ d->Q(1) = 0;
+ d->Q(0) = res;
+ }
+}
+
+void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_ilen(s, ctrl),
+ pcmp_ilen(d, ctrl));
+
+ if (res)
+ env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1;
+ else
+ env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
+}
+
+void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl)
+{
+ int i;
+ unsigned int res = pcmpxstrx(d, s, ctrl,
+ pcmp_ilen(s, ctrl),
+ pcmp_ilen(d, ctrl));
+
+ if ((ctrl >> 6) & 1) {
+#ifndef VBOX
+ if (ctrl & 1)
+ for (i = 0; i <= 8; i--, res >>= 1)
+ d->W(i) = (res & 1) ? ~0 : 0;
+ else
+ for (i = 0; i <= 16; i--, res >>= 1)
+ d->B(i) = (res & 1) ? ~0 : 0;
+#else
+ if (ctrl & 1)
+ for (i = 0; i < 8; i++, res >>= 1) {
+ d->W(i) = (res & 1) ? ~0 : 0;
+ }
+ else
+ for (i = 0; i < 16; i++, res >>= 1) {
+ d->B(i) = (res & 1) ? ~0 : 0;
+ }
+#endif
+ } else {
+ d->Q(1) = 0;
+ d->Q(0) = res;
+ }
+}
+
+#define CRCPOLY 0x1edc6f41
+#define CRCPOLY_BITREV 0x82f63b78
+target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
+{
+ target_ulong crc = (msg & ((target_ulong) -1 >>
+ (TARGET_LONG_BITS - len))) ^ crc1;
+
+ while (len--)
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0);
+
+ return crc;
+}
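The loop above is the bit-serial form of CRC32-C (Castagnoli) using the reflected polynomial 0x82f63b78. A self-contained sketch of how the same per-bit update accumulates over a byte buffer, illustrative only and not part of this diff:

#include <stdint.h>
#include <stddef.h>

/* Bit-serial CRC32-C: XOR in a byte, then fold out eight bits with the
 * reflected Castagnoli polynomial. */
static uint32_t crc32c_update(uint32_t crc, const uint8_t *buf, size_t n)
{
    while (n--) {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78u : 0);
    }
    return crc;
}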
+
+#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1))
+#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i))
+target_ulong helper_popcnt(target_ulong n, uint32_t type)
+{
+ CC_SRC = n ? 0 : CC_Z;
+
+ n = POPCOUNT(n, 0);
+ n = POPCOUNT(n, 1);
+ n = POPCOUNT(n, 2);
+ n = POPCOUNT(n, 3);
+ if (type == 1)
+ return n & 0xff;
+
+ n = POPCOUNT(n, 4);
+#ifndef TARGET_X86_64
+ return n;
+#else
+ if (type == 2)
+ return n & 0xff;
+
+ return POPCOUNT(n, 5);
+#endif
+}
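POPMASK/POPCOUNT above generate the classic SWAR masks (0x5555..., 0x3333..., 0x0f0f..., ...) by dividing all-ones by (2^(2^i) + 1), then sum adjacent fields of doubling width. A fixed 32-bit version of the same folding, as a standalone sketch:

#include <stdint.h>

/* SWAR popcount: each step adds adjacent fields of twice the width. */
static unsigned popcount32_ref(uint32_t n)
{
    n = (n & 0x55555555u) + ((n >> 1)  & 0x55555555u); /* 2-bit sums  */
    n = (n & 0x33333333u) + ((n >> 2)  & 0x33333333u); /* 4-bit sums  */
    n = (n & 0x0f0f0f0fu) + ((n >> 4)  & 0x0f0f0f0fu); /* 8-bit sums  */
    n = (n & 0x00ff00ffu) + ((n >> 8)  & 0x00ff00ffu); /* 16-bit sums */
    n = (n & 0x0000ffffu) + (n >> 16);                 /* 32-bit sum  */
    return n;
}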
+#endif
+
+#undef SHIFT
+#undef XMM_ONLY
+#undef Reg
+#undef B
+#undef W
+#undef L
+#undef Q
+#undef SUFFIX
diff --git a/src/recompiler/target-i386/ops_sse_header.h b/src/recompiler/target-i386/ops_sse_header.h
new file mode 100644
index 00000000..efa3e905
--- /dev/null
+++ b/src/recompiler/target-i386/ops_sse_header.h
@@ -0,0 +1,359 @@
+/*
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#if SHIFT == 0
+#define Reg MMXReg
+#define SUFFIX _mmx
+#else
+#define Reg XMMReg
+#define SUFFIX _xmm
+#endif
+
+#define dh_alias_Reg ptr
+#define dh_alias_XMMReg ptr
+#define dh_alias_MMXReg ptr
+#define dh_ctype_Reg Reg *
+#define dh_ctype_XMMReg XMMReg *
+#define dh_ctype_MMXReg MMXReg *
+#define dh_is_signed_Reg dh_is_signed_ptr
+#define dh_is_signed_XMMReg dh_is_signed_ptr
+#define dh_is_signed_MMXReg dh_is_signed_ptr
+
+DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg)
+#endif
+
+#define SSE_HELPER_B(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_W(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_L(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+#define SSE_HELPER_Q(name, F)\
+ DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg)
+
+SSE_HELPER_B(paddb, FADD)
+SSE_HELPER_W(paddw, FADD)
+SSE_HELPER_L(paddl, FADD)
+SSE_HELPER_Q(paddq, FADD)
+
+SSE_HELPER_B(psubb, FSUB)
+SSE_HELPER_W(psubw, FSUB)
+SSE_HELPER_L(psubl, FSUB)
+SSE_HELPER_Q(psubq, FSUB)
+
+SSE_HELPER_B(paddusb, FADDUB)
+SSE_HELPER_B(paddsb, FADDSB)
+SSE_HELPER_B(psubusb, FSUBUB)
+SSE_HELPER_B(psubsb, FSUBSB)
+
+SSE_HELPER_W(paddusw, FADDUW)
+SSE_HELPER_W(paddsw, FADDSW)
+SSE_HELPER_W(psubusw, FSUBUW)
+SSE_HELPER_W(psubsw, FSUBSW)
+
+SSE_HELPER_B(pminub, FMINUB)
+SSE_HELPER_B(pmaxub, FMAXUB)
+
+SSE_HELPER_W(pminsw, FMINSW)
+SSE_HELPER_W(pmaxsw, FMAXSW)
+
+SSE_HELPER_Q(pand, FAND)
+SSE_HELPER_Q(pandn, FANDN)
+SSE_HELPER_Q(por, FOR)
+SSE_HELPER_Q(pxor, FXOR)
+
+SSE_HELPER_B(pcmpgtb, FCMPGTB)
+SSE_HELPER_W(pcmpgtw, FCMPGTW)
+SSE_HELPER_L(pcmpgtl, FCMPGTL)
+
+SSE_HELPER_B(pcmpeqb, FCMPEQ)
+SSE_HELPER_W(pcmpeqw, FCMPEQ)
+SSE_HELPER_L(pcmpeql, FCMPEQ)
+
+SSE_HELPER_W(pmullw, FMULLW)
+#if SHIFT == 0
+SSE_HELPER_W(pmulhrw, FMULHRW)
+#endif
+SSE_HELPER_W(pmulhuw, FMULHUW)
+SSE_HELPER_W(pmulhw, FMULHW)
+
+SSE_HELPER_B(pavgb, FAVG)
+SSE_HELPER_W(pavgw, FAVG)
+
+DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg)
+
+DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl)
+DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32)
+#ifdef TARGET_X86_64
+DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64)
+#endif
+
+#if SHIFT == 0
+DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int)
+#else
+DEF_HELPER_3(shufps, void, Reg, Reg, int)
+DEF_HELPER_3(shufpd, void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int)
+DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int)
+#endif
+
+#if SHIFT == 1
+/* FPU ops */
+/* XXX: not accurate */
+
+#define SSE_HELPER_S(name, F)\
+ DEF_HELPER_2(name ## ps , void, Reg, Reg) \
+ DEF_HELPER_2(name ## ss , void, Reg, Reg) \
+ DEF_HELPER_2(name ## pd , void, Reg, Reg) \
+ DEF_HELPER_2(name ## sd , void, Reg, Reg)
+
+SSE_HELPER_S(add, FPU_ADD)
+SSE_HELPER_S(sub, FPU_SUB)
+SSE_HELPER_S(mul, FPU_MUL)
+SSE_HELPER_S(div, FPU_DIV)
+SSE_HELPER_S(min, FPU_MIN)
+SSE_HELPER_S(max, FPU_MAX)
+SSE_HELPER_S(sqrt, FPU_SQRT)
+
+
+DEF_HELPER_2(cvtps2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpd2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtss2sd, void, Reg, Reg)
+DEF_HELPER_2(cvtsd2ss, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2ps, void, Reg, Reg)
+DEF_HELPER_2(cvtdq2pd, void, Reg, Reg)
+DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg)
+DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32)
+DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32)
+
+#ifdef TARGET_X86_64
+DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64)
+DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64)
+#endif
+
+DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvtss2si, s32, XMMReg)
+DEF_HELPER_1(cvtsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvtss2sq, s64, XMMReg)
+DEF_HELPER_1(cvtsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg)
+DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg)
+DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg)
+DEF_HELPER_1(cvttss2si, s32, XMMReg)
+DEF_HELPER_1(cvttsd2si, s32, XMMReg)
+#ifdef TARGET_X86_64
+DEF_HELPER_1(cvttss2sq, s64, XMMReg)
+DEF_HELPER_1(cvttsd2sq, s64, XMMReg)
+#endif
+
+DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpps, void, XMMReg, XMMReg)
+DEF_HELPER_2(rcpss, void, XMMReg, XMMReg)
+DEF_HELPER_2(extrq_r, void, XMMReg, XMMReg)
+DEF_HELPER_3(extrq_i, void, XMMReg, int, int)
+DEF_HELPER_2(insertq_r, void, XMMReg, XMMReg)
+DEF_HELPER_3(insertq_i, void, XMMReg, int, int)
+DEF_HELPER_2(haddps, void, XMMReg, XMMReg)
+DEF_HELPER_2(haddpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubps, void, XMMReg, XMMReg)
+DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg)
+
+#define SSE_HELPER_CMP(name, F)\
+ DEF_HELPER_2( name ## ps , void, Reg, Reg) \
+ DEF_HELPER_2( name ## ss , void, Reg, Reg) \
+ DEF_HELPER_2( name ## pd , void, Reg, Reg) \
+ DEF_HELPER_2( name ## sd , void, Reg, Reg)
+
+SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
+SSE_HELPER_CMP(cmplt, FPU_CMPLT)
+SSE_HELPER_CMP(cmple, FPU_CMPLE)
+SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
+SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
+SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
+SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
+SSE_HELPER_CMP(cmpord, FPU_CMPORD)
+
+DEF_HELPER_2(ucomiss, void, Reg, Reg)
+DEF_HELPER_2(comiss, void, Reg, Reg)
+DEF_HELPER_2(ucomisd, void, Reg, Reg)
+DEF_HELPER_2(comisd, void, Reg, Reg)
+DEF_HELPER_1(movmskps, i32, Reg)
+DEF_HELPER_1(movmskpd, i32, Reg)
+#endif
+
+DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg)
+DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg)
+#define UNPCK_OP(base_name, base) \
+ DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \
+ DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \
+ DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg)
+
+UNPCK_OP(l, 0)
+UNPCK_OP(h, 1)
+
+#if SHIFT == 1
+DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg)
+#endif
+
+/* 3DNow! float ops */
+#if SHIFT == 0
+DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2id, void, MMXReg, MMXReg)
+DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfadd, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmax, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmin, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfmul, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsub, void, MMXReg, MMXReg)
+DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg)
+DEF_HELPER_2(pswapd, void, MMXReg, MMXReg)
+#endif
+
+/* SSSE3 op helpers */
+DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32)
+
+/* SSE4.1 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg)
+DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32)
+#endif
+
+/* SSE4.2 op helpers */
+#if SHIFT == 1
+DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg)
+DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32)
+DEF_HELPER_3(crc32, tl, i32, tl, i32)
+DEF_HELPER_2(popcnt, tl, tl, i32)
+#endif
+
+#undef SHIFT
+#undef Reg
+#undef SUFFIX
+
+#undef SSE_HELPER_B
+#undef SSE_HELPER_W
+#undef SSE_HELPER_L
+#undef SSE_HELPER_Q
+#undef SSE_HELPER_S
+#undef SSE_HELPER_CMP
+#undef UNPCK_OP
diff --git a/src/recompiler/target-i386/svm.h b/src/recompiler/target-i386/svm.h
new file mode 100644
index 00000000..a224aead
--- /dev/null
+++ b/src/recompiler/target-i386/svm.h
@@ -0,0 +1,222 @@
+#ifndef __SVM_H
+#define __SVM_H
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+/* only included in documentation, maybe wrong */
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_NPF 0x400
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+ uint16_t intercept_cr_read;
+ uint16_t intercept_cr_write;
+ uint16_t intercept_dr_read;
+ uint16_t intercept_dr_write;
+ uint32_t intercept_exceptions;
+ uint64_t intercept;
+ uint8_t reserved_1[44];
+ uint64_t iopm_base_pa;
+ uint64_t msrpm_base_pa;
+ uint64_t tsc_offset;
+ uint32_t asid;
+ uint8_t tlb_ctl;
+ uint8_t reserved_2[3];
+ uint32_t int_ctl;
+ uint32_t int_vector;
+ uint32_t int_state;
+ uint8_t reserved_3[4];
+ uint64_t exit_code;
+ uint64_t exit_info_1;
+ uint64_t exit_info_2;
+ uint32_t exit_int_info;
+ uint32_t exit_int_info_err;
+ uint64_t nested_ctl;
+ uint8_t reserved_4[16];
+ uint32_t event_inj;
+ uint32_t event_inj_err;
+ uint64_t nested_cr3;
+ uint64_t lbr_ctl;
+ uint8_t reserved_5[832];
+};
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+ uint16_t selector;
+ uint16_t attrib;
+ uint32_t limit;
+ uint64_t base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+ struct vmcb_seg es;
+ struct vmcb_seg cs;
+ struct vmcb_seg ss;
+ struct vmcb_seg ds;
+ struct vmcb_seg fs;
+ struct vmcb_seg gs;
+ struct vmcb_seg gdtr;
+ struct vmcb_seg ldtr;
+ struct vmcb_seg idtr;
+ struct vmcb_seg tr;
+ uint8_t reserved_1[43];
+ uint8_t cpl;
+ uint8_t reserved_2[4];
+ uint64_t efer;
+ uint8_t reserved_3[112];
+ uint64_t cr4;
+ uint64_t cr3;
+ uint64_t cr0;
+ uint64_t dr7;
+ uint64_t dr6;
+ uint64_t rflags;
+ uint64_t rip;
+ uint8_t reserved_4[88];
+ uint64_t rsp;
+ uint8_t reserved_5[24];
+ uint64_t rax;
+ uint64_t star;
+ uint64_t lstar;
+ uint64_t cstar;
+ uint64_t sfmask;
+ uint64_t kernel_gs_base;
+ uint64_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ uint64_t cr2;
+ uint8_t reserved_6[32];
+ uint64_t g_pat;
+ uint64_t dbgctl;
+ uint64_t br_from;
+ uint64_t br_to;
+ uint64_t last_excp_from;
+ uint64_t last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+ struct vmcb_control_area control;
+ struct vmcb_save_area save;
+};
+
+#endif
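A quick sanity check one could compile against this header (sketch only): per the AMD APM vol. 2 layout, these packed structures put the save area at offset 0x400, i.e. the control area is exactly 1024 bytes.

#include <stddef.h>
/* Classic negative-array-size trick; fails to compile if the layout drifts. */
typedef char vmcb_control_is_1k[(sizeof(struct vmcb_control_area) == 0x400) ? 1 : -1];
typedef char vmcb_save_at_400h[(offsetof(struct vmcb, save) == 0x400) ? 1 : -1];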
diff --git a/src/recompiler/target-i386/translate.c b/src/recompiler/target-i386/translate.c
new file mode 100644
index 00000000..1b82f3f5
--- /dev/null
+++ b/src/recompiler/target-i386/translate.c
@@ -0,0 +1,8385 @@
+/*
+ * i386 translation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifndef VBOX
+#include <inttypes.h>
+#include <signal.h>
+#endif /* !VBOX */
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+#include "tcg-op.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+#define PREFIX_REPZ 0x01
+#define PREFIX_REPNZ 0x02
+#define PREFIX_LOCK 0x04
+#define PREFIX_DATA 0x08
+#define PREFIX_ADR 0x10
+
+#ifdef TARGET_X86_64
+#define X86_64_ONLY(x) x
+#define X86_64_DEF(...) __VA_ARGS__
+#define CODE64(s) ((s)->code64)
+#define REX_X(s) ((s)->rex_x)
+#define REX_B(s) ((s)->rex_b)
+# ifdef VBOX
+# define IS_LONG_MODE(s) ((s)->lma)
+# endif
+/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
+#if 1
+#define BUGGY_64(x) NULL
+#endif
+#else
+#define X86_64_ONLY(x) NULL
+#define X86_64_DEF(...)
+#define CODE64(s) 0
+#define REX_X(s) 0
+#define REX_B(s) 0
+# ifdef VBOX
+# define IS_LONG_MODE(s) 0
+# endif
+#endif
+
+//#define MACRO_TEST 1
+
+/* global register indexes */
+static TCGv_ptr cpu_env;
+static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv_i32 cpu_cc_op;
+static TCGv cpu_regs[CPU_NB_REGS];
+/* local temps */
+static TCGv cpu_T[2], cpu_T3;
+/* local register indexes (only used inside old micro ops) */
+static TCGv cpu_tmp0, cpu_tmp4;
+static TCGv_ptr cpu_ptr0, cpu_ptr1;
+static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static TCGv_i64 cpu_tmp1_i64;
+static TCGv cpu_tmp5;
+
+static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+
+#include "gen-icount.h"
+
+#ifdef TARGET_X86_64
+static int x86_64_hregs;
+#endif
+
+#ifdef VBOX
+
+/* Special/override code readers to hide patched code. */
+
+uint8_t ldub_code_raw(target_ulong pc)
+{
+ uint8_t b;
+
+# ifdef VBOX_WITH_RAW_MODE
+ if (!remR3GetOpcode(cpu_single_env, pc, &b))
+# endif
+ b = ldub_code(pc);
+ return b;
+}
+# define ldub_code(a) ldub_code_raw(a)
+
+uint16_t lduw_code_raw(target_ulong pc)
+{
+ uint16_t u16;
+ u16 = (uint16_t)ldub_code_raw(pc);
+ u16 |= (uint16_t)ldub_code_raw(pc + 1) << 8;
+ return u16;
+}
+# define lduw_code(a) lduw_code_raw(a)
+
+
+uint32_t ldl_code_raw(target_ulong pc)
+{
+ uint32_t u32;
+ u32 = (uint32_t)ldub_code_raw(pc);
+ u32 |= (uint32_t)ldub_code_raw(pc + 1) << 8;
+ u32 |= (uint32_t)ldub_code_raw(pc + 2) << 16;
+ u32 |= (uint32_t)ldub_code_raw(pc + 3) << 24;
+ return u32;
+}
+# define ldl_code(a) ldl_code_raw(a)
+
+#endif /* VBOX */
+
+typedef struct DisasContext {
+ /* current insn context */
+ int override; /* -1 if no override */
+ int prefix;
+ int aflag, dflag;
+ target_ulong pc; /* pc = eip + cs_base */
+ int is_jmp; /* 1 means jump (stop translation), 2 means CPU
+ static state change (stop translation) */
+ /* current block context */
+ target_ulong cs_base; /* base of CS segment */
+ int pe; /* protected mode */
+ int code32; /* 32 bit code segment */
+#ifdef TARGET_X86_64
+ int lma; /* long mode active */
+ int code64; /* 64 bit code segment */
+ int rex_x, rex_b;
+#endif
+ int ss32; /* 32 bit stack segment */
+ int cc_op; /* current CC operation */
+ int addseg; /* non zero if any of DS/ES/SS has a non zero base */
+ int f_st; /* currently unused */
+ int vm86; /* vm86 mode */
+#ifdef VBOX
+ int vme; /* CR4.VME */
+ int pvi; /* CR4.PVI */
+ int record_call; /* record calls for CSAM or not? */
+#endif
+ int cpl;
+ int iopl;
+ int tf; /* TF cpu flag */
+ int singlestep_enabled; /* "hardware" single step enabled */
+ int jmp_opt; /* use direct block chaining for direct jumps */
+ int mem_index; /* select memory access functions */
+ uint64_t flags; /* all execution flags */
+ struct TranslationBlock *tb;
+ int popl_esp_hack; /* for correct popl with esp base handling */
+ int rip_offset; /* only used in x86_64, but left for simplicity */
+ int cpuid_features;
+ int cpuid_ext_features;
+ int cpuid_ext2_features;
+ int cpuid_ext3_features;
+} DisasContext;
+
+static void gen_eob(DisasContext *s);
+static void gen_jmp(DisasContext *s, target_ulong eip);
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+
+#ifdef VBOX
+static void gen_check_external_event(void);
+#endif
+
+/* i386 arith/logic operations */
+enum {
+ OP_ADDL,
+ OP_ORL,
+ OP_ADCL,
+ OP_SBBL,
+ OP_ANDL,
+ OP_SUBL,
+ OP_XORL,
+ OP_CMPL,
+};
+
+/* i386 shift ops */
+enum {
+ OP_ROL,
+ OP_ROR,
+ OP_RCL,
+ OP_RCR,
+ OP_SHL,
+ OP_SHR,
+ OP_SHL1, /* undocumented */
+ OP_SAR = 7,
+};
+
+enum {
+ JCC_O,
+ JCC_B,
+ JCC_Z,
+ JCC_BE,
+ JCC_S,
+ JCC_P,
+ JCC_L,
+ JCC_LE,
+};
+
+/* operand size */
+enum {
+ OT_BYTE = 0,
+ OT_WORD,
+ OT_LONG,
+ OT_QUAD,
+};
+
+enum {
+ /* I386 int registers */
+ OR_EAX, /* MUST be even numbered */
+ OR_ECX,
+ OR_EDX,
+ OR_EBX,
+ OR_ESP,
+ OR_EBP,
+ OR_ESI,
+ OR_EDI,
+
+ OR_TMP0 = 16, /* temporary operand register */
+ OR_TMP1,
+ OR_A0, /* temporary register used when doing address evaluation */
+};
+
+static inline void gen_op_movl_T0_0(void)
+{
+ tcg_gen_movi_tl(cpu_T[0], 0);
+}
+
+static inline void gen_op_movl_T0_im(int32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_imu(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T1_im(int32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_T1_imu(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_movl_A0_im(uint32_t val)
+{
+ tcg_gen_movi_tl(cpu_A0, val);
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_im(int64_t val)
+{
+ tcg_gen_movi_tl(cpu_A0, val);
+}
+#endif
+
+static inline void gen_movtl_T0_im(target_ulong val)
+{
+ tcg_gen_movi_tl(cpu_T[0], val);
+}
+
+static inline void gen_movtl_T1_im(target_ulong val)
+{
+ tcg_gen_movi_tl(cpu_T[1], val);
+}
+
+static inline void gen_op_andl_T0_ffff(void)
+{
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+}
+
+static inline void gen_op_andl_T0_im(uint32_t val)
+{
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val);
+}
+
+static inline void gen_op_movl_T0_T1(void)
+{
+ tcg_gen_mov_tl(cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_andl_A0_ffff(void)
+{
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff);
+}
+
+#ifdef TARGET_X86_64
+
+#define NB_OP_SIZES 4
+
+#else /* !TARGET_X86_64 */
+
+#define NB_OP_SIZES 3
+
+#endif /* !TARGET_X86_64 */
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define REG_B_OFFSET (sizeof(target_ulong) - 1)
+#define REG_H_OFFSET (sizeof(target_ulong) - 2)
+#define REG_W_OFFSET (sizeof(target_ulong) - 2)
+#define REG_L_OFFSET (sizeof(target_ulong) - 4)
+#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
+#else
+#define REG_B_OFFSET 0
+#define REG_H_OFFSET 1
+#define REG_W_OFFSET 0
+#define REG_L_OFFSET 0
+#define REG_LH_OFFSET 4
+#endif
+
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
+{
+ TCGv tmp;
+
+ switch(ot) {
+ case OT_BYTE:
+ tmp = tcg_temp_new();
+ tcg_gen_ext8u_tl(tmp, t0);
+ if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ } else {
+ tcg_gen_shli_tl(tmp, tmp, 8);
+ tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
+ tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
+ }
+ tcg_temp_free(tmp);
+ break;
+ case OT_WORD:
+ tmp = tcg_temp_new();
+ tcg_gen_ext16u_tl(tmp, t0);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ break;
+ default: /* XXX this shouldn't be reached; abort? */
+ case OT_LONG:
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a mov. */
+ tcg_gen_ext32u_tl(cpu_regs[reg], t0);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ break;
+#endif
+ }
+}
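The OT_BYTE branch above encodes the legacy byte-register aliasing: without REX (and outside x86_64_hregs), encodings 4..7 are AH/CH/DH/BH, stored in bits 8..15 of registers 0..3, hence the `reg - 4` and the shift by 8. A plain-C sketch of the same rule, illustrative only:

#include <stdint.h>

/* Write an 8-bit value to byte-register encoding 'enc' of a plain
 * register file, mirroring the TCG sequence generated above. */
static void set_byte_reg(uint32_t regs[8], int enc, uint8_t val)
{
    if (enc < 4)                                    /* AL, CL, DL, BL */
        regs[enc] = (regs[enc] & ~0xffu) | val;
    else                                            /* AH, CH, DH, BH */
        regs[enc - 4] = (regs[enc - 4] & ~0xff00u) | ((uint32_t)val << 8);
}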
+
+static inline void gen_op_mov_reg_T0(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+}
+
+static inline void gen_op_mov_reg_T1(int ot, int reg)
+{
+ gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+}
+
+static inline void gen_op_mov_reg_A0(int size, int reg)
+{
+ TCGv tmp;
+
+ switch(size) {
+ case 0:
+ tmp = tcg_temp_new();
+ tcg_gen_ext16u_tl(tmp, cpu_A0);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
+ tcg_temp_free(tmp);
+ break;
+ default: /* XXX this shouldn't be reached; abort? */
+ case 1:
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a mov. */
+ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
+ break;
+#endif
+ }
+}
+
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
+{
+ switch(ot) {
+ case OT_BYTE:
+ if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+ goto std_case;
+ } else {
+ tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+ tcg_gen_ext8u_tl(t0, t0);
+ }
+ break;
+ default:
+ std_case:
+ tcg_gen_mov_tl(t0, cpu_regs[reg]);
+ break;
+ }
+}
+
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+ gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
+static inline void gen_op_movl_A0_reg(int reg)
+{
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+}
+
+static inline void gen_op_addl_A0_im(int32_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_addq_A0_im(int64_t val)
+{
+ tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+}
+#endif
+
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s))
+ gen_op_addq_A0_im(val);
+ else
+#endif
+ gen_op_addl_A0_im(val);
+}
+
+static inline void gen_op_addl_T0_T1(void)
+{
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_jmp_T0(void)
+{
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+}
+
+static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
+{
+ switch(size) {
+ case 0:
+ tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
+ tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+ break;
+ case 1:
+ tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a nop. */
+ tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
+ break;
+#endif
+ }
+}
+
+static inline void gen_op_add_reg_T0(int size, int reg)
+{
+ switch(size) {
+ case 0:
+ tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+ tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+ tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+ break;
+ case 1:
+ tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a nop. */
+ tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
+ break;
+#endif
+ }
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+ tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ if (shift != 0)
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+ /* For x86_64, this sets the higher half of register to zero.
+ For i386, this is equivalent to a nop. */
+ tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+}
+
+#ifdef VBOX
+DECLINLINE(void) gen_op_seg_check(int reg, bool keepA0)
+{
+ /* It seems segments don't get out of sync - if they in fact do, enable the code below. */
+# ifdef FORCE_SEGMENT_SYNC
+# if 1
+ TCGv t0;
+
+ /* Considering the poor quality of the TCG optimizer, it's better to call the helper directly */
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ tcg_gen_movi_tl(t0, reg);
+ tcg_gen_helper_0_1(helper_sync_seg, t0);
+ tcg_temp_free(t0);
+# else
+ /* Our segments could be outdated, so check the newselector field to see if an update is really needed */
+ int skip_label;
+ TCGv t0, a0;
+
+ /* For other segments this check is a waste of time; TCG is also unable to cope with this code
+ for data/stack segments, as it expects cpu_T[0] to be live */
+ if (reg != R_GS)
+ return;
+
+ if (keepA0)
+ {
+ /* we need to store old cpu_A0 */
+ a0 = tcg_temp_local_new(TCG_TYPE_TL);
+ tcg_gen_mov_tl(a0, cpu_A0);
+ }
+
+ skip_label = gen_new_label();
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, segs[reg].newselector) + REG_L_OFFSET);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, eflags) + REG_L_OFFSET);
+ tcg_gen_andi_tl(t0, t0, VM_MASK);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, skip_label);
+ tcg_gen_movi_tl(t0, reg);
+
+ tcg_gen_helper_0_1(helper_sync_seg, t0);
+
+ tcg_temp_free(t0);
+
+ gen_set_label(skip_label);
+ if (keepA0)
+ {
+ tcg_gen_mov_tl(cpu_A0, a0);
+ tcg_temp_free(a0);
+ }
+# endif /* 0 */
+# endif /* FORCE_SEGMENT_SYNC */
+}
+#endif /* VBOX */
+
+static inline void gen_op_movl_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, false);
+#endif
+ tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, true);
+#endif
+ tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, false);
+#endif
+ tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
+}
+
+static inline void gen_op_addq_A0_seg(int reg)
+{
+#ifdef VBOX
+ gen_op_seg_check(reg, true);
+#endif
+ tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+
+static inline void gen_op_movq_A0_reg(int reg)
+{
+ tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
+}
+
+static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+ if (shift != 0)
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+}
+#endif
+
+static inline void gen_op_lds_T0_A0(int idx)
+{
+ int mem_index = (idx >> 2) - 1;
+ switch(idx & 3) {
+ case 0:
+ tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
+ break;
+ case 1:
+ tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
+ break;
+ default:
+ case 2:
+ tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
+ break;
+ }
+}
+
+static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
+{
+ int mem_index = (idx >> 2) - 1;
+ switch(idx & 3) {
+ case 0:
+ tcg_gen_qemu_ld8u(t0, a0, mem_index);
+ break;
+ case 1:
+ tcg_gen_qemu_ld16u(t0, a0, mem_index);
+ break;
+ case 2:
+ tcg_gen_qemu_ld32u(t0, a0, mem_index);
+ break;
+ default:
+ case 3:
+ /* Should never happen on 32-bit targets. */
+#ifdef TARGET_X86_64
+ tcg_gen_qemu_ld64(t0, a0, mem_index);
+#endif
+ break;
+ }
+}
+
+/* XXX: always use ldu or lds */
+static inline void gen_op_ld_T0_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_ldu_T0_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_ld_T1_A0(int idx)
+{
+ gen_op_ld_v(idx, cpu_T[1], cpu_A0);
+}
+
+static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
+{
+ int mem_index = (idx >> 2) - 1;
+ switch(idx & 3) {
+ case 0:
+ tcg_gen_qemu_st8(t0, a0, mem_index);
+ break;
+ case 1:
+ tcg_gen_qemu_st16(t0, a0, mem_index);
+ break;
+ case 2:
+ tcg_gen_qemu_st32(t0, a0, mem_index);
+ break;
+ default:
+ case 3:
+ /* Should never happen on 32-bit targets. */
+#ifdef TARGET_X86_64
+ tcg_gen_qemu_st64(t0, a0, mem_index);
+#endif
+ break;
+ }
+}
+
+static inline void gen_op_st_T0_A0(int idx)
+{
+ gen_op_st_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_st_T1_A0(int idx)
+{
+ gen_op_st_v(idx, cpu_T[1], cpu_A0);
+}
+
+#ifdef VBOX
+
+static void gen_check_external_event(void)
+{
+# if 1
+ /** @todo once TCG codegen improves, we may want to use the version
+ from the #else branch below */
+ gen_helper_check_external_event();
+# else
+ int skip_label;
+ TCGv t0;
+
+ skip_label = gen_new_label();
+ t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ /* t0 = cpu_tmp0; */
+
+ tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, interrupt_request));
+ /* Keep in sync with helper_check_external_event() */
+ tcg_gen_andi_tl(t0, t0,
+ CPU_INTERRUPT_EXTERNAL_EXIT
+ | CPU_INTERRUPT_EXTERNAL_TIMER
+ | CPU_INTERRUPT_EXTERNAL_DMA
+ | CPU_INTERRUPT_EXTERNAL_HARD);
+ /** @todo predict branch as taken */
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+ tcg_temp_free(t0);
+
+ gen_helper_check_external_event();
+
+ gen_set_label(skip_label);
+# endif
+}
+
+#endif /* VBOX */
+
+static inline void gen_jmp_im(target_ulong pc)
+{
+ tcg_gen_movi_tl(cpu_tmp0, pc);
+ tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
+}
+
+#ifdef VBOX
+DECLINLINE(void) gen_update_eip(target_ulong pc)
+{
+ gen_jmp_im(pc);
+# ifdef VBOX_DUMP_STATE
+ gen_helper_dump_state();
+# endif
+}
+#endif /* VBOX */
+
+static inline void gen_string_movl_A0_ESI(DisasContext *s)
+{
+ int override;
+
+ override = s->override;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ if (override >= 0) {
+ gen_op_movq_A0_seg(override);
+ gen_op_addq_A0_reg_sN(0, R_ESI);
+ } else {
+ gen_op_movq_A0_reg(R_ESI);
+ }
+ } else
+#endif
+ if (s->aflag) {
+ /* 32 bit address */
+ if (s->addseg && override < 0)
+ override = R_DS;
+ if (override >= 0) {
+ gen_op_movl_A0_seg(override);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ } else {
+ gen_op_movl_A0_reg(R_ESI);
+ }
+ } else {
+ /* 16 bit address, always override */
+ if (override < 0)
+ override = R_DS;
+ gen_op_movl_A0_reg(R_ESI);
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(override);
+ }
+}
+
+static inline void gen_string_movl_A0_EDI(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EDI);
+ } else
+#endif
+ if (s->aflag) {
+ if (s->addseg) {
+ gen_op_movl_A0_seg(R_ES);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ } else {
+ gen_op_movl_A0_reg(R_EDI);
+ }
+ } else {
+ gen_op_movl_A0_reg(R_EDI);
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(R_ES);
+ }
+}
+
+static inline void gen_op_movl_T0_Dshift(int ot)
+{
+ tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
+ tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
+};
+
+static void gen_extu(int ot, TCGv reg)
+{
+ switch(ot) {
+ case OT_BYTE:
+ tcg_gen_ext8u_tl(reg, reg);
+ break;
+ case OT_WORD:
+ tcg_gen_ext16u_tl(reg, reg);
+ break;
+ case OT_LONG:
+ tcg_gen_ext32u_tl(reg, reg);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gen_exts(int ot, TCGv reg)
+{
+ switch(ot) {
+ case OT_BYTE:
+ tcg_gen_ext8s_tl(reg, reg);
+ break;
+ case OT_WORD:
+ tcg_gen_ext16s_tl(reg, reg);
+ break;
+ case OT_LONG:
+ tcg_gen_ext32s_tl(reg, reg);
+ break;
+ default:
+ break;
+ }
+}
+
+static inline void gen_op_jnz_ecx(int size, int label1)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+ gen_extu(size + 1, cpu_tmp0);
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+}
+
+static inline void gen_op_jz_ecx(int size, int label1)
+{
+ tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+ gen_extu(size + 1, cpu_tmp0);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+}
+
+static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
+{
+ switch (ot) {
+ case 0: gen_helper_inb(v, n); break;
+ case 1: gen_helper_inw(v, n); break;
+ case 2: gen_helper_inl(v, n); break;
+ }
+
+}
+
+static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
+{
+ switch (ot) {
+ case 0: gen_helper_outb(v, n); break;
+ case 1: gen_helper_outw(v, n); break;
+ case 2: gen_helper_outl(v, n); break;
+ }
+
+}
+
+static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
+ uint32_t svm_flags)
+{
+ int state_saved;
+ target_ulong next_eip;
+
+ state_saved = 0;
+ if (s->pe && (s->cpl > s->iopl || s->vm86)) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ state_saved = 1;
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ switch (ot) {
+ case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
+ case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
+ case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
+ }
+ }
+ if(s->flags & HF_SVMI_MASK) {
+ if (!state_saved) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ }
+ svm_flags |= (1 << (4 + ot));
+ next_eip = s->pc - s->cs_base;
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
+ tcg_const_i32(next_eip - cur_eip));
+ }
+}
+
+static inline void gen_movs(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_string_movl_A0_EDI(s);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_update_cc_op(DisasContext *s)
+{
+ if (s->cc_op != CC_OP_DYNAMIC) {
+ gen_op_set_cc_op(s->cc_op);
+ s->cc_op = CC_OP_DYNAMIC;
+ }
+}
+
+static void gen_op_update1_cc(void)
+{
+ tcg_gen_discard_tl(cpu_cc_src);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+static void gen_op_update2_cc(void)
+{
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+static inline void gen_op_cmpl_T0_T1_cc(void)
+{
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+static inline void gen_op_testl_T0_T1_cc(void)
+{
+ tcg_gen_discard_tl(cpu_cc_src);
+ tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+}
+
+static void gen_op_update_neg_cc(void)
+{
+ tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
+/* compute eflags.C to reg */
+static void gen_compute_eflags_c(TCGv reg)
+{
+ gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
+ tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
+
+/* compute all eflags to cc_src */
+static void gen_compute_eflags(TCGv reg)
+{
+ gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
+ tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+}
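These two helpers are the read side of the lazy-flags scheme: the translator only tracks the last flag-setting operation (cc_op) plus its operands in cc_src/cc_dst, and EFLAGS are reconstructed on demand. A rough standalone sketch for the 8-bit SUB case (simplified to CF/ZF/SF only, not part of this diff):

#include <stdint.h>

/* For SUB, cc_dst holds the result and cc_src the subtrahend; the first
 * operand is recovered as their sum, and CF is the unsigned borrow. */
static unsigned flags_after_sub8(uint8_t cc_dst, uint8_t cc_src)
{
    uint8_t src1 = cc_dst + cc_src;          /* recover first operand */
    unsigned cf = src1 < cc_src;             /* borrow -> EFLAGS.CF (bit 0) */
    unsigned zf = (cc_dst == 0) << 6;        /* EFLAGS.ZF is bit 6 */
    unsigned sf = (unsigned)(cc_dst >> 7) << 7; /* EFLAGS.SF is bit 7 */
    return cf | zf | sf;
}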
+
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ switch(jcc_op) {
+ case JCC_O:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_B:
+ gen_compute_eflags_c(cpu_T[0]);
+ break;
+ case JCC_Z:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_BE:
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_S:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_P:
+ gen_compute_eflags(cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ case JCC_L:
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ default:
+ case JCC_LE:
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ break;
+ }
+}
+
+/* return true if setcc_slow is not needed (WARNING: must be kept in
+ sync with gen_jcc1) */
+static int is_fast_jcc_case(DisasContext *s, int b)
+{
+ int jcc_op;
+ jcc_op = (b >> 1) & 7;
+ switch(s->cc_op) {
+ /* we optimize the cmp/jcc case */
+ case CC_OP_SUBB:
+ case CC_OP_SUBW:
+ case CC_OP_SUBL:
+ case CC_OP_SUBQ:
+ if (jcc_op == JCC_O || jcc_op == JCC_P)
+ goto slow_jcc;
+ break;
+
+ /* some jumps are easy to compute */
+ case CC_OP_ADDB:
+ case CC_OP_ADDW:
+ case CC_OP_ADDL:
+ case CC_OP_ADDQ:
+
+ case CC_OP_LOGICB:
+ case CC_OP_LOGICW:
+ case CC_OP_LOGICL:
+ case CC_OP_LOGICQ:
+
+ case CC_OP_INCB:
+ case CC_OP_INCW:
+ case CC_OP_INCL:
+ case CC_OP_INCQ:
+
+ case CC_OP_DECB:
+ case CC_OP_DECW:
+ case CC_OP_DECL:
+ case CC_OP_DECQ:
+
+ case CC_OP_SHLB:
+ case CC_OP_SHLW:
+ case CC_OP_SHLL:
+ case CC_OP_SHLQ:
+ if (jcc_op != JCC_Z && jcc_op != JCC_S)
+ goto slow_jcc;
+ break;
+ default:
+ slow_jcc:
+ return 0;
+ }
+ return 1;
+}
+
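+/* In the fast paths below the condition is evaluated directly from
+ cc_dst/cc_src without materialising EFLAGS: after a CMP/SUB, JZ tests
+ cc_dst == 0, JS tests its sign bit, and JB/JBE recover the first operand
+ as cc_dst + cc_src and compare it (unsigned) against cc_src. */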
+/* generate a conditional jump to label 'l1' according to jump opcode
+ value 'b'. In the fast case, T0 is guaranteed not to be used. */
+static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+{
+ int inv, jcc_op, size, cond;
+ TCGv t0;
+
+ inv = b & 1;
+ jcc_op = (b >> 1) & 7;
+
+ switch(cc_op) {
+ /* we optimize the cmp/jcc case */
+ case CC_OP_SUBB:
+ case CC_OP_SUBW:
+ case CC_OP_SUBL:
+ case CC_OP_SUBQ:
+
+ size = cc_op - CC_OP_SUBB;
+ switch(jcc_op) {
+ case JCC_Z:
+ fast_jcc_z:
+ switch(size) {
+ case 0:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
+ t0 = cpu_tmp0;
+ break;
+ case 1:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
+ t0 = cpu_tmp0;
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
+ t0 = cpu_tmp0;
+ break;
+#endif
+ default:
+ t0 = cpu_cc_dst;
+ break;
+ }
+ tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
+ break;
+ case JCC_S:
+ fast_jcc_s:
+ switch(size) {
+ case 0:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+ case 1:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ 0, l1);
+ break;
+#endif
+ default:
+ tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
+ 0, l1);
+ break;
+ }
+ break;
+
+ case JCC_B:
+ cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
+ goto fast_jcc_b;
+ case JCC_BE:
+ cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
+ fast_jcc_b:
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ switch(size) {
+ case 0:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
+ break;
+ case 1:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ t0 = cpu_tmp0;
+ tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
+ tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
+ break;
+#endif
+ default:
+ t0 = cpu_cc_src;
+ break;
+ }
+ tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ break;
+
+ case JCC_L:
+ cond = inv ? TCG_COND_GE : TCG_COND_LT;
+ goto fast_jcc_l;
+ case JCC_LE:
+ cond = inv ? TCG_COND_GT : TCG_COND_LE;
+ fast_jcc_l:
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ switch(size) {
+ case 0:
+ t0 = cpu_tmp0;
+ tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext8s_tl(t0, cpu_cc_src);
+ break;
+ case 1:
+ t0 = cpu_tmp0;
+ tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext16s_tl(t0, cpu_cc_src);
+ break;
+#ifdef TARGET_X86_64
+ case 2:
+ t0 = cpu_tmp0;
+ tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
+ tcg_gen_ext32s_tl(t0, cpu_cc_src);
+ break;
+#endif
+ default:
+ t0 = cpu_cc_src;
+ break;
+ }
+ tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ break;
+
+ default:
+ goto slow_jcc;
+ }
+ break;
+
+ /* some jumps are easy to compute */
+ case CC_OP_ADDB:
+ case CC_OP_ADDW:
+ case CC_OP_ADDL:
+ case CC_OP_ADDQ:
+
+ case CC_OP_ADCB:
+ case CC_OP_ADCW:
+ case CC_OP_ADCL:
+ case CC_OP_ADCQ:
+
+ case CC_OP_SBBB:
+ case CC_OP_SBBW:
+ case CC_OP_SBBL:
+ case CC_OP_SBBQ:
+
+ case CC_OP_LOGICB:
+ case CC_OP_LOGICW:
+ case CC_OP_LOGICL:
+ case CC_OP_LOGICQ:
+
+ case CC_OP_INCB:
+ case CC_OP_INCW:
+ case CC_OP_INCL:
+ case CC_OP_INCQ:
+
+ case CC_OP_DECB:
+ case CC_OP_DECW:
+ case CC_OP_DECL:
+ case CC_OP_DECQ:
+
+ case CC_OP_SHLB:
+ case CC_OP_SHLW:
+ case CC_OP_SHLL:
+ case CC_OP_SHLQ:
+
+ case CC_OP_SARB:
+ case CC_OP_SARW:
+ case CC_OP_SARL:
+ case CC_OP_SARQ:
+ switch(jcc_op) {
+ case JCC_Z:
+ size = (cc_op - CC_OP_ADDB) & 3;
+ goto fast_jcc_z;
+ case JCC_S:
+ size = (cc_op - CC_OP_ADDB) & 3;
+ goto fast_jcc_s;
+ default:
+ goto slow_jcc;
+ }
+ break;
+ default:
+ slow_jcc:
+ gen_setcc_slow_T0(s, jcc_op);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
+ cpu_T[0], 0, l1);
+ break;
+ }
+}
+
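+/* gen_jz_ecx_string() emits "if ECX == 0, jump to the instruction
+ following the REP string op" and returns that exit label so the REP
+ expansion can also branch to it once its termination condition is met. */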
+/* XXX: does not work with gdbstub "ice" single step - not a
+ serious problem */
+static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
+{
+ int l1, l2;
+
+ l1 = gen_new_label();
+ l2 = gen_new_label();
+ gen_op_jnz_ecx(s->aflag, l1);
+ gen_set_label(l2);
+ gen_jmp_tb(s, next_eip, 1);
+ gen_set_label(l1);
+ return l2;
+}
+
+static inline void gen_stos(DisasContext *s, int ot)
+{
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ gen_string_movl_A0_EDI(s);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_lods(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T0(ot, R_EAX);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+}
+
+static inline void gen_scas(DisasContext *s, int ot)
+{
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ gen_string_movl_A0_EDI(s);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_cmpl_T0_T1_cc();
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_cmps(DisasContext *s, int ot)
+{
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_string_movl_A0_EDI(s);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_cmpl_T0_T1_cc();
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+}
+
+static inline void gen_ins(DisasContext *s, int ot)
+{
+ if (use_icount)
+ gen_io_start();
+ gen_string_movl_A0_EDI(s);
+ /* Note: we must do this dummy write first to be restartable in
+ case of page fault. */
+ gen_op_movl_T0_0();
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_EDI);
+ if (use_icount)
+ gen_io_end();
+}
+
+static inline void gen_outs(DisasContext *s, int ot)
+{
+ if (use_icount)
+ gen_io_start();
+ gen_string_movl_A0_ESI(s);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EDX);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
+ gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+
+ gen_op_movl_T0_Dshift(ot);
+ gen_op_add_reg_T0(s->aflag, R_ESI);
+ if (use_icount)
+ gen_io_end();
+}
+
+/* same method as Valgrind: we generate jumps to current or next
+ instruction */
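+/* Each REP iteration therefore becomes its own block: the string op is
+ generated once, ECX is decremented, and control jumps back to cur_eip so
+ the instruction is re-entered for the next iteration; GEN_REPZ2
+ additionally re-tests ZF (via gen_jcc1) for REPE/REPNE SCAS and CMPS. */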
+#define GEN_REPZ(op) \
+static inline void gen_repz_ ## op(DisasContext *s, int ot, \
+ target_ulong cur_eip, target_ulong next_eip) \
+{ \
+ int l2;\
+ gen_update_cc_op(s); \
+ l2 = gen_jz_ecx_string(s, next_eip); \
+ gen_ ## op(s, ot); \
+ gen_op_add_reg_im(s->aflag, R_ECX, -1); \
+ /* a loop would cause two single step exceptions if ECX = 1 \
+ before rep string_insn */ \
+ if (!s->jmp_opt) \
+ gen_op_jz_ecx(s->aflag, l2); \
+ gen_jmp(s, cur_eip); \
+}
+
+#define GEN_REPZ2(op) \
+static inline void gen_repz_ ## op(DisasContext *s, int ot, \
+ target_ulong cur_eip, \
+ target_ulong next_eip, \
+ int nz) \
+{ \
+ int l2;\
+ gen_update_cc_op(s); \
+ l2 = gen_jz_ecx_string(s, next_eip); \
+ gen_ ## op(s, ot); \
+ gen_op_add_reg_im(s->aflag, R_ECX, -1); \
+ gen_op_set_cc_op(CC_OP_SUBB + ot); \
+ gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \
+ if (!s->jmp_opt) \
+ gen_op_jz_ecx(s->aflag, l2); \
+ gen_jmp(s, cur_eip); \
+}
+
+GEN_REPZ(movs)
+GEN_REPZ(stos)
+GEN_REPZ(lods)
+GEN_REPZ(ins)
+GEN_REPZ(outs)
+GEN_REPZ2(scas)
+GEN_REPZ2(cmps)
+
+static void gen_helper_fp_arith_ST0_FT0(int op)
+{
+ switch (op) {
+ case 0: gen_helper_fadd_ST0_FT0(); break;
+ case 1: gen_helper_fmul_ST0_FT0(); break;
+ case 2: gen_helper_fcom_ST0_FT0(); break;
+ case 3: gen_helper_fcom_ST0_FT0(); break;
+ case 4: gen_helper_fsub_ST0_FT0(); break;
+ case 5: gen_helper_fsubr_ST0_FT0(); break;
+ case 6: gen_helper_fdiv_ST0_FT0(); break;
+ case 7: gen_helper_fdivr_ST0_FT0(); break;
+ }
+}
+
+/* NOTE the exception in "r" op ordering */
+static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
+{
+ TCGv_i32 tmp = tcg_const_i32(opreg);
+ switch (op) {
+ case 0: gen_helper_fadd_STN_ST0(tmp); break;
+ case 1: gen_helper_fmul_STN_ST0(tmp); break;
+ case 4: gen_helper_fsubr_STN_ST0(tmp); break;
+ case 5: gen_helper_fsub_STN_ST0(tmp); break;
+ case 6: gen_helper_fdivr_STN_ST0(tmp); break;
+ case 7: gen_helper_fdiv_STN_ST0(tmp); break;
+ }
+}
+
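+/* gen_op() implements the eight classic ALU group operations (ADD, OR,
+ ADC, SBB, AND, SUB, XOR, CMP) between T0 and T1, writing the result back
+ to register 'd' or to memory at A0 and updating the lazy flag state.
+ ADC/SBB fold the incoming carry in and select the ADD/ADC (resp.
+ SUB/SBB) cc_op variant at run time, so cc_op is dynamic afterwards. */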
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_op(DisasContext *s1, int op, int ot, int d)
+{
+ if (d != OR_TMP0) {
+ gen_op_mov_TN_reg(ot, 0, d);
+ } else {
+ gen_op_ld_T0_A0(ot + s1->mem_index);
+ }
+ switch(op) {
+ case OP_ADCL:
+ if (s1->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s1->cc_op);
+ gen_compute_eflags_c(cpu_tmp4);
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
+ tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
+ tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
+ s1->cc_op = CC_OP_DYNAMIC;
+ break;
+ case OP_SBBL:
+ if (s1->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s1->cc_op);
+ gen_compute_eflags_c(cpu_tmp4);
+ tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
+ tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
+ tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
+ s1->cc_op = CC_OP_DYNAMIC;
+ break;
+ case OP_ADDL:
+ gen_op_addl_T0_T1();
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_op_update2_cc();
+ s1->cc_op = CC_OP_ADDB + ot;
+ break;
+ case OP_SUBL:
+ tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_op_update2_cc();
+ s1->cc_op = CC_OP_SUBB + ot;
+ break;
+ default:
+ case OP_ANDL:
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_op_update1_cc();
+ s1->cc_op = CC_OP_LOGICB + ot;
+ break;
+ case OP_ORL:
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_op_update1_cc();
+ s1->cc_op = CC_OP_LOGICB + ot;
+ break;
+ case OP_XORL:
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_op_update1_cc();
+ s1->cc_op = CC_OP_LOGICB + ot;
+ break;
+ case OP_CMPL:
+ gen_op_cmpl_T0_T1_cc();
+ s1->cc_op = CC_OP_SUBB + ot;
+ break;
+ }
+}
+
+/* if d == OR_TMP0, it means memory operand (address in A0) */
+static void gen_inc(DisasContext *s1, int ot, int d, int c)
+{
+ if (d != OR_TMP0)
+ gen_op_mov_TN_reg(ot, 0, d);
+ else
+ gen_op_ld_T0_A0(ot + s1->mem_index);
+ if (s1->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s1->cc_op);
+ if (c > 0) {
+ tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
+ s1->cc_op = CC_OP_INCB + ot;
+ } else {
+ tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
+ s1->cc_op = CC_OP_DECB + ot;
+ }
+ if (d != OR_TMP0)
+ gen_op_mov_reg_T0(ot, d);
+ else
+ gen_op_st_T0_A0(ot + s1->mem_index);
+ gen_compute_eflags_c(cpu_cc_src);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
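+/* Variable-count shifts: the count is masked to 5 bits (6 for OT_QUAD)
+ and, since a zero count must leave EFLAGS untouched, the flag update is
+ guarded by a run-time branch on the masked count. */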
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right, int is_arith)
+{
+ target_ulong mask;
+ int shift_label;
+ TCGv t0, t1;
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_TN_reg(ot, 0, op1);
+
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+
+ tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);
+
+ if (is_right) {
+ if (is_arith) {
+ gen_exts(ot, cpu_T[0]);
+ tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else {
+ gen_extu(ot, cpu_T[0]);
+ tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+ } else {
+ tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+
+ /* store */
+ if (op1 == OR_TMP0)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, op1);
+
+ /* update eflags if non zero shift */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ /* XXX: inefficient */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+
+ tcg_gen_mov_tl(t0, cpu_T[0]);
+ tcg_gen_mov_tl(t1, cpu_T3);
+
+ shift_label = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);
+
+ tcg_gen_mov_tl(cpu_cc_src, t1);
+ tcg_gen_mov_tl(cpu_cc_dst, t0);
+ if (is_right)
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ else
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+
+ gen_set_label(shift_label);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+}
+
+static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
+ int is_right, int is_arith)
+{
+ int mask;
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_TN_reg(ot, 0, op1);
+
+ op2 &= mask;
+ if (op2 != 0) {
+ if (is_right) {
+ if (is_arith) {
+ gen_exts(ot, cpu_T[0]);
+ tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
+ } else {
+ gen_extu(ot, cpu_T[0]);
+ tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
+ }
+ } else {
+ tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
+ tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
+ }
+ }
+
+ /* store */
+ if (op1 == OR_TMP0)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, op1);
+
+ /* update eflags if non zero shift */
+ if (op2 != 0) {
+ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ if (is_right)
+ s->cc_op = CC_OP_SARB + ot;
+ else
+ s->cc_op = CC_OP_SHLB + ot;
+ }
+}
+
+static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
+{
+ if (arg2 >= 0)
+ tcg_gen_shli_tl(ret, arg1, arg2);
+ else
+ tcg_gen_shri_tl(ret, arg1, -arg2);
+}
+
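+/* Rotates are built from two shifts OR'ed together. Only CF and OF are
+ updated: CF is the bit that wrapped around (low bit of the result for
+ ROL, high bit for ROR) and OF is set when the rotation changed the sign
+ bit; both are merged into a freshly computed EFLAGS value. */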
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right)
+{
+ target_ulong mask;
+ int label1, label2, data_bits;
+ TCGv t0, t1, t2, a0;
+
+ /* XXX: inefficient, but we must use local temps */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ tcg_gen_mov_tl(t1, cpu_T[1]);
+
+ tcg_gen_andi_tl(t1, t1, mask);
+
+ /* Must test zero case to avoid using undefined behaviour in TCG
+ shifts. */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
+
+ if (ot <= OT_WORD)
+ tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
+ else
+ tcg_gen_mov_tl(cpu_tmp0, t1);
+
+ gen_extu(ot, t0);
+ tcg_gen_mov_tl(t2, t0);
+
+ data_bits = 8 << ot;
+ /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
+ fix TCG definition) */
+ if (is_right) {
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
+ tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
+ tcg_gen_shl_tl(t0, t0, cpu_tmp0);
+ } else {
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
+ tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
+ tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+ }
+ tcg_gen_or_tl(t0, t0, cpu_tmp4);
+
+ gen_set_label(label1);
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ label2 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
+
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+ tcg_gen_xor_tl(cpu_tmp0, t2, t0);
+ tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ if (is_right) {
+ tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ }
+ tcg_gen_andi_tl(t0, t0, CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+
+ gen_set_label(label2);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+}
+
+static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
+ int is_right)
+{
+ int mask;
+ int data_bits;
+ TCGv t0, t1, a0;
+
+ /* XXX: inefficient, but we must use local temps */
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ gen_extu(ot, t0);
+ tcg_gen_mov_tl(t1, t0);
+
+ op2 &= mask;
+ data_bits = 8 << ot;
+ if (op2 != 0) {
+ int shift = op2 & ((1 << (3 + ot)) - 1);
+ if (is_right) {
+ tcg_gen_shri_tl(cpu_tmp4, t0, shift);
+ tcg_gen_shli_tl(t0, t0, data_bits - shift);
+ }
+ else {
+ tcg_gen_shli_tl(cpu_tmp4, t0, shift);
+ tcg_gen_shri_tl(t0, t0, data_bits - shift);
+ }
+ tcg_gen_or_tl(t0, t0, cpu_tmp4);
+ }
+
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ if (op2 != 0) {
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
+ tcg_gen_xor_tl(cpu_tmp0, t1, t0);
+ tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ if (is_right) {
+ tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ }
+ tcg_gen_andi_tl(t0, t0, CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+ s->cc_op = CC_OP_EFLAGS;
+ }
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(a0);
+}
+
+/* XXX: add faster immediate = 1 case */
+static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right)
+{
+ int label1;
+
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ /* load */
+ if (op1 == OR_TMP0)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_TN_reg(ot, 0, op1);
+
+ if (is_right) {
+ switch (ot) {
+ case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#ifdef TARGET_X86_64
+ case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#endif
+ }
+ } else {
+ switch (ot) {
+ case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+ case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#ifdef TARGET_X86_64
+ case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+#endif
+ }
+ }
+ /* store */
+ if (op1 == OR_TMP0)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, op1);
+
+ /* update eflags */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
+
+ tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
+
+ gen_set_label(label1);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+}
+
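+/* SHLD/SHRD: T1 supplies the bits shifted in and T3 the shift count.
+ For 16-bit operands the destination and T1 are concatenated into a
+ 32-bit value so that counts greater than 16 follow the documented Intel
+ behaviour. */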
+/* XXX: add faster immediate case */
+static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
+ int is_right)
+{
+ int label1, label2, data_bits;
+ target_ulong mask;
+ TCGv t0, t1, t2, a0;
+
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+
+ if (ot == OT_QUAD)
+ mask = 0x3f;
+ else
+ mask = 0x1f;
+
+ /* load */
+ if (op1 == OR_TMP0) {
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, op1);
+ }
+
+ tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
+
+ tcg_gen_mov_tl(t1, cpu_T[1]);
+ tcg_gen_mov_tl(t2, cpu_T3);
+
+ /* Must test zero case to avoid using undefined behaviour in TCG
+ shifts. */
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+
+ tcg_gen_addi_tl(cpu_tmp5, t2, -1);
+ if (ot == OT_WORD) {
+ /* Note: we implement the Intel behaviour for shift count > 16 */
+ if (is_right) {
+ tcg_gen_andi_tl(t0, t0, 0xffff);
+ tcg_gen_shli_tl(cpu_tmp0, t1, 16);
+ tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ tcg_gen_ext32u_tl(t0, t0);
+
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ /* only needed if count > 16, but testing for that would complicate the code */
+ tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
+ tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
+
+ tcg_gen_shr_tl(t0, t0, t2);
+
+ tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ } else {
+ /* XXX: not optimal */
+ tcg_gen_andi_tl(t0, t0, 0xffff);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(t1, t1, t0);
+ tcg_gen_ext32u_tl(t1, t1);
+
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+ tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
+ tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
+ tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
+ tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+ }
+ } else {
+ data_bits = 8 << ot;
+ if (is_right) {
+ if (ot == OT_LONG)
+ tcg_gen_ext32u_tl(t0, t0);
+
+ tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ tcg_gen_shr_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
+ tcg_gen_shl_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+
+ } else {
+ if (ot == OT_LONG)
+ tcg_gen_ext32u_tl(t1, t1);
+
+ tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
+ tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+ tcg_gen_or_tl(t0, t0, t1);
+ }
+ }
+ tcg_gen_mov_tl(t1, cpu_tmp4);
+
+ gen_set_label(label1);
+ /* store */
+ if (op1 == OR_TMP0) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ } else {
+ gen_op_mov_reg_v(ot, op1, t0);
+ }
+
+ /* update eflags */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+
+ label2 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
+
+ tcg_gen_mov_tl(cpu_cc_src, t1);
+ tcg_gen_mov_tl(cpu_cc_dst, t0);
+ if (is_right) {
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ } else {
+ tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+ }
+ gen_set_label(label2);
+ s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+}
+
+static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+{
+ if (s != OR_TMP1)
+ gen_op_mov_TN_reg(ot, 1, s);
+ switch(op) {
+ case OP_ROL:
+ gen_rot_rm_T1(s1, ot, d, 0);
+ break;
+ case OP_ROR:
+ gen_rot_rm_T1(s1, ot, d, 1);
+ break;
+ case OP_SHL:
+ case OP_SHL1:
+ gen_shift_rm_T1(s1, ot, d, 0, 0);
+ break;
+ case OP_SHR:
+ gen_shift_rm_T1(s1, ot, d, 1, 0);
+ break;
+ case OP_SAR:
+ gen_shift_rm_T1(s1, ot, d, 1, 1);
+ break;
+ case OP_RCL:
+ gen_rotc_rm_T1(s1, ot, d, 0);
+ break;
+ case OP_RCR:
+ gen_rotc_rm_T1(s1, ot, d, 1);
+ break;
+ }
+}
+
+static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+{
+ switch(op) {
+ case OP_ROL:
+ gen_rot_rm_im(s1, ot, d, c, 0);
+ break;
+ case OP_ROR:
+ gen_rot_rm_im(s1, ot, d, c, 1);
+ break;
+ case OP_SHL:
+ case OP_SHL1:
+ gen_shift_rm_im(s1, ot, d, c, 0, 0);
+ break;
+ case OP_SHR:
+ gen_shift_rm_im(s1, ot, d, c, 1, 0);
+ break;
+ case OP_SAR:
+ gen_shift_rm_im(s1, ot, d, c, 1, 1);
+ break;
+ default:
+ /* currently not optimized */
+ gen_op_movl_T1_im(c);
+ gen_shift(s1, op, ot, d, OR_TMP1);
+ break;
+ }
+}
+
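+/* gen_lea_modrm() decodes the ModRM byte (plus SIB and displacement in
+ 32/64-bit addressing) that follows the opcode and leaves the effective
+ address in A0, adding a segment base when addseg or an override requires
+ it. E.g. for 0x10(%ebx,%esi,4) it produces A0 = EBX + ESI*4 + 0x10, plus
+ DS.base if needed. */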
+static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+{
+ target_long disp;
+ int havesib;
+ int base;
+ int index;
+ int scale;
+ int opreg;
+ int mod, rm, code, override, must_add_seg;
+
+ override = s->override;
+ must_add_seg = s->addseg;
+ if (override >= 0)
+ must_add_seg = 1;
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+
+ if (s->aflag) {
+
+ havesib = 0;
+ base = rm;
+ index = 0;
+ scale = 0;
+
+ if (base == 4) {
+ havesib = 1;
+ code = ldub_code(s->pc++);
+ scale = (code >> 6) & 3;
+ index = ((code >> 3) & 7) | REX_X(s);
+ base = (code & 7);
+ }
+ base |= REX_B(s);
+
+ switch (mod) {
+ case 0:
+ if ((base & 7) == 5) {
+ base = -1;
+ disp = (int32_t)ldl_code(s->pc);
+ s->pc += 4;
+ if (CODE64(s) && !havesib) {
+ disp += s->pc + s->rip_offset;
+ }
+ } else {
+ disp = 0;
+ }
+ break;
+ case 1:
+ disp = (int8_t)ldub_code(s->pc++);
+ break;
+ default:
+ case 2:
+#ifdef VBOX
+ disp = (int32_t)ldl_code(s->pc);
+#else
+ disp = ldl_code(s->pc);
+#endif
+ s->pc += 4;
+ break;
+ }
+
+ if (base >= 0) {
+ /* for correct popl handling with esp */
+ if (base == 4 && s->popl_esp_hack)
+ disp += s->popl_esp_hack;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(base);
+ if (disp != 0) {
+ gen_op_addq_A0_im(disp);
+ }
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(base);
+ if (disp != 0)
+ gen_op_addl_A0_im(disp);
+ }
+ } else {
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_im(disp);
+ } else
+#endif
+ {
+ gen_op_movl_A0_im(disp);
+ }
+ }
+ /* index == 4 means no index */
+ if (havesib && (index != 4)) {
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_addq_A0_reg_sN(scale, index);
+ } else
+#endif
+ {
+ gen_op_addl_A0_reg_sN(scale, index);
+ }
+ }
+ if (must_add_seg) {
+ if (override < 0) {
+ if (base == R_EBP || base == R_ESP)
+ override = R_SS;
+ else
+ override = R_DS;
+ }
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_addq_A0_seg(override);
+ } else
+#endif
+ {
+ gen_op_addl_A0_seg(override);
+ }
+ }
+ } else {
+ switch (mod) {
+ case 0:
+ if (rm == 6) {
+ disp = lduw_code(s->pc);
+ s->pc += 2;
+ gen_op_movl_A0_im(disp);
+ rm = 0; /* avoid SS override */
+ goto no_rm;
+ } else {
+ disp = 0;
+ }
+ break;
+ case 1:
+ disp = (int8_t)ldub_code(s->pc++);
+ break;
+ default:
+ case 2:
+ disp = lduw_code(s->pc);
+ s->pc += 2;
+ break;
+ }
+ switch(rm) {
+ case 0:
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ break;
+ case 1:
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ break;
+ case 2:
+ gen_op_movl_A0_reg(R_EBP);
+ gen_op_addl_A0_reg_sN(0, R_ESI);
+ break;
+ case 3:
+ gen_op_movl_A0_reg(R_EBP);
+ gen_op_addl_A0_reg_sN(0, R_EDI);
+ break;
+ case 4:
+ gen_op_movl_A0_reg(R_ESI);
+ break;
+ case 5:
+ gen_op_movl_A0_reg(R_EDI);
+ break;
+ case 6:
+ gen_op_movl_A0_reg(R_EBP);
+ break;
+ default:
+ case 7:
+ gen_op_movl_A0_reg(R_EBX);
+ break;
+ }
+ if (disp != 0)
+ gen_op_addl_A0_im(disp);
+ gen_op_andl_A0_ffff();
+ no_rm:
+ if (must_add_seg) {
+ if (override < 0) {
+ if (rm == 2 || rm == 3 || rm == 6)
+ override = R_SS;
+ else
+ override = R_DS;
+ }
+ gen_op_addl_A0_seg(override);
+ }
+ }
+
+ opreg = OR_A0;
+ disp = 0;
+ *reg_ptr = opreg;
+ *offset_ptr = disp;
+}
+
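+/* gen_nop_modrm() only advances s->pc past the addressing bytes of a
+ ModRM operand without generating any code; it is used for forms such as
+ the multi-byte NOP and prefetch hints whose memory operand is never
+ accessed. */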
+static void gen_nop_modrm(DisasContext *s, int modrm)
+{
+ int mod, rm, base, code;
+
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ return;
+ rm = modrm & 7;
+
+ if (s->aflag) {
+
+ base = rm;
+
+ if (base == 4) {
+ code = ldub_code(s->pc++);
+ base = (code & 7);
+ }
+
+ switch (mod) {
+ case 0:
+ if (base == 5) {
+ s->pc += 4;
+ }
+ break;
+ case 1:
+ s->pc++;
+ break;
+ default:
+ case 2:
+ s->pc += 4;
+ break;
+ }
+ } else {
+ switch (mod) {
+ case 0:
+ if (rm == 6) {
+ s->pc += 2;
+ }
+ break;
+ case 1:
+ s->pc++;
+ break;
+ default:
+ case 2:
+ s->pc += 2;
+ break;
+ }
+ }
+}
+
+/* used for LEA and MOV AX, mem */
+static void gen_add_A0_ds_seg(DisasContext *s)
+{
+ int override, must_add_seg;
+ must_add_seg = s->addseg;
+ override = R_DS;
+ if (s->override >= 0) {
+ override = s->override;
+ must_add_seg = 1;
+ }
+ if (must_add_seg) {
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_addq_A0_seg(override);
+ } else
+#endif
+ {
+ gen_op_addl_A0_seg(override);
+ }
+ }
+}
+
+/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
+ OR_TMP0 */
+static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+{
+ int mod, rm, opreg, disp;
+
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod == 3) {
+ if (is_store) {
+ if (reg != OR_TMP0)
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_mov_reg_T0(ot, rm);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ if (reg != OR_TMP0)
+ gen_op_mov_reg_T0(ot, reg);
+ }
+ } else {
+ gen_lea_modrm(s, modrm, &opreg, &disp);
+ if (is_store) {
+ if (reg != OR_TMP0)
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ if (reg != OR_TMP0)
+ gen_op_mov_reg_T0(ot, reg);
+ }
+ }
+}
+
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+ uint32_t ret;
+
+ switch(ot) {
+ case OT_BYTE:
+ ret = ldub_code(s->pc);
+ s->pc++;
+ break;
+ case OT_WORD:
+ ret = lduw_code(s->pc);
+ s->pc += 2;
+ break;
+ default:
+ case OT_LONG:
+ ret = ldl_code(s->pc);
+ s->pc += 4;
+ break;
+ }
+ return ret;
+}
+
+static inline int insn_const_size(unsigned int ot)
+{
+ if (ot <= OT_LONG)
+ return 1 << ot;
+ else
+ return 4;
+}
+
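+/* Direct TB chaining (tcg_gen_goto_tb / tcg_gen_exit_tb with the TB
+ pointer) is only used when the destination stays on a page the current
+ TB already touches, which keeps chained jumps consistent with page-level
+ TB invalidation; otherwise the block simply ends and the new EIP is
+ looked up at run time. */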
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{
+ TranslationBlock *tb;
+ target_ulong pc;
+
+ pc = s->cs_base + eip;
+ tb = s->tb;
+ /* NOTE: we handle the case where the TB spans two pages here */
+ if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
+ (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
+#ifdef VBOX
+ gen_check_external_event();
+#endif /* VBOX */
+ /* jump to same page: we can use a direct jump */
+ tcg_gen_goto_tb(tb_num);
+ gen_jmp_im(eip);
+ tcg_gen_exit_tb((intptr_t)tb + tb_num);
+ } else {
+ /* jump to another page: currently not optimized */
+ gen_jmp_im(eip);
+ gen_eob(s);
+ }
+}
+
+static inline void gen_jcc(DisasContext *s, int b,
+ target_ulong val, target_ulong next_eip)
+{
+ int l1, l2, cc_op;
+
+ cc_op = s->cc_op;
+ gen_update_cc_op(s);
+ if (s->jmp_opt) {
+ l1 = gen_new_label();
+ gen_jcc1(s, cc_op, b, l1);
+
+ gen_goto_tb(s, 0, next_eip);
+
+ gen_set_label(l1);
+ gen_goto_tb(s, 1, val);
+ s->is_jmp = DISAS_TB_JUMP;
+ } else {
+
+ l1 = gen_new_label();
+ l2 = gen_new_label();
+ gen_jcc1(s, cc_op, b, l1);
+
+ gen_jmp_im(next_eip);
+ tcg_gen_br(l2);
+
+ gen_set_label(l1);
+ gen_jmp_im(val);
+ gen_set_label(l2);
+ gen_eob(s);
+ }
+}
+
+static void gen_setcc(DisasContext *s, int b)
+{
+ int inv, jcc_op, l1;
+ TCGv t0;
+
+ if (is_fast_jcc_case(s, b)) {
+ /* nominal case: we use a jump */
+ /* XXX: make it faster by adding new instructions in TCG */
+ t0 = tcg_temp_local_new();
+ tcg_gen_movi_tl(t0, 0);
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ tcg_gen_movi_tl(t0, 1);
+ gen_set_label(l1);
+ tcg_gen_mov_tl(cpu_T[0], t0);
+ tcg_temp_free(t0);
+ } else {
+ /* slow case: it is more efficient not to generate a jump,
+ although it is questionable whether this optimization is
+ worthwhile */
+ inv = b & 1;
+ jcc_op = (b >> 1) & 7;
+ gen_setcc_slow_T0(s, jcc_op);
+ if (inv) {
+ tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+ }
+ }
+}
+
+static inline void gen_op_movl_T0_seg(int seg_reg)
+{
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].selector));
+}
+
+static inline void gen_op_movl_seg_T0_vm(int seg_reg)
+{
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].selector));
+ tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
+ tcg_gen_st_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[seg_reg].base));
+}
+
+/* move T0 to seg_reg and compute if the CPU state may change. Never
+ call this function with seg_reg == R_CS */
+static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
+{
+ if (s->pe && !s->vm86) {
+ /* XXX: optimize by finding processor state dynamically */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
+ /* abort translation because the addseg value may change or
+ because ss32 may change. For R_SS, translation must always
+ stop as special handling is needed to disable hardware
+ interrupts for the next instruction */
+ if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
+ s->is_jmp = DISAS_TB_JUMP;
+ } else {
+ gen_op_movl_seg_T0_vm(seg_reg);
+ if (seg_reg == R_SS)
+ s->is_jmp = DISAS_TB_JUMP;
+ }
+}
+
+static inline int svm_is_rep(int prefixes)
+{
+ return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
+}
+
+static inline void
+gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
+ uint32_t type, uint64_t param)
+{
+ /* no SVM activated; fast case */
+ if (likely(!(s->flags & HF_SVMI_MASK)))
+ return;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_svm_check_intercept_param(tcg_const_i32(type),
+ tcg_const_i64(param));
+}
+
+static inline void
+gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
+{
+ gen_svm_check_intercept_param(s, pc_start, type, 0);
+}
+
+static inline void gen_stack_update(DisasContext *s, int addend)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_add_reg_im(2, R_ESP, addend);
+ } else
+#endif
+ if (s->ss32) {
+ gen_op_add_reg_im(1, R_ESP, addend);
+ } else {
+ gen_op_add_reg_im(0, R_ESP, addend);
+ }
+}
+
+/* generate a push. It depends on ss32, addseg and dflag */
+static void gen_push_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_movq_A0_reg(R_ESP);
+ if (s->dflag) {
+ gen_op_addq_A0_im(-8);
+ gen_op_st_T0_A0(OT_QUAD + s->mem_index);
+ } else {
+ gen_op_addq_A0_im(-2);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ }
+ gen_op_mov_reg_A0(2, R_ESP);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->dflag)
+ gen_op_addl_A0_im(-2);
+ else
+ gen_op_addl_A0_im(-4);
+ if (s->ss32) {
+ if (s->addseg) {
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ gen_op_addl_A0_seg(R_SS);
+ }
+ } else {
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ gen_op_addl_A0_seg(R_SS);
+ }
+ gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
+ if (s->ss32 && !s->addseg)
+ gen_op_mov_reg_A0(1, R_ESP);
+ else
+ gen_op_mov_reg_T1(s->ss32 + 1, R_ESP);
+ }
+}
+
+/* generate a push. It depends on ss32, addseg and dflag */
+/* slower version for T1, only used for call Ev */
+static void gen_push_T1(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_movq_A0_reg(R_ESP);
+ if (s->dflag) {
+ gen_op_addq_A0_im(-8);
+ gen_op_st_T1_A0(OT_QUAD + s->mem_index);
+ } else {
+ gen_op_addq_A0_im(-2);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ }
+ gen_op_mov_reg_A0(2, R_ESP);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->dflag)
+ gen_op_addl_A0_im(-2);
+ else
+ gen_op_addl_A0_im(-4);
+ if (s->ss32) {
+ if (s->addseg) {
+ gen_op_addl_A0_seg(R_SS);
+ }
+ } else {
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(R_SS);
+ }
+ gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
+
+ if (s->ss32 && !s->addseg)
+ gen_op_mov_reg_A0(1, R_ESP);
+ else
+ gen_stack_update(s, -(2 << s->dflag));
+ }
+}
+
+/* a two step pop is necessary for precise exceptions: the value is
+ loaded before ESP is updated, so a faulting pop leaves ESP unchanged */
+static void gen_pop_T0(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ gen_op_movq_A0_reg(R_ESP);
+ gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_ESP);
+ if (s->ss32) {
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+ } else {
+ gen_op_andl_A0_ffff();
+ gen_op_addl_A0_seg(R_SS);
+ }
+ gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
+ }
+}
+
+static void gen_pop_update(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+ if (CODE64(s) && s->dflag) {
+ gen_stack_update(s, 8);
+ } else
+#endif
+ {
+ gen_stack_update(s, 2 << s->dflag);
+ }
+}
+
+static void gen_stack_A0(DisasContext *s)
+{
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->ss32)
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_pusha(DisasContext *s)
+{
+ int i;
+ gen_op_movl_A0_reg(R_ESP);
+ gen_op_addl_A0_im(-(16 << s->dflag));
+ if (!s->ss32)
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+ for(i = 0;i < 8; i++) {
+ gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
+ gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
+ gen_op_addl_A0_im(2 << s->dflag);
+ }
+ gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+}
+
+/* NOTE: wrap around in 16 bit not fully handled */
+static void gen_popa(DisasContext *s)
+{
+ int i;
+ gen_op_movl_A0_reg(R_ESP);
+ if (!s->ss32)
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag);
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+ for(i = 0;i < 8; i++) {
+ /* ESP is not reloaded */
+ if (i != 3) {
+ gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index);
+ gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i);
+ }
+ gen_op_addl_A0_im(2 << s->dflag);
+ }
+ gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+}
+
+static void gen_enter(DisasContext *s, int esp_addend, int level)
+{
+ int ot, opsize;
+
+ level &= 0x1f;
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ ot = s->dflag ? OT_QUAD : OT_WORD;
+ opsize = 1 << ot;
+
+ gen_op_movl_A0_reg(R_ESP);
+ gen_op_addq_A0_im(-opsize);
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+
+ /* push bp */
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ if (level) {
+ /* XXX: must save state */
+ gen_helper_enter64_level(tcg_const_i32(level),
+ tcg_const_i32((ot == OT_QUAD)),
+ cpu_T[1]);
+ }
+ gen_op_mov_reg_T1(ot, R_EBP);
+ tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
+ gen_op_mov_reg_T1(OT_QUAD, R_ESP);
+ } else
+#endif
+ {
+ ot = s->dflag + OT_WORD;
+ opsize = 2 << s->dflag;
+
+ gen_op_movl_A0_reg(R_ESP);
+ gen_op_addl_A0_im(-opsize);
+ if (!s->ss32)
+ gen_op_andl_A0_ffff();
+ tcg_gen_mov_tl(cpu_T[1], cpu_A0);
+ if (s->addseg)
+ gen_op_addl_A0_seg(R_SS);
+ /* push bp */
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ if (level) {
+ /* XXX: must save state */
+ gen_helper_enter_level(tcg_const_i32(level),
+ tcg_const_i32(s->dflag),
+ cpu_T[1]);
+ }
+ gen_op_mov_reg_T1(ot, R_EBP);
+ tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
+ gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP);
+ }
+}
+
+static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ gen_helper_raise_exception(tcg_const_i32(trapno));
+ s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* an interrupt is different from an exception because of the
+ privilege checks */
+static void gen_interrupt(DisasContext *s, int intno,
+ target_ulong cur_eip, target_ulong next_eip)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ gen_helper_raise_interrupt(tcg_const_i32(intno),
+ tcg_const_i32(next_eip - cur_eip));
+ s->is_jmp = DISAS_TB_JUMP;
+}
+
+static void gen_debug(DisasContext *s, target_ulong cur_eip)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(cur_eip);
+ gen_helper_debug();
+ s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* generate a generic end of block. Trace exception is also generated
+ if needed */
+static void gen_eob(DisasContext *s)
+{
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
+ gen_helper_reset_inhibit_irq();
+ }
+ if (s->tb->flags & HF_RF_MASK) {
+ gen_helper_reset_rf();
+ }
+ if ( s->singlestep_enabled
+#ifdef VBOX
+ && ( !(cpu_single_env->state & CPU_EMULATE_SINGLE_STEP)
+ || !(s->prefix & (PREFIX_REPNZ | PREFIX_REPZ) ))
+#endif
+ ) {
+ gen_helper_debug();
+ } else if (s->tf) {
+ gen_helper_single_step();
+ } else {
+ tcg_gen_exit_tb(0);
+ }
+ s->is_jmp = DISAS_TB_JUMP;
+}
+
+/* generate a jump to eip. No segment change must happen before as a
+ direct call to the next block may occur */
+static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
+{
+ if (s->jmp_opt) {
+ gen_update_cc_op(s);
+ gen_goto_tb(s, tb_num, eip);
+ s->is_jmp = DISAS_TB_JUMP;
+ } else {
+ gen_jmp_im(eip);
+ gen_eob(s);
+ }
+}
+
+static void gen_jmp(DisasContext *s, target_ulong eip)
+{
+ gen_jmp_tb(s, eip, 0);
+}
+
+static inline void gen_ldq_env_A0(int idx, int offset)
+{
+ int mem_index = (idx >> 2) - 1;
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
+}
+
+static inline void gen_stq_env_A0(int idx, int offset)
+{
+ int mem_index = (idx >> 2) - 1;
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
+}
+
+static inline void gen_ldo_env_A0(int idx, int offset)
+{
+ int mem_index = (idx >> 2) - 1;
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+ tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+}
+
+static inline void gen_sto_env_A0(int idx, int offset)
+{
+ int mem_index = (idx >> 2) - 1;
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index);
+ tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index);
+}
+
+static inline void gen_op_movo(int d_offset, int s_offset)
+{
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+}
+
+static inline void gen_op_movq(int d_offset, int s_offset)
+{
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+static inline void gen_op_movl(int d_offset, int s_offset)
+{
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
+}
+
+static inline void gen_op_movq_env_0(int d_offset)
+{
+ tcg_gen_movi_i64(cpu_tmp1_i64, 0);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
+}
+
+#define SSE_SPECIAL ((void *)1)
+#define SSE_DUMMY ((void *)2)
+
+#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
+#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
+ gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
+
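+/* sse_op_table1 is indexed by the second opcode byte; the four columns
+ select the variant by mandatory prefix (none, 0x66, 0xF3, 0xF2).
+ SSE_SPECIAL entries are handled by explicit code in gen_sse() and
+ SSE_DUMMY marks opcodes (femms, the 3DNow! group, emms) that only need a
+ non-NULL entry and are dispatched separately. */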
+static void *sse_op_table1[256][4] = {
+ /* 3DNow! extensions */
+ [0x0e] = { SSE_DUMMY }, /* femms */
+ [0x0f] = { SSE_DUMMY }, /* pf... */
+ /* pure SSE operations */
+ [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+ [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
+ [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
+ [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
+ [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
+ [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
+ [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
+ [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
+
+ [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+ [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
+ [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
+ [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
+ [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
+ [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
+ [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
+ [0x2f] = { gen_helper_comiss, gen_helper_comisd },
+ [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
+ [0x51] = SSE_FOP(sqrt),
+ [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
+ [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
+ [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
+ [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
+ [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
+ [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
+ [0x58] = SSE_FOP(add),
+ [0x59] = SSE_FOP(mul),
+ [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
+ gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
+ [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
+ [0x5c] = SSE_FOP(sub),
+ [0x5d] = SSE_FOP(min),
+ [0x5e] = SSE_FOP(div),
+ [0x5f] = SSE_FOP(max),
+
+ [0xc2] = SSE_FOP(cmpeq),
+ [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+
+ [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+
+ /* MMX ops and their SSE extensions */
+ [0x60] = MMX_OP2(punpcklbw),
+ [0x61] = MMX_OP2(punpcklwd),
+ [0x62] = MMX_OP2(punpckldq),
+ [0x63] = MMX_OP2(packsswb),
+ [0x64] = MMX_OP2(pcmpgtb),
+ [0x65] = MMX_OP2(pcmpgtw),
+ [0x66] = MMX_OP2(pcmpgtl),
+ [0x67] = MMX_OP2(packuswb),
+ [0x68] = MMX_OP2(punpckhbw),
+ [0x69] = MMX_OP2(punpckhwd),
+ [0x6a] = MMX_OP2(punpckhdq),
+ [0x6b] = MMX_OP2(packssdw),
+ [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
+ [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
+ [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
+ [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+ [0x70] = { gen_helper_pshufw_mmx,
+ gen_helper_pshufd_xmm,
+ gen_helper_pshufhw_xmm,
+ gen_helper_pshuflw_xmm },
+ [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
+ [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
+ [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
+ [0x74] = MMX_OP2(pcmpeqb),
+ [0x75] = MMX_OP2(pcmpeqw),
+ [0x76] = MMX_OP2(pcmpeql),
+ [0x77] = { SSE_DUMMY }, /* emms */
+ [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
+ [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
+ [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
+ [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
+ [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
+ [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
+ [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
+ [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
+ [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
+ [0xd1] = MMX_OP2(psrlw),
+ [0xd2] = MMX_OP2(psrld),
+ [0xd3] = MMX_OP2(psrlq),
+ [0xd4] = MMX_OP2(paddq),
+ [0xd5] = MMX_OP2(pmullw),
+ [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
+ [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
+ [0xd8] = MMX_OP2(psubusb),
+ [0xd9] = MMX_OP2(psubusw),
+ [0xda] = MMX_OP2(pminub),
+ [0xdb] = MMX_OP2(pand),
+ [0xdc] = MMX_OP2(paddusb),
+ [0xdd] = MMX_OP2(paddusw),
+ [0xde] = MMX_OP2(pmaxub),
+ [0xdf] = MMX_OP2(pandn),
+ [0xe0] = MMX_OP2(pavgb),
+ [0xe1] = MMX_OP2(psraw),
+ [0xe2] = MMX_OP2(psrad),
+ [0xe3] = MMX_OP2(pavgw),
+ [0xe4] = MMX_OP2(pmulhuw),
+ [0xe5] = MMX_OP2(pmulhw),
+ [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
+ [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
+ [0xe8] = MMX_OP2(psubsb),
+ [0xe9] = MMX_OP2(psubsw),
+ [0xea] = MMX_OP2(pminsw),
+ [0xeb] = MMX_OP2(por),
+ [0xec] = MMX_OP2(paddsb),
+ [0xed] = MMX_OP2(paddsw),
+ [0xee] = MMX_OP2(pmaxsw),
+ [0xef] = MMX_OP2(pxor),
+ [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
+ [0xf1] = MMX_OP2(psllw),
+ [0xf2] = MMX_OP2(pslld),
+ [0xf3] = MMX_OP2(psllq),
+ [0xf4] = MMX_OP2(pmuludq),
+ [0xf5] = MMX_OP2(pmaddwd),
+ [0xf6] = MMX_OP2(psadbw),
+ [0xf7] = MMX_OP2(maskmov),
+ [0xf8] = MMX_OP2(psubb),
+ [0xf9] = MMX_OP2(psubw),
+ [0xfa] = MMX_OP2(psubl),
+ [0xfb] = MMX_OP2(psubq),
+ [0xfc] = MMX_OP2(paddb),
+ [0xfd] = MMX_OP2(paddw),
+ [0xfe] = MMX_OP2(paddl),
+};
+
+static void *sse_op_table2[3 * 8][2] = {
+ [0 + 2] = MMX_OP2(psrlw),
+ [0 + 4] = MMX_OP2(psraw),
+ [0 + 6] = MMX_OP2(psllw),
+ [8 + 2] = MMX_OP2(psrld),
+ [8 + 4] = MMX_OP2(psrad),
+ [8 + 6] = MMX_OP2(pslld),
+ [16 + 2] = MMX_OP2(psrlq),
+ [16 + 3] = { NULL, gen_helper_psrldq_xmm },
+ [16 + 6] = MMX_OP2(psllq),
+ [16 + 7] = { NULL, gen_helper_pslldq_xmm },
+};
+
+static void *sse_op_table3[4 * 3] = {
+ gen_helper_cvtsi2ss,
+ gen_helper_cvtsi2sd,
+ X86_64_ONLY(gen_helper_cvtsq2ss),
+ X86_64_ONLY(gen_helper_cvtsq2sd),
+
+ gen_helper_cvttss2si,
+ gen_helper_cvttsd2si,
+ X86_64_ONLY(gen_helper_cvttss2sq),
+ X86_64_ONLY(gen_helper_cvttsd2sq),
+
+ gen_helper_cvtss2si,
+ gen_helper_cvtsd2si,
+ X86_64_ONLY(gen_helper_cvtss2sq),
+ X86_64_ONLY(gen_helper_cvtsd2sq),
+};
+
+static void *sse_op_table4[8][4] = {
+ SSE_FOP(cmpeq),
+ SSE_FOP(cmplt),
+ SSE_FOP(cmple),
+ SSE_FOP(cmpunord),
+ SSE_FOP(cmpneq),
+ SSE_FOP(cmpnlt),
+ SSE_FOP(cmpnle),
+ SSE_FOP(cmpord),
+};
+
+static void *sse_op_table5[256] = {
+ [0x0c] = gen_helper_pi2fw,
+ [0x0d] = gen_helper_pi2fd,
+ [0x1c] = gen_helper_pf2iw,
+ [0x1d] = gen_helper_pf2id,
+ [0x8a] = gen_helper_pfnacc,
+ [0x8e] = gen_helper_pfpnacc,
+ [0x90] = gen_helper_pfcmpge,
+ [0x94] = gen_helper_pfmin,
+ [0x96] = gen_helper_pfrcp,
+ [0x97] = gen_helper_pfrsqrt,
+ [0x9a] = gen_helper_pfsub,
+ [0x9e] = gen_helper_pfadd,
+ [0xa0] = gen_helper_pfcmpgt,
+ [0xa4] = gen_helper_pfmax,
+ [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
+ [0xa7] = gen_helper_movq, /* pfrsqit1 */
+ [0xaa] = gen_helper_pfsubr,
+ [0xae] = gen_helper_pfacc,
+ [0xb0] = gen_helper_pfcmpeq,
+ [0xb4] = gen_helper_pfmul,
+ [0xb6] = gen_helper_movq, /* pfrcpit2 */
+ [0xb7] = gen_helper_pmulhrw_mmx,
+ [0xbb] = gen_helper_pswapd,
+ [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
+};
+
+struct sse_op_helper_s {
+ void *op[2]; uint32_t ext_mask;
+};
+#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
+#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
+#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
+#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+static struct sse_op_helper_s sse_op_table6[256] = {
+ [0x00] = SSSE3_OP(pshufb),
+ [0x01] = SSSE3_OP(phaddw),
+ [0x02] = SSSE3_OP(phaddd),
+ [0x03] = SSSE3_OP(phaddsw),
+ [0x04] = SSSE3_OP(pmaddubsw),
+ [0x05] = SSSE3_OP(phsubw),
+ [0x06] = SSSE3_OP(phsubd),
+ [0x07] = SSSE3_OP(phsubsw),
+ [0x08] = SSSE3_OP(psignb),
+ [0x09] = SSSE3_OP(psignw),
+ [0x0a] = SSSE3_OP(psignd),
+ [0x0b] = SSSE3_OP(pmulhrsw),
+ [0x10] = SSE41_OP(pblendvb),
+ [0x14] = SSE41_OP(blendvps),
+ [0x15] = SSE41_OP(blendvpd),
+ [0x17] = SSE41_OP(ptest),
+ [0x1c] = SSSE3_OP(pabsb),
+ [0x1d] = SSSE3_OP(pabsw),
+ [0x1e] = SSSE3_OP(pabsd),
+ [0x20] = SSE41_OP(pmovsxbw),
+ [0x21] = SSE41_OP(pmovsxbd),
+ [0x22] = SSE41_OP(pmovsxbq),
+ [0x23] = SSE41_OP(pmovsxwd),
+ [0x24] = SSE41_OP(pmovsxwq),
+ [0x25] = SSE41_OP(pmovsxdq),
+ [0x28] = SSE41_OP(pmuldq),
+ [0x29] = SSE41_OP(pcmpeqq),
+ [0x2a] = SSE41_SPECIAL, /* movntdqa */
+ [0x2b] = SSE41_OP(packusdw),
+ [0x30] = SSE41_OP(pmovzxbw),
+ [0x31] = SSE41_OP(pmovzxbd),
+ [0x32] = SSE41_OP(pmovzxbq),
+ [0x33] = SSE41_OP(pmovzxwd),
+ [0x34] = SSE41_OP(pmovzxwq),
+ [0x35] = SSE41_OP(pmovzxdq),
+ [0x37] = SSE42_OP(pcmpgtq),
+ [0x38] = SSE41_OP(pminsb),
+ [0x39] = SSE41_OP(pminsd),
+ [0x3a] = SSE41_OP(pminuw),
+ [0x3b] = SSE41_OP(pminud),
+ [0x3c] = SSE41_OP(pmaxsb),
+ [0x3d] = SSE41_OP(pmaxsd),
+ [0x3e] = SSE41_OP(pmaxuw),
+ [0x3f] = SSE41_OP(pmaxud),
+ [0x40] = SSE41_OP(pmulld),
+ [0x41] = SSE41_OP(phminposuw),
+};
+
+static struct sse_op_helper_s sse_op_table7[256] = {
+ [0x08] = SSE41_OP(roundps),
+ [0x09] = SSE41_OP(roundpd),
+ [0x0a] = SSE41_OP(roundss),
+ [0x0b] = SSE41_OP(roundsd),
+ [0x0c] = SSE41_OP(blendps),
+ [0x0d] = SSE41_OP(blendpd),
+ [0x0e] = SSE41_OP(pblendw),
+ [0x0f] = SSSE3_OP(palignr),
+ [0x14] = SSE41_SPECIAL, /* pextrb */
+ [0x15] = SSE41_SPECIAL, /* pextrw */
+ [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
+ [0x17] = SSE41_SPECIAL, /* extractps */
+ [0x20] = SSE41_SPECIAL, /* pinsrb */
+ [0x21] = SSE41_SPECIAL, /* insertps */
+ [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
+ [0x40] = SSE41_OP(dpps),
+ [0x41] = SSE41_OP(dppd),
+ [0x42] = SSE41_OP(mpsadbw),
+ [0x60] = SSE42_OP(pcmpestrm),
+ [0x61] = SSE42_OP(pcmpestri),
+ [0x62] = SSE42_OP(pcmpistrm),
+ [0x63] = SSE42_OP(pcmpistri),
+};
+
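+/* gen_sse() translates one MMX/SSE opcode: it selects the helper from
+ the tables above according to the mandatory prefix, raises #NM when
+ CR0.TS is set and #UD when emulation is forced or the encoding is
+ invalid, and handles the SSE_SPECIAL cases (moves, conversions,
+ immediate shuffles, ...) with dedicated code. */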
+static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+{
+ int b1, op1_offset, op2_offset, is_xmm, val, ot;
+ int modrm, mod, rm, reg, reg_addr, offset_addr;
+ void *sse_op2;
+
+ b &= 0xff;
+ if (s->prefix & PREFIX_DATA)
+ b1 = 1;
+ else if (s->prefix & PREFIX_REPZ)
+ b1 = 2;
+ else if (s->prefix & PREFIX_REPNZ)
+ b1 = 3;
+ else
+ b1 = 0;
+ sse_op2 = sse_op_table1[b][b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
+ is_xmm = 1;
+ } else {
+ if (b1 == 0) {
+ /* MMX case */
+ is_xmm = 0;
+ } else {
+ is_xmm = 1;
+ }
+ }
+ /* simple MMX/SSE operation */
+ if (s->flags & HF_TS_MASK) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ return;
+ }
+ if (s->flags & HF_EM_MASK) {
+ illegal_op:
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return;
+ }
+ if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
+ if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
+ goto illegal_op;
+ if (b == 0x0e) {
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+ goto illegal_op;
+ /* femms */
+ gen_helper_emms();
+ return;
+ }
+ if (b == 0x77) {
+ /* emms */
+ gen_helper_emms();
+ return;
+ }
+ /* prepare MMX state (XXX: optimize by storing fptt and fptags in
+ the static cpu state) */
+ if (!is_xmm) {
+ gen_helper_enter_mmx();
+ }
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7);
+ if (is_xmm)
+ reg |= rex_r;
+ mod = (modrm >> 6) & 3;
+ if (sse_op2 == SSE_SPECIAL) {
+ b |= (b1 << 8);
+ switch(b) {
+ case 0x0e7: /* movntq */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ break;
+ case 0x1e7: /* movntdq */
+ case 0x02b: /* movntps */
+ case 0x12b: /* movntpd */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ break;
+ case 0x3f0: /* lddqu */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ break;
+ case 0x22b: /* movntss */
+ case 0x32b: /* movntsd */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (b1 & 1) {
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,
+ xmm_regs[reg]));
+ } else {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(0)));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ }
+ break;
+ case 0x6e: /* movd mm, ea */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else
+#endif
+ {
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
+ }
+ break;
+ case 0x16e: /* movd xmm, ea */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
+ } else
+#endif
+ {
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
+ }
+ break;
+ case 0x6f: /* movq mm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else {
+ rm = (modrm & 7);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,fpregs[rm].mmx));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ }
+ break;
+ case 0x010: /* movups */
+ case 0x110: /* movupd */
+ case 0x028: /* movaps */
+ case 0x128: /* movapd */
+ case 0x16f: /* movdqa xmm, ea */
+ case 0x26f: /* movdqu xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
+ offsetof(CPUX86State,xmm_regs[rm]));
+ }
+ break;
+ case 0x210: /* movss xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+ }
+ break;
+ case 0x310: /* movsd xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ break;
+ case 0x012: /* movlps */
+ case 0x112: /* movlpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ /* movhlps */
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+ }
+ break;
+ case 0x212: /* movsldup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+ }
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+ break;
+ case 0x312: /* movddup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ break;
+ case 0x016: /* movhps */
+ case 0x116: /* movhpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ } else {
+ /* movlhps */
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ break;
+ case 0x216: /* movshdup */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
+ }
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+ break;
+ case 0x178: /* extrq, immediate form (SSE4a) */
+ case 0x378: /* insertq, immediate form (SSE4a) */
+ {
+ int bit_index, field_length;
+
+ if (b1 == 1 && reg != 0)
+ goto illegal_op;
+ field_length = ldub_code(s->pc++) & 0x3F;
+ bit_index = ldub_code(s->pc++) & 0x3F;
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]));
+ if (b1 == 1)
+ gen_helper_extrq_i(cpu_ptr0, tcg_const_i32(bit_index),
+ tcg_const_i32(field_length));
+ else
+ gen_helper_insertq_i(cpu_ptr0, tcg_const_i32(bit_index),
+ tcg_const_i32(field_length));
+ }
+ break;
+ case 0x7e: /* movd ea, mm */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+ } else
+#endif
+ {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+ }
+ break;
+ case 0x17e: /* movd ea, xmm */
+#ifdef TARGET_X86_64
+ if (s->dflag == 2) {
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+ } else
+#endif
+ {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+ }
+ break;
+ case 0x27e: /* movq xmm, ea */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ }
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ break;
+ case 0x7f: /* movq ea, mm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
+ } else {
+ rm = (modrm & 7);
+ gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+ offsetof(CPUX86State,fpregs[reg].mmx));
+ }
+ break;
+ case 0x011: /* movups */
+ case 0x111: /* movupd */
+ case 0x029: /* movaps */
+ case 0x129: /* movapd */
+ case 0x17f: /* movdqa ea, xmm */
+ case 0x27f: /* movdqu ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
+ offsetof(CPUX86State,xmm_regs[reg]));
+ }
+ break;
+ case 0x211: /* movss ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+ }
+ break;
+ case 0x311: /* movsd ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ }
+ break;
+ case 0x013: /* movlps */
+ case 0x113: /* movlpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ goto illegal_op;
+ }
+ break;
+ case 0x017: /* movhps */
+ case 0x117: /* movhpd */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ } else {
+ goto illegal_op;
+ }
+ break;
+ case 0x71: /* shift mm, im */
+ case 0x72:
+ case 0x73:
+ case 0x171: /* shift xmm, im */
+ case 0x172:
+ case 0x173:
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+ val = ldub_code(s->pc++);
+ if (is_xmm) {
+ gen_op_movl_T0_im(val);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
+ op1_offset = offsetof(CPUX86State,xmm_t0);
+ } else {
+ gen_op_movl_T0_im(val);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
+ gen_op_movl_T0_0();
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+ op1_offset = offsetof(CPUX86State,mmx_t0);
+ }
+ sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (is_xmm) {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x050: /* movmskps */
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x150: /* movmskpd */
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x02a: /* cvtpi2ps */
+ case 0x12a: /* cvtpi2pd */
+ gen_helper_enter_mmx();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ switch(b >> 8) {
+ case 0x0:
+ gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
+ break;
+ default:
+ case 0x1:
+ gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ break;
+ case 0x22a: /* cvtsi2ss */
+ case 0x32a: /* cvtsi2sd */
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
+ if (ot == OT_LONG) {
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
+ } else {
+ ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
+ }
+ break;
+ case 0x02c: /* cvttps2pi */
+ case 0x12c: /* cvttpd2pi */
+ case 0x02d: /* cvtps2pi */
+ case 0x12d: /* cvtpd2pi */
+ gen_helper_enter_mmx();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ switch(b) {
+ case 0x02c:
+ gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x12c:
+ gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x02d:
+ gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x12d:
+ gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ break;
+ case 0x22c: /* cvttss2si */
+ case 0x32c: /* cvttsd2si */
+ case 0x22d: /* cvtss2si */
+ case 0x32d: /* cvtsd2si */
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if ((b >> 8) & 1) {
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+ } else {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ }
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
+ (b & 1) * 4];
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
+ if (ot == OT_LONG) {
+ ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ } else {
+ ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0xc4: /* pinsrw */
+ case 0x1c4:
+ s->rip_offset = 1;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ val = ldub_code(s->pc++);
+ if (b1) {
+ val &= 7;
+ tcg_gen_st16_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
+ } else {
+ val &= 3;
+ tcg_gen_st16_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
+ }
+ break;
+ case 0xc5: /* pextrw */
+ case 0x1c5:
+ if (mod != 3)
+ goto illegal_op;
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ val = ldub_code(s->pc++);
+ if (b1) {
+ val &= 7;
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
+ } else {
+ val &= 3;
+ rm = (modrm & 7);
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
+ }
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x1d6: /* movq ea, xmm */
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+ }
+ break;
+ case 0x2d6: /* movq2dq */
+ gen_helper_enter_mmx();
+ rm = (modrm & 7);
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+ offsetof(CPUX86State,fpregs[rm].mmx));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+ break;
+ case 0x3d6: /* movdq2q */
+ gen_helper_enter_mmx();
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ break;
+ case 0xd7: /* pmovmskb */
+ case 0x1d7:
+ if (mod != 3)
+ goto illegal_op;
+ if (b1) {
+ rm = (modrm & 7) | REX_B(s);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
+ gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
+ } else {
+ rm = (modrm & 7);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
+ gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ break;
+ case 0x138:
+ if (s->prefix & PREFIX_REPNZ)
+ goto crc32;
+ case 0x038:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+
+ sse_op2 = sse_op_table6[b].op[b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
+ goto illegal_op;
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ switch (b) {
+ case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
+ case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
+ case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
+ gen_ldq_env_A0(s->mem_index, op2_offset +
+ offsetof(XMMReg, XMM_Q(0)));
+ break;
+ case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
+ case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+ offsetof(XMMReg, XMM_L(0)));
+ break;
+ case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
+ tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+ offsetof(XMMReg, XMM_W(0)));
+ break;
+ case 0x2a: /* movntdqa */
+ gen_ldo_env_A0(s->mem_index, op1_offset);
+ return;
+ default:
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ if (sse_op2 == SSE_SPECIAL)
+ goto illegal_op;
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+
+ if (b == 0x17)
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x338: /* crc32 */
+ crc32:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ if (b != 0xf0 && b != 0xf1)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
+ goto illegal_op;
+
+ if (b == 0xf0)
+ ot = OT_BYTE;
+ else if (b == 0xf1 && s->dflag != 2)
+ if (s->prefix & PREFIX_DATA)
+ ot = OT_WORD;
+ else
+ ot = OT_LONG;
+ else
+ ot = OT_QUAD;
+
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+ cpu_T[0], tcg_const_i32(8 << ot));
+
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x03a:
+ case 0x13a:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (b1 >= 2) {
+ goto illegal_op;
+ }
+
+ sse_op2 = sse_op_table7[b].op[b1];
+ if (!sse_op2)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
+ goto illegal_op;
+
+ if (sse_op2 == SSE_SPECIAL) {
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3)
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ val = ldub_code(s->pc++);
+ switch (b) {
+ case 0x14: /* pextrb */
+ tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_B(val & 15)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x15: /* pextrw */
+ tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_W(val & 7)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x16:
+ if (ot == OT_LONG) { /* pextrd */
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ if (mod == 3)
+ gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+ else
+ tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ } else { /* pextrq */
+#ifdef TARGET_X86_64
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_Q(val & 1)));
+ if (mod == 3)
+ gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
+ else
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+#else
+ goto illegal_op;
+#endif
+ }
+ break;
+ case 0x17: /* extractps */
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ if (mod == 3)
+ gen_op_mov_reg_T0(ot, rm);
+ else
+ tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 0x20: /* pinsrb */
+ if (mod == 3)
+ gen_op_mov_TN_reg(OT_LONG, 0, rm);
+ else
+ tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_B(val & 15)));
+ break;
+ case 0x21: /* insertps */
+ if (mod == 3) {
+ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,xmm_regs[rm]
+ .XMM_L((val >> 6) & 3)));
+ } else {
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ }
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,xmm_regs[reg]
+ .XMM_L((val >> 4) & 3)));
+ if ((val >> 0) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(0)));
+ if ((val >> 1) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(1)));
+ if ((val >> 2) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(2)));
+ if ((val >> 3) & 1)
+ tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+ cpu_env, offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(3)));
+ break;
+ case 0x22:
+ if (ot == OT_LONG) { /* pinsrd */
+ if (mod == 3)
+ gen_op_mov_v_reg(ot, cpu_tmp0, rm);
+ else
+ tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_L(val & 3)));
+ } else { /* pinsrq */
+#ifdef TARGET_X86_64
+ if (mod == 3)
+ gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+ else
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+ offsetof(CPUX86State,
+ xmm_regs[reg].XMM_Q(val & 1)));
+#else
+ goto illegal_op;
+#endif
+ }
+ break;
+ }
+ return;
+ }
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ val = ldub_code(s->pc++);
+
+ if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
+ s->cc_op = CC_OP_EFLAGS;
+
+ if (s->dflag == 2)
+ /* The helper must use the entire 64-bit gp registers */
+ val |= 1 << 8;
+ }
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else {
+ /* generic MMX or SSE operation */
+ switch(b) {
+ case 0x70: /* pshufx insn */
+ case 0xc6: /* shufps/shufpd */
+ case 0xc2: /* compare insns */
+ s->rip_offset = 1;
+ break;
+ default:
+ break;
+ }
+ if (is_xmm) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
+ b == 0xc2)) {
+ /* special case for scalar SSE instructions (only load 32 or 64 bits) */
+ if (b1 == 2) {
+ /* 32 bit access */
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+ } else {
+ /* 64 bit access */
+ gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0)));
+ }
+ } else {
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ } else {
+ rm = (modrm & 7);
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ }
+ }
+ switch(b) {
+ case 0x0f: /* 3DNow! data insns */
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ sse_op2 = sse_op_table5[val];
+ if (!sse_op2)
+ goto illegal_op;
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x70: /* pshufx insn */
+ case 0xc6: /* shufps/shufpd */
+ val = ldub_code(s->pc++);
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+ break;
+ case 0xc2:
+ /* compare insns */
+ val = ldub_code(s->pc++);
+ if (val >= 8)
+ goto illegal_op;
+ sse_op2 = sse_op_table4[val][b1];
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ case 0xf7:
+ /* maskmov : we must prepare A0 */
+ if (mod != 3)
+ goto illegal_op;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EDI);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EDI);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ }
+ gen_add_A0_ds_seg(s);
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+ break;
+ default:
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+ break;
+ }
+ if (b == 0x2e || b == 0x2f) {
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ }
+}
+
+#ifdef VBOX
+/* Checks if it's an invalid lock sequence. Only a few instructions
+ can be used together with the lock prefix and of those only the
+ forms that write to a memory operand. So, this is kind of annoying
+ work to do...
+ The AMD manual lists the following instructions.
+ ADC
+ ADD
+ AND
+ BTC
+ BTR
+ BTS
+ CMPXCHG
+ CMPXCHG8B
+ CMPXCHG16B
+ DEC
+ INC
+ NEG
+ NOT
+ OR
+ SBB
+ SUB
+ XADD
+ XCHG
+ XOR */
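+/* For instance, F0 01 03 (lock add [ebx], eax) writes a memory operand and
+ is accepted, while F0 01 C3 (lock add ebx, eax) and F0 8B 03
+ (lock mov eax, [ebx]) are rejected and raise #UD. */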
+static bool is_invalid_lock_sequence(DisasContext *s, target_ulong pc_start, int b)
+{
+ target_ulong pc = s->pc;
+ int modrm, mod, op;
+
+ /* X={8,16,32,64} Y={16,32,64} */
+ switch (b)
+ {
+ /* /2: ADC reg/memX, immX */
+ /* /0: ADD reg/memX, immX */
+ /* /4: AND reg/memX, immX */
+ /* /1: OR reg/memX, immX */
+ /* /3: SBB reg/memX, immX */
+ /* /5: SUB reg/memX, immX */
+ /* /6: XOR reg/memX, immX */
+ case 0x80:
+ case 0x81:
+ case 0x83:
+ modrm = ldub_code(pc++);
+ op = (modrm >> 3) & 7;
+ if (op == 7) /* /7: CMP */
+ break;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ case 0x10: /* /r: ADC reg/mem8, reg8 */
+ case 0x11: /* /r: ADC reg/memY, regY */
+ case 0x00: /* /r: ADD reg/mem8, reg8 */
+ case 0x01: /* /r: ADD reg/memY, regY */
+ case 0x20: /* /r: AND reg/mem8, reg8 */
+ case 0x21: /* /r: AND reg/memY, regY */
+ case 0x08: /* /r: OR reg/mem8, reg8 */
+ case 0x09: /* /r: OR reg/memY, regY */
+ case 0x18: /* /r: SBB reg/mem8, reg8 */
+ case 0x19: /* /r: SBB reg/memY, regY */
+ case 0x28: /* /r: SUB reg/mem8, reg8 */
+ case 0x29: /* /r: SUB reg/memY, regY */
+ case 0x86: /* /r: XCHG reg/mem8, reg8 or XCHG reg8, reg/mem8 */
+ case 0x87: /* /r: XCHG reg/memY, regY or XCHG regY, reg/memY */
+ case 0x30: /* /r: XOR reg/mem8, reg8 */
+ case 0x31: /* /r: XOR reg/memY, regY */
+ modrm = ldub_code(pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ /* /1: DEC reg/memX */
+ /* /0: INC reg/memX */
+ case 0xfe:
+ case 0xff:
+ modrm = ldub_code(pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ /* /3: NEG reg/memX */
+ /* /2: NOT reg/memX */
+ case 0xf6:
+ case 0xf7:
+ modrm = ldub_code(pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ case 0x0f:
+ b = ldub_code(pc++);
+ switch (b)
+ {
+ /* /7: BTC reg/memY, imm8 */
+ /* /6: BTR reg/memY, imm8 */
+ /* /5: BTS reg/memY, imm8 */
+ case 0xba:
+ modrm = ldub_code(pc++);
+ op = (modrm >> 3) & 7;
+ if (op < 5)
+ break;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ case 0xbb: /* /r: BTC reg/memY, regY */
+ case 0xb3: /* /r: BTR reg/memY, regY */
+ case 0xab: /* /r: BTS reg/memY, regY */
+ case 0xb0: /* /r: CMPXCHG reg/mem8, reg8 */
+ case 0xb1: /* /r: CMPXCHG reg/memY, regY */
+ case 0xc0: /* /r: XADD reg/mem8, reg8 */
+ case 0xc1: /* /r: XADD reg/memY, regY */
+ modrm = ldub_code(pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) /* register destination */
+ break;
+ return false;
+
+ /* /1: CMPXCHG8B mem64 or CMPXCHG16B mem128 */
+ case 0xc7:
+ modrm = ldub_code(pc++);
+ op = (modrm >> 3) & 7;
+ if (op != 1)
+ break;
+ return false;
+ }
+ break;
+ }
+
+ /* illegal sequence. The s->pc is past the lock prefix and that
+ is sufficient for the TB, I think. */
+ Log(("illegal lock sequence %RGv (b=%#x)\n", pc_start, b));
+ return true;
+}
+#endif /* VBOX */
+
+/* convert one instruction. s->is_jmp is set if the translation must
+ be stopped. Return the next pc value */
+static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+{
+ int b, prefixes, aflag, dflag;
+ int shift, ot;
+ int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+ target_ulong next_eip, tval;
+ int rex_w, rex_r;
+
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+ tcg_gen_debug_insn_start(pc_start);
+ s->pc = pc_start;
+ prefixes = 0;
+ aflag = s->code32;
+ dflag = s->code32;
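+ /* aflag/dflag encode the address/operand size: 0 = 16-bit, 1 = 32-bit,
+ 2 = 64-bit (the 64-bit values are only set in the CODE64 path below). */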
+ s->override = -1;
+ rex_w = -1;
+ rex_r = 0;
+#ifdef TARGET_X86_64
+ s->rex_x = 0;
+ s->rex_b = 0;
+ x86_64_hregs = 0;
+#endif
+ s->rip_offset = 0; /* for relative ip address */
+#ifdef VBOX
+ /* nike: seems to only slow things down */
+# if 0
+ /* Always update EIP. Otherwise one must be very careful with generated code that can raise exceptions. */
+
+ gen_update_eip(pc_start - s->cs_base);
+# endif
+#endif /* VBOX */
+
+ next_byte:
+ b = ldub_code(s->pc);
+ s->pc++;
+ /* check prefixes */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ switch (b) {
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ goto next_byte;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ goto next_byte;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ goto next_byte;
+ case 0x2e:
+ s->override = R_CS;
+ goto next_byte;
+ case 0x36:
+ s->override = R_SS;
+ goto next_byte;
+ case 0x3e:
+ s->override = R_DS;
+ goto next_byte;
+ case 0x26:
+ s->override = R_ES;
+ goto next_byte;
+ case 0x64:
+ s->override = R_FS;
+ goto next_byte;
+ case 0x65:
+ s->override = R_GS;
+ goto next_byte;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ goto next_byte;
+ case 0x67:
+ prefixes |= PREFIX_ADR;
+ goto next_byte;
+ case 0x40 ... 0x4f:
+ /* REX prefix */
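+ /* R, X and B are pre-shifted to the value 8 so they can be OR'ed
+ directly into the modrm reg/index/base numbers; W is kept as 0/1. */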
+ rex_w = (b >> 3) & 1;
+ rex_r = (b & 0x4) << 1;
+ s->rex_x = (b & 0x2) << 2;
+ REX_B(s) = (b & 0x1) << 3;
+ x86_64_hregs = 1; /* select uniform byte register addressing */
+ goto next_byte;
+ }
+ if (rex_w == 1) {
+ /* 0x66 is ignored if rex.w is set */
+ dflag = 2;
+ } else {
+ if (prefixes & PREFIX_DATA)
+ dflag ^= 1;
+ }
+ if (!(prefixes & PREFIX_ADR))
+ aflag = 2;
+ } else
+#endif
+ {
+ switch (b) {
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ goto next_byte;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ goto next_byte;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ goto next_byte;
+ case 0x2e:
+ s->override = R_CS;
+ goto next_byte;
+ case 0x36:
+ s->override = R_SS;
+ goto next_byte;
+ case 0x3e:
+ s->override = R_DS;
+ goto next_byte;
+ case 0x26:
+ s->override = R_ES;
+ goto next_byte;
+ case 0x64:
+ s->override = R_FS;
+ goto next_byte;
+ case 0x65:
+ s->override = R_GS;
+ goto next_byte;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ goto next_byte;
+ case 0x67:
+ prefixes |= PREFIX_ADR;
+ goto next_byte;
+ }
+ if (prefixes & PREFIX_DATA)
+ dflag ^= 1;
+ if (prefixes & PREFIX_ADR)
+ aflag ^= 1;
+ }
+
+ s->prefix = prefixes;
+ s->aflag = aflag;
+ s->dflag = dflag;
+
+ /* lock generation */
+#ifndef VBOX
+ if (prefixes & PREFIX_LOCK)
+ gen_helper_lock();
+#else /* VBOX */
+ if (prefixes & PREFIX_LOCK) {
+ if (is_invalid_lock_sequence(s, pc_start, b)) {
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return s->pc;
+ }
+ gen_helper_lock();
+ }
+#endif /* VBOX */
+
+ /* now check op code */
+ reswitch:
+ switch(b) {
+ case 0x0f:
+ /**************************/
+ /* extended op code */
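+ /* two-byte 0F xx opcodes are dispatched through this same switch with
+ bit 8 set, e.g. 0x1af below is 0F AF (imul Gv, Ev). */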
+ b = ldub_code(s->pc++) | 0x100;
+ goto reswitch;
+
+ /**************************/
+ /* arith & logic */
+ case 0x00 ... 0x05:
+ case 0x08 ... 0x0d:
+ case 0x10 ... 0x15:
+ case 0x18 ... 0x1d:
+ case 0x20 ... 0x25:
+ case 0x28 ... 0x2d:
+ case 0x30 ... 0x35:
+ case 0x38 ... 0x3d:
+ {
+ int op, f, val;
+ op = (b >> 3) & 7;
+ f = (b >> 1) & 3;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ switch(f) {
+ case 0: /* OP Ev, Gv */
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else if (op == OP_XORL && rm == reg) {
+ xor_zero:
+ /* xor reg, reg optimisation */
+ gen_op_movl_T0_0();
+ s->cc_op = CC_OP_LOGICB + ot;
+ gen_op_mov_reg_T0(ot, reg);
+ gen_op_update1_cc();
+ break;
+ } else {
+ opreg = rm;
+ }
+ gen_op_mov_TN_reg(ot, 1, reg);
+ gen_op(s, op, ot, opreg);
+ break;
+ case 1: /* OP Gv, Ev */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ } else if (op == OP_XORL && rm == reg) {
+ goto xor_zero;
+ } else {
+ gen_op_mov_TN_reg(ot, 1, rm);
+ }
+ gen_op(s, op, ot, reg);
+ break;
+ case 2: /* OP A, Iv */
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ gen_op(s, op, ot, OR_EAX);
+ break;
+ }
+ }
+ break;
+
+ case 0x82:
+ if (CODE64(s))
+ goto illegal_op;
+ case 0x80: /* GRP1 */
+ case 0x81:
+ case 0x83:
+ {
+ int val;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+
+ if (mod != 3) {
+ if (b == 0x83)
+ s->rip_offset = 1;
+ else
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = rm;
+ }
+
+ switch(b) {
+ default:
+ case 0x80:
+ case 0x81:
+ case 0x82:
+ val = insn_get(s, ot);
+ break;
+ case 0x83:
+ val = (int8_t)insn_get(s, OT_BYTE);
+ break;
+ }
+ gen_op_movl_T1_im(val);
+ gen_op(s, op, ot, opreg);
+ }
+ break;
+
+ /**************************/
+ /* inc, dec, and other misc arith */
+ case 0x40 ... 0x47: /* inc Gv */
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_inc(s, ot, OR_EAX + (b & 7), 1);
+ break;
+ case 0x48 ... 0x4f: /* dec Gv */
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_inc(s, ot, OR_EAX + (b & 7), -1);
+ break;
+ case 0xf6: /* GRP3 */
+ case 0xf7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+ if (mod != 3) {
+ if (op == 0)
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+
+ switch(op) {
+ case 0: /* test */
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+ case 2: /* not */
+ tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+ if (mod != 3) {
+ gen_op_st_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ break;
+ case 3: /* neg */
+ tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+ if (mod != 3) {
+ gen_op_st_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ gen_op_update_neg_cc();
+ s->cc_op = CC_OP_SUBB + ot;
+ break;
+ case 4: /* mul */
+ switch(ot) {
+ case OT_BYTE:
+ gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
+ s->cc_op = CC_OP_MULB;
+ break;
+ case OT_WORD:
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ s->cc_op = CC_OP_MULW;
+ break;
+ default:
+ case OT_LONG:
+#ifdef TARGET_X86_64
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+ tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ }
+#endif
+ s->cc_op = CC_OP_MULL;
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_helper_mulq_EAX_T0(cpu_T[0]);
+ s->cc_op = CC_OP_MULQ;
+ break;
+#endif
+ }
+ break;
+ case 5: /* imul */
+ switch(ot) {
+ case OT_BYTE:
+ gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ s->cc_op = CC_OP_MULB;
+ break;
+ case OT_WORD:
+ gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ s->cc_op = CC_OP_MULW;
+ break;
+ default:
+ case OT_LONG:
+#ifdef TARGET_X86_64
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+ tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+ tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ }
+#endif
+ s->cc_op = CC_OP_MULL;
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_helper_imulq_EAX_T0(cpu_T[0]);
+ s->cc_op = CC_OP_MULQ;
+ break;
+#endif
+ }
+ break;
+ case 6: /* div */
+ switch(ot) {
+ case OT_BYTE:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divb_AL(cpu_T[0]);
+ break;
+ case OT_WORD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divw_AX(cpu_T[0]);
+ break;
+ default:
+ case OT_LONG:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divl_EAX(cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_divq_EAX(cpu_T[0]);
+ break;
+#endif
+ }
+ break;
+ case 7: /* idiv */
+ switch(ot) {
+ case OT_BYTE:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivb_AL(cpu_T[0]);
+ break;
+ case OT_WORD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivw_AX(cpu_T[0]);
+ break;
+ default:
+ case OT_LONG:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivl_EAX(cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_idivq_EAX(cpu_T[0]);
+ break;
+#endif
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+
+ case 0xfe: /* GRP4 */
+ case 0xff: /* GRP5 */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ op = (modrm >> 3) & 7;
+ if (op >= 2 && b == 0xfe) {
+ goto illegal_op;
+ }
+ if (CODE64(s)) {
+ if (op == 2 || op == 4) {
+ /* operand size for jumps is 64 bit */
+ ot = OT_QUAD;
+ } else if (op == 3 || op == 5) {
+ ot = dflag ? OT_LONG + (rex_w == 1) : OT_WORD;
+ } else if (op == 6) {
+ /* default push size is 64 bit */
+ ot = dflag ? OT_QUAD : OT_WORD;
+ }
+ }
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (op >= 2 && op != 3 && op != 5)
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+
+ switch(op) {
+ case 0: /* inc Ev */
+ if (mod != 3)
+ opreg = OR_TMP0;
+ else
+ opreg = rm;
+ gen_inc(s, ot, opreg, 1);
+ break;
+ case 1: /* dec Ev */
+ if (mod != 3)
+ opreg = OR_TMP0;
+ else
+ opreg = rm;
+ gen_inc(s, ot, opreg, -1);
+ break;
+ case 2: /* call Ev */
+ /* XXX: optimize if memory (no 'and' is necessary) */
+#ifdef VBOX_WITH_CALL_RECORD
+ if (s->record_call)
+ gen_op_record_call();
+#endif
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ next_eip = s->pc - s->cs_base;
+ gen_movtl_T1_im(next_eip);
+ gen_push_T1(s);
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 3: /* lcall Ev */
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ do_lcall:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(dflag),
+ tcg_const_i32(s->pc - pc_start));
+ } else {
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(dflag),
+ tcg_const_i32(s->pc - s->cs_base));
+ }
+ gen_eob(s);
+ break;
+ case 4: /* jmp Ev */
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 5: /* ljmp Ev */
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ do_ljmp:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
+ tcg_const_i32(s->pc - pc_start));
+ } else {
+ gen_op_movl_seg_T0_vm(R_CS);
+ gen_op_movl_T0_T1();
+ gen_op_jmp_T0();
+ }
+ gen_eob(s);
+ break;
+ case 6: /* push Ev */
+ gen_push_T0(s);
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+
+ case 0x84: /* test Ev, Gv */
+ case 0x85:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_op_mov_TN_reg(ot, 1, reg);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+
+ case 0xa8: /* test eAX, Iv */
+ case 0xa9:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ val = insn_get(s, ot);
+
+ gen_op_mov_TN_reg(ot, 0, OR_EAX);
+ gen_op_movl_T1_im(val);
+ gen_op_testl_T0_T1_cc();
+ s->cc_op = CC_OP_LOGICB + ot;
+ break;
+
+ case 0x98: /* CWDE/CBW */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_QUAD, R_EAX);
+ } else
+#endif
+ if (dflag == 1) {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_LONG, R_EAX);
+ } else {
+ gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX);
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ }
+ break;
+ case 0x99: /* CDQ/CWD */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
+ gen_op_mov_reg_T0(OT_QUAD, R_EDX);
+ } else
+#endif
+ if (dflag == 1) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
+ gen_op_mov_reg_T0(OT_LONG, R_EDX);
+ } else {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EAX);
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
+ gen_op_mov_reg_T0(OT_WORD, R_EDX);
+ }
+ break;
+ case 0x1af: /* imul Gv, Ev */
+ case 0x69: /* imul Gv, Ev, I */
+ case 0x6b:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (b == 0x69)
+ s->rip_offset = insn_const_size(ot);
+ else if (b == 0x6b)
+ s->rip_offset = 1;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ if (b == 0x69) {
+ val = insn_get(s, ot);
+ gen_op_movl_T1_im(val);
+ } else if (b == 0x6b) {
+ val = (int8_t)insn_get(s, OT_BYTE);
+ gen_op_movl_T1_im(val);
+ } else {
+ gen_op_mov_TN_reg(ot, 1, reg);
+ }
+
+#ifdef TARGET_X86_64
+ if (ot == OT_QUAD) {
+ gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else
+#endif
+ if (ot == OT_LONG) {
+#ifdef TARGET_X86_64
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+ {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+ tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+ }
+#endif
+ } else {
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+ /* XXX: use 32 bit mul which could be faster */
+ tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ s->cc_op = CC_OP_MULB + ot;
+ break;
+ case 0x1c0:
+ case 0x1c1: /* xadd Ev, Gv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_mov_TN_reg(ot, 1, rm);
+ gen_op_addl_T0_T1();
+ gen_op_mov_reg_T1(ot, reg);
+ gen_op_mov_reg_T0(ot, rm);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_addl_T0_T1();
+ gen_op_st_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T1(ot, reg);
+ }
+ gen_op_update2_cc();
+ s->cc_op = CC_OP_ADDB + ot;
+ break;
+ case 0x1b0:
+ case 0x1b1: /* cmpxchg Ev, Gv */
+ {
+ int label1, label2;
+ TCGv t0, t1, t2, a0;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
+ a0 = tcg_temp_local_new();
+ gen_op_mov_v_reg(ot, t1, reg);
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_v_reg(ot, t0, rm);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_mov_tl(a0, cpu_A0);
+ gen_op_ld_v(ot + s->mem_index, t0, a0);
+ rm = 0; /* avoid warning */
+ }
+ label1 = gen_new_label();
+ tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0);
+ gen_extu(ot, t2);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+ if (mod == 3) {
+ label2 = gen_new_label();
+ gen_op_mov_reg_v(ot, R_EAX, t0);
+ tcg_gen_br(label2);
+ gen_set_label(label1);
+ gen_op_mov_reg_v(ot, rm, t1);
+ gen_set_label(label2);
+ } else {
+ tcg_gen_mov_tl(t1, t0);
+ gen_op_mov_reg_v(ot, R_EAX, t0);
+ gen_set_label(label1);
+ /* always store */
+ gen_op_st_v(ot + s->mem_index, t1, a0);
+ }
+ tcg_gen_mov_tl(cpu_cc_src, t0);
+ tcg_gen_mov_tl(cpu_cc_dst, t2);
+ s->cc_op = CC_OP_SUBB + ot;
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(a0);
+ }
+ break;
+ case 0x1c7: /* cmpxchg8b */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if ((mod == 3) || ((modrm & 0x38) != 0x8))
+ goto illegal_op;
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
+ goto illegal_op;
+ gen_jmp_im(pc_start - s->cs_base);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_cmpxchg16b(cpu_A0);
+ } else
+#endif
+ {
+ if (!(s->cpuid_features & CPUID_CX8))
+ goto illegal_op;
+ gen_jmp_im(pc_start - s->cs_base);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_cmpxchg8b(cpu_A0);
+ }
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+
+ /**************************/
+ /* push/pop */
+ case 0x50 ... 0x57: /* push */
+ gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s));
+ gen_push_T0(s);
+ break;
+ case 0x58 ... 0x5f: /* pop */
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ gen_pop_T0(s);
+ /* NOTE: order is important for pop %sp */
+ gen_pop_update(s);
+ gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s));
+ break;
+ case 0x60: /* pusha */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_pusha(s);
+ break;
+ case 0x61: /* popa */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_popa(s);
+ break;
+ case 0x68: /* push Iv */
+ case 0x6a:
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ if (b == 0x68)
+ val = insn_get(s, ot);
+ else
+ val = (int8_t)insn_get(s, OT_BYTE);
+ gen_op_movl_T0_im(val);
+ gen_push_T0(s);
+ break;
+ case 0x8f: /* pop Ev */
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ gen_pop_T0(s);
+ if (mod == 3) {
+ /* NOTE: order is important for pop %sp */
+ gen_pop_update(s);
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_reg_T0(ot, rm);
+ } else {
+ /* NOTE: order is important too for MMU exceptions */
+ s->popl_esp_hack = 1 << ot;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ s->popl_esp_hack = 0;
+ gen_pop_update(s);
+ }
+ break;
+ case 0xc8: /* enter */
+ {
+ int level;
+ val = lduw_code(s->pc);
+ s->pc += 2;
+ level = ldub_code(s->pc++);
+ gen_enter(s, val, level);
+ }
+ break;
+ case 0xc9: /* leave */
+ /* XXX: exception not precise (ESP is updated before potential exception) */
+ if (CODE64(s)) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_QUAD, R_ESP);
+ } else if (s->ss32) {
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_LONG, R_ESP);
+ } else {
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EBP);
+ gen_op_mov_reg_T0(OT_WORD, R_ESP);
+ }
+ gen_pop_T0(s);
+ if (CODE64(s)) {
+ ot = dflag ? OT_QUAD : OT_WORD;
+ } else {
+ ot = dflag + OT_WORD;
+ }
+ gen_op_mov_reg_T0(ot, R_EBP);
+ gen_pop_update(s);
+ break;
+ case 0x06: /* push es */
+ case 0x0e: /* push cs */
+ case 0x16: /* push ss */
+ case 0x1e: /* push ds */
+ if (CODE64(s))
+ goto illegal_op;
+ gen_op_movl_T0_seg(b >> 3);
+ gen_push_T0(s);
+ break;
+ case 0x1a0: /* push fs */
+ case 0x1a8: /* push gs */
+ gen_op_movl_T0_seg((b >> 3) & 7);
+ gen_push_T0(s);
+ break;
+ case 0x07: /* pop es */
+ case 0x17: /* pop ss */
+ case 0x1f: /* pop ds */
+ if (CODE64(s))
+ goto illegal_op;
+ reg = b >> 3;
+ gen_pop_T0(s);
+ gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+ gen_pop_update(s);
+ if (reg == R_SS) {
+ /* if reg == SS, inhibit interrupts/trace. */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ s->tf = 0;
+ }
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x1a1: /* pop fs */
+ case 0x1a9: /* pop gs */
+ gen_pop_T0(s);
+ gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
+ gen_pop_update(s);
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+
+ /**************************/
+ /* mov */
+ case 0x88:
+ case 0x89: /* mov Gv, Ev */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ /* generate a generic store */
+ gen_ldst_modrm(s, modrm, ot, reg, 1);
+ break;
+ case 0xc6:
+ case 0xc7: /* mov Ev, Iv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod != 3) {
+ s->rip_offset = insn_const_size(ot);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ }
+ val = insn_get(s, ot);
+ gen_op_movl_T0_im(val);
+ if (mod != 3)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s));
+ break;
+ case 0x8a:
+ case 0x8b: /* mov Ev, Gv */
+#ifdef VBOX /* dtrace hot fix */
+ if (prefixes & PREFIX_LOCK)
+ goto illegal_op;
+#endif
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = OT_WORD + dflag;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x8e: /* mov seg, Gv */
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ if (reg >= 6 || reg == R_CS)
+ goto illegal_op;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+ if (reg == R_SS) {
+ /* if reg == SS, inhibit interrupts/trace */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ s->tf = 0;
+ }
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x8c: /* mov Gv, seg */
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ if (reg >= 6)
+ goto illegal_op;
+ gen_op_movl_T0_seg(reg);
+ if (mod == 3)
+ ot = OT_WORD + dflag;
+ else
+ ot = OT_WORD;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+
+ case 0x1b6: /* movzbS Gv, Eb */
+ case 0x1b7: /* movzwS Gv, Ew */
+ case 0x1be: /* movsbS Gv, Eb */
+ case 0x1bf: /* movswS Gv, Ew */
+ {
+ int d_ot;
+ /* d_ot is the size of destination */
+ d_ot = dflag + OT_WORD;
+ /* ot is the size of source */
+ ot = (b & 1) + OT_BYTE;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+
+ if (mod == 3) {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ switch(ot | (b & 8)) {
+ case OT_BYTE:
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ break;
+ case OT_BYTE | 8:
+ tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+ break;
+ case OT_WORD:
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+ break;
+ default:
+ case OT_WORD | 8:
+ tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+ break;
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (b & 8) {
+ gen_op_lds_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_ldu_T0_A0(ot + s->mem_index);
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ }
+ }
+ break;
+
+ case 0x8d: /* lea */
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ /* we must ensure that no segment is added */
+ s->override = -1;
+ val = s->addseg;
+ s->addseg = 0;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ s->addseg = val;
+ gen_op_mov_reg_A0(ot - OT_WORD, reg);
+ break;
+
+ case 0xa0: /* mov EAX, Ov */
+ case 0xa1:
+ case 0xa2: /* mov Ov, EAX */
+ case 0xa3:
+ {
+ target_ulong offset_addr;
+
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ offset_addr = ldq_code(s->pc);
+ s->pc += 8;
+ gen_op_movq_A0_im(offset_addr);
+ } else
+#endif
+ {
+ if (s->aflag) {
+ offset_addr = insn_get(s, OT_LONG);
+ } else {
+ offset_addr = insn_get(s, OT_WORD);
+ }
+ gen_op_movl_A0_im(offset_addr);
+ }
+ gen_add_A0_ds_seg(s);
+ if ((b & 2) == 0) {
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ gen_op_mov_reg_T0(ot, R_EAX);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, R_EAX);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ }
+ }
+ break;
+ case 0xd7: /* xlat */
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EBX);
+ gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EBX);
+ gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ else
+ tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+ }
+ gen_add_A0_ds_seg(s);
+ gen_op_ldu_T0_A0(OT_BYTE + s->mem_index);
+ gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+ break;
+ case 0xb0 ... 0xb7: /* mov R, Ib */
+ val = insn_get(s, OT_BYTE);
+ gen_op_movl_T0_im(val);
+ gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
+ break;
+ case 0xb8 ... 0xbf: /* mov R, Iv */
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ uint64_t tmp;
+ /* 64 bit case */
+ tmp = ldq_code(s->pc);
+ s->pc += 8;
+ reg = (b & 7) | REX_B(s);
+ gen_movtl_T0_im(tmp);
+ gen_op_mov_reg_T0(OT_QUAD, reg);
+ } else
+#endif
+ {
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = insn_get(s, ot);
+ reg = (b & 7) | REX_B(s);
+ gen_op_movl_T0_im(val);
+ gen_op_mov_reg_T0(ot, reg);
+ }
+ break;
+
+ case 0x91 ... 0x97: /* xchg R, EAX */
+ do_xchg_reg_eax:
+ ot = dflag + OT_WORD;
+ reg = (b & 7) | REX_B(s);
+ rm = R_EAX;
+ goto do_xchg_reg;
+ case 0x86:
+ case 0x87: /* xchg Ev, Gv */
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ do_xchg_reg:
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_op_mov_TN_reg(ot, 1, rm);
+ gen_op_mov_reg_T0(ot, rm);
+ gen_op_mov_reg_T1(ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_mov_TN_reg(ot, 0, reg);
+ /* for xchg, lock is implicit */
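+ /* with an explicit LOCK prefix the lock/unlock helpers are emitted by the prefix handling, hence the inverted test here */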
+ if (!(prefixes & PREFIX_LOCK))
+ gen_helper_lock();
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_op_st_T0_A0(ot + s->mem_index);
+ if (!(prefixes & PREFIX_LOCK))
+ gen_helper_unlock();
+ gen_op_mov_reg_T1(ot, reg);
+ }
+ break;
+ case 0xc4: /* les Gv */
+ if (CODE64(s))
+ goto illegal_op;
+ op = R_ES;
+ goto do_lxx;
+ case 0xc5: /* lds Gv */
+ if (CODE64(s))
+ goto illegal_op;
+ op = R_DS;
+ goto do_lxx;
+ case 0x1b2: /* lss Gv */
+ op = R_SS;
+ goto do_lxx;
+ case 0x1b4: /* lfs Gv */
+ op = R_FS;
+ goto do_lxx;
+ case 0x1b5: /* lgs Gv */
+ op = R_GS;
+ do_lxx:
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(ot + s->mem_index);
+ gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+ /* load the segment first to handle exceptions properly */
+ gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+ gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+ /* then put the data */
+ gen_op_mov_reg_T1(ot, reg);
+ if (s->is_jmp) {
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+
+ /************************/
+ /* shifts */
+ case 0xc0:
+ case 0xc1:
+ /* shift Ev,Ib */
+ shift = 2;
+ grp2:
+ {
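+ /* shift == 2: count comes from an immediate byte, 1: shift by one, 0: count in CL */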
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+
+ if (mod != 3) {
+ if (shift == 2) {
+ s->rip_offset = 1;
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = (modrm & 7) | REX_B(s);
+ }
+
+ /* simpler op */
+ if (shift == 0) {
+ gen_shift(s, op, ot, opreg, OR_ECX);
+ } else {
+ if (shift == 2) {
+ shift = ldub_code(s->pc++);
+ }
+ gen_shifti(s, op, ot, opreg, shift);
+ }
+ }
+ break;
+ case 0xd0:
+ case 0xd1:
+ /* shift Ev,1 */
+ shift = 1;
+ goto grp2;
+ case 0xd2:
+ case 0xd3:
+ /* shift Ev,cl */
+ shift = 0;
+ goto grp2;
+
+ case 0x1a4: /* shld imm */
+ op = 0;
+ shift = 1;
+ goto do_shiftd;
+ case 0x1a5: /* shld cl */
+ op = 0;
+ shift = 0;
+ goto do_shiftd;
+ case 0x1ac: /* shrd imm */
+ op = 1;
+ shift = 1;
+ goto do_shiftd;
+ case 0x1ad: /* shrd cl */
+ op = 1;
+ shift = 0;
+ do_shiftd:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ opreg = OR_TMP0;
+ } else {
+ opreg = rm;
+ }
+ gen_op_mov_TN_reg(ot, 1, reg);
+
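+ /* T1 holds the second shift operand; T3 receives the count, either an immediate byte or CL */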
+ if (shift) {
+ val = ldub_code(s->pc++);
+ tcg_gen_movi_tl(cpu_T3, val);
+ } else {
+ tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+ }
+ gen_shiftd_rm_T1_T3(s, ot, opreg, op);
+ break;
+
+ /************************/
+ /* floats */
+ case 0xd8 ... 0xdf:
+ if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+ /* if CR0.EM or CR0.TS is set, generate an FPU exception */
+ /* XXX: what to do on an illegal op? */
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+ op = ((b & 7) << 3) | ((modrm >> 3) & 7);
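+ /* op packs the low three opcode bits with the modrm reg field into a 6-bit FPU operation index */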
+ if (mod != 3) {
+ /* memory op */
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ switch(op) {
+ case 0x00 ... 0x07: /* fxxxs */
+ case 0x10 ... 0x17: /* fixxxl */
+ case 0x20 ... 0x27: /* fxxxl */
+ case 0x30 ... 0x37: /* fixxx */
+ {
+ int op1;
+ op1 = op & 7;
+
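+ /* op >> 4 selects the memory operand type: 0 = float32, 1 = int32, 2 = float64, 3 = int16 */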
+ switch(op >> 4) {
+ case 0:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_flds_FT0(cpu_tmp2_i32);
+ break;
+ case 1:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_FT0(cpu_tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fldl_FT0(cpu_tmp1_i64);
+ break;
+ case 3:
+ default:
+ gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_FT0(cpu_tmp2_i32);
+ break;
+ }
+
+ gen_helper_fp_arith_ST0_FT0(op1);
+ if (op1 == 3) {
+ /* fcomp needs pop */
+ gen_helper_fpop();
+ }
+ }
+ break;
+ case 0x08: /* flds */
+ case 0x0a: /* fsts */
+ case 0x0b: /* fstps */
+ case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+ case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+ case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+ switch(op & 7) {
+ case 0:
+ switch(op >> 4) {
+ case 0:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_flds_ST0(cpu_tmp2_i32);
+ break;
+ case 1:
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_ST0(cpu_tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fldl_ST0(cpu_tmp1_i64);
+ break;
+ case 3:
+ default:
+ gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fildl_ST0(cpu_tmp2_i32);
+ break;
+ }
+ break;
+ case 1:
+ /* XXX: the corresponding CPUID bit must be tested ! */
+ switch(op >> 4) {
+ case 1:
+ gen_helper_fisttl_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 2:
+ gen_helper_fisttll_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 3:
+ default:
+ gen_helper_fistt_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ }
+ gen_helper_fpop();
+ break;
+ default:
+ switch(op >> 4) {
+ case 0:
+ gen_helper_fsts_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 1:
+ gen_helper_fistl_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ break;
+ case 2:
+ gen_helper_fstl_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ break;
+ case 3:
+ default:
+ gen_helper_fist_ST0(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ }
+ if ((op & 7) == 3)
+ gen_helper_fpop();
+ break;
+ }
+ break;
+ case 0x0c: /* fldenv mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fldenv(
+ cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x0d: /* fldcw mem */
+ gen_op_ld_T0_A0(OT_WORD + s->mem_index);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_fldcw(cpu_tmp2_i32);
+ break;
+ case 0x0e: /* fnstenv mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x0f: /* fnstcw mem */
+ gen_helper_fnstcw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ case 0x1d: /* fldt mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fldt_ST0(cpu_A0);
+ break;
+ case 0x1f: /* fstpt mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fstt_ST0(cpu_A0);
+ gen_helper_fpop();
+ break;
+ case 0x2c: /* frstor mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x2e: /* fnsave mem */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag));
+ break;
+ case 0x2f: /* fnstsw mem */
+ gen_helper_fnstsw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ break;
+ case 0x3c: /* fbld */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fbld_ST0(cpu_A0);
+ break;
+ case 0x3e: /* fbstp */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fbst_ST0(cpu_A0);
+ gen_helper_fpop();
+ break;
+ case 0x3d: /* fildll */
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fildll_ST0(cpu_tmp1_i64);
+ break;
+ case 0x3f: /* fistpll */
+ gen_helper_fistll_ST0(cpu_tmp1_i64);
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ (s->mem_index >> 2) - 1);
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else {
+ /* register float ops */
+ opreg = rm;
+
+ switch(op) {
+ case 0x08: /* fld sti */
+ gen_helper_fpush();
+ gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7));
+ break;
+ case 0x09: /* fxchg sti */
+ case 0x29: /* fxchg4 sti, undocumented op */
+ case 0x39: /* fxchg7 sti, undocumented op */
+ gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg));
+ break;
+ case 0x0a: /* grp d9/2 */
+ switch(rm) {
+ case 0: /* fnop */
+ /* check exceptions (FreeBSD FPU probe) */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fwait();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x0c: /* grp d9/4 */
+ switch(rm) {
+ case 0: /* fchs */
+ gen_helper_fchs_ST0();
+ break;
+ case 1: /* fabs */
+ gen_helper_fabs_ST0();
+ break;
+ case 4: /* ftst */
+ gen_helper_fldz_FT0();
+ gen_helper_fcom_ST0_FT0();
+ break;
+ case 5: /* fxam */
+ gen_helper_fxam_ST0();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x0d: /* grp d9/5 */
+ {
+ switch(rm) {
+ case 0:
+ gen_helper_fpush();
+ gen_helper_fld1_ST0();
+ break;
+ case 1:
+ gen_helper_fpush();
+ gen_helper_fldl2t_ST0();
+ break;
+ case 2:
+ gen_helper_fpush();
+ gen_helper_fldl2e_ST0();
+ break;
+ case 3:
+ gen_helper_fpush();
+ gen_helper_fldpi_ST0();
+ break;
+ case 4:
+ gen_helper_fpush();
+ gen_helper_fldlg2_ST0();
+ break;
+ case 5:
+ gen_helper_fpush();
+ gen_helper_fldln2_ST0();
+ break;
+ case 6:
+ gen_helper_fpush();
+ gen_helper_fldz_ST0();
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ case 0x0e: /* grp d9/6 */
+ switch(rm) {
+ case 0: /* f2xm1 */
+ gen_helper_f2xm1();
+ break;
+ case 1: /* fyl2x */
+ gen_helper_fyl2x();
+ break;
+ case 2: /* fptan */
+ gen_helper_fptan();
+ break;
+ case 3: /* fpatan */
+ gen_helper_fpatan();
+ break;
+ case 4: /* fxtract */
+ gen_helper_fxtract();
+ break;
+ case 5: /* fprem1 */
+ gen_helper_fprem1();
+ break;
+ case 6: /* fdecstp */
+ gen_helper_fdecstp();
+ break;
+ default:
+ case 7: /* fincstp */
+ gen_helper_fincstp();
+ break;
+ }
+ break;
+ case 0x0f: /* grp d9/7 */
+ switch(rm) {
+ case 0: /* fprem */
+ gen_helper_fprem();
+ break;
+ case 1: /* fyl2xp1 */
+ gen_helper_fyl2xp1();
+ break;
+ case 2: /* fsqrt */
+ gen_helper_fsqrt();
+ break;
+ case 3: /* fsincos */
+ gen_helper_fsincos();
+ break;
+ case 5: /* fscale */
+ gen_helper_fscale();
+ break;
+ case 4: /* frndint */
+ gen_helper_frndint();
+ break;
+ case 6: /* fsin */
+ gen_helper_fsin();
+ break;
+ default:
+ case 7: /* fcos */
+ gen_helper_fcos();
+ break;
+ }
+ break;
+ case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+ case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+ case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+ {
+ int op1;
+
+ op1 = op & 7;
+ if (op >= 0x20) {
+ gen_helper_fp_arith_STN_ST0(op1, opreg);
+ if (op >= 0x30)
+ gen_helper_fpop();
+ } else {
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fp_arith_ST0_FT0(op1);
+ }
+ }
+ break;
+ case 0x02: /* fcom */
+ case 0x22: /* fcom2, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcom_ST0_FT0();
+ break;
+ case 0x03: /* fcomp */
+ case 0x23: /* fcomp3, undocumented op */
+ case 0x32: /* fcomp5, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcom_ST0_FT0();
+ gen_helper_fpop();
+ break;
+ case 0x15: /* da/5 */
+ switch(rm) {
+ case 1: /* fucompp */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+ gen_helper_fucom_ST0_FT0();
+ gen_helper_fpop();
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x1c:
+ switch(rm) {
+ case 0: /* feni (287 only, just do nop here) */
+ break;
+ case 1: /* fdisi (287 only, just do nop here) */
+ break;
+ case 2: /* fclex */
+ gen_helper_fclex();
+ break;
+ case 3: /* fninit */
+ gen_helper_fninit();
+ break;
+ case 4: /* fsetpm (287 only, just do nop here) */
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x1d: /* fucomi */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucomi_ST0_FT0();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x1e: /* fcomi */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcomi_ST0_FT0();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x28: /* ffree sti */
+ gen_helper_ffree_STN(tcg_const_i32(opreg));
+ break;
+ case 0x2a: /* fst sti */
+ gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+ break;
+ case 0x2b: /* fstp sti */
+ case 0x0b: /* fstp1 sti, undocumented op */
+ case 0x3a: /* fstp8 sti, undocumented op */
+ case 0x3b: /* fstp9 sti, undocumented op */
+ gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+ gen_helper_fpop();
+ break;
+ case 0x2c: /* fucom st(i) */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucom_ST0_FT0();
+ break;
+ case 0x2d: /* fucomp st(i) */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucom_ST0_FT0();
+ gen_helper_fpop();
+ break;
+ case 0x33: /* de/3 */
+ switch(rm) {
+ case 1: /* fcompp */
+ gen_helper_fmov_FT0_STN(tcg_const_i32(1));
+ gen_helper_fcom_ST0_FT0();
+ gen_helper_fpop();
+ gen_helper_fpop();
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x38: /* ffreep sti, undocumented op */
+ gen_helper_ffree_STN(tcg_const_i32(opreg));
+ gen_helper_fpop();
+ break;
+ case 0x3c: /* df/4 */
+ switch(rm) {
+ case 0: /* fnstsw %ax */
+ gen_helper_fnstsw(cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ gen_op_mov_reg_T0(OT_WORD, R_EAX);
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x3d: /* fucomip */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fucomi_ST0_FT0();
+ gen_helper_fpop();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x3e: /* fcomip */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+ gen_helper_fcomi_ST0_FT0();
+ gen_helper_fpop();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x10 ... 0x13: /* fcmovxx */
+ case 0x18 ... 0x1b:
+ {
+ int op1, l1;
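+ /* fcmov: jump over the fmov when the condition does not hold; op & 3 selects the condition and opcode bit 3 selects the negated (fcmovnXX) forms */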
+ static const uint8_t fcmov_cc[8] = {
+ (JCC_B << 1),
+ (JCC_Z << 1),
+ (JCC_BE << 1),
+ (JCC_P << 1),
+ };
+ op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, op1, l1);
+ gen_helper_fmov_ST0_STN(tcg_const_i32(opreg));
+ gen_set_label(l1);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ /************************/
+ /* string ops */
+
+ case 0xa4: /* movsS */
+ case 0xa5:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_movs(s, ot);
+ }
+ break;
+
+ case 0xaa: /* stosS */
+ case 0xab:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_stos(s, ot);
+ }
+ break;
+ case 0xac: /* lodsS */
+ case 0xad:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_lods(s, ot);
+ }
+ break;
+ case 0xae: /* scasS */
+ case 0xaf:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & PREFIX_REPNZ) {
+ gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+ } else if (prefixes & PREFIX_REPZ) {
+ gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+ } else {
+ gen_scas(s, ot);
+ s->cc_op = CC_OP_SUBB + ot;
+ }
+ break;
+
+ case 0xa6: /* cmpsS */
+ case 0xa7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag + OT_WORD;
+ if (prefixes & PREFIX_REPNZ) {
+ gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
+ } else if (prefixes & PREFIX_REPZ) {
+ gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
+ } else {
+ gen_cmps(s, ot);
+ s->cc_op = CC_OP_SUBB + ot;
+ }
+ break;
+ case 0x6c: /* insS */
+ case 0x6d:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_ins(s, ot);
+ if (use_icount) {
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ }
+ break;
+ case 0x6e: /* outsS */
+ case 0x6f:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes) | 4);
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
+ } else {
+ gen_outs(s, ot);
+ if (use_icount) {
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ }
+ break;
+
+ /************************/
+ /* port I/O */
+
+ case 0xe4:
+ case 0xe5:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = ldub_code(s->pc++);
+ gen_op_movl_T0_im(val);
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
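+ /* when icount is active, the I/O access is bracketed by gen_io_start/gen_io_end and the TB is ended right after it */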
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+ gen_op_mov_reg_T1(ot, R_EAX);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xe6:
+ case 0xe7:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ val = ldub_code(s->pc++);
+ gen_op_movl_T0_im(val);
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes));
+ gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xec:
+ case 0xed:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
+ gen_op_mov_reg_T1(ot, R_EAX);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0xee:
+ case 0xef:
+ if ((b & 1) == 0)
+ ot = OT_BYTE;
+ else
+ ot = dflag ? OT_LONG : OT_WORD;
+ gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
+ gen_op_andl_T0_ffff();
+ gen_check_io(s, ot, pc_start - s->cs_base,
+ svm_is_rep(prefixes));
+ gen_op_mov_TN_reg(ot, 1, R_EAX);
+
+ if (use_icount)
+ gen_io_start();
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+
+ /************************/
+ /* control */
+ case 0xc2: /* ret im */
+ val = ldsw_code(s->pc);
+ s->pc += 2;
+ gen_pop_T0(s);
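+ /* in 64-bit mode a near ret defaults to a 64-bit operand size unless a 16-bit override is used */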
+ if (CODE64(s) && s->dflag)
+ s->dflag = 2;
+ gen_stack_update(s, val + (2 << s->dflag));
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 0xc3: /* ret */
+ gen_pop_T0(s);
+ gen_pop_update(s);
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ gen_op_jmp_T0();
+ gen_eob(s);
+ break;
+ case 0xca: /* lret im */
+ val = ldsw_code(s->pc);
+ s->pc += 2;
+ do_lret:
+ if (s->pe && !s->vm86) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_lret_protected(tcg_const_i32(s->dflag),
+ tcg_const_i32(val));
+ } else {
+ gen_stack_A0(s);
+ /* pop offset */
+ gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+ if (s->dflag == 0)
+ gen_op_andl_T0_ffff();
+ /* NOTE: keeping EIP updated is not a problem in case of
+ exception */
+ gen_op_jmp_T0();
+ /* pop selector */
+ gen_op_addl_A0_im(2 << s->dflag);
+ gen_op_ld_T0_A0(1 + s->dflag + s->mem_index);
+ gen_op_movl_seg_T0_vm(R_CS);
+ /* add stack offset */
+ gen_stack_update(s, val + (4 << s->dflag));
+ }
+ gen_eob(s);
+ break;
+ case 0xcb: /* lret */
+ val = 0;
+ goto do_lret;
+ case 0xcf: /* iret */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
+ if (!s->pe) {
+ /* real mode */
+ gen_helper_iret_real(tcg_const_i32(s->dflag));
+ s->cc_op = CC_OP_EFLAGS;
+ } else if (s->vm86) {
+#ifdef VBOX
+ if (s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_helper_iret_real(tcg_const_i32(s->dflag));
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_iret_protected(tcg_const_i32(s->dflag),
+ tcg_const_i32(s->pc - s->cs_base));
+ s->cc_op = CC_OP_EFLAGS;
+ }
+ gen_eob(s);
+ break;
+ case 0xe8: /* call im */
+ {
+ if (dflag)
+ tval = (int32_t)insn_get(s, OT_LONG);
+ else
+ tval = (int16_t)insn_get(s, OT_WORD);
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ else if(!CODE64(s))
+ tval &= 0xffffffff;
+ gen_movtl_T0_im(next_eip);
+ gen_push_T0(s);
+ gen_jmp(s, tval);
+ }
+ break;
+ case 0x9a: /* lcall im */
+ {
+ unsigned int selector, offset;
+
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ offset = insn_get(s, ot);
+ selector = insn_get(s, OT_WORD);
+
+ gen_op_movl_T0_im(selector);
+ gen_op_movl_T1_imu(offset);
+ }
+ goto do_lcall;
+ case 0xe9: /* jmp im */
+ if (dflag)
+ tval = (int32_t)insn_get(s, OT_LONG);
+ else
+ tval = (int16_t)insn_get(s, OT_WORD);
+ tval += s->pc - s->cs_base;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ else if(!CODE64(s))
+ tval &= 0xffffffff;
+ gen_jmp(s, tval);
+ break;
+ case 0xea: /* ljmp im */
+ {
+ unsigned int selector, offset;
+
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ offset = insn_get(s, ot);
+ selector = insn_get(s, OT_WORD);
+
+ gen_op_movl_T0_im(selector);
+ gen_op_movl_T1_imu(offset);
+ }
+ goto do_ljmp;
+ case 0xeb: /* jmp Jb */
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ tval += s->pc - s->cs_base;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ gen_jmp(s, tval);
+ break;
+ case 0x70 ... 0x7f: /* jcc Jb */
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ goto do_jcc;
+ case 0x180 ... 0x18f: /* jcc Jv */
+ if (dflag) {
+ tval = (int32_t)insn_get(s, OT_LONG);
+ } else {
+ tval = (int16_t)insn_get(s, OT_WORD);
+ }
+ do_jcc:
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+ gen_jcc(s, b, tval, next_eip);
+ break;
+
+ case 0x190 ... 0x19f: /* setcc Gv */
+ modrm = ldub_code(s->pc++);
+ gen_setcc(s, b);
+ gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+ break;
+ case 0x140 ... 0x14f: /* cmov Gv, Ev */
+ {
+ int l1;
+ TCGv t0;
+
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ t0 = tcg_temp_local_new();
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_v_reg(ot, t0, rm);
+ }
+#ifdef TARGET_X86_64
+ if (ot == OT_LONG) {
+ /* XXX: specific Intel behaviour ? */
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ tcg_gen_mov_tl(cpu_regs[reg], t0);
+ gen_set_label(l1);
+ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
+ } else
+#endif
+ {
+ l1 = gen_new_label();
+ gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ gen_op_mov_reg_v(ot, reg, t0);
+ gen_set_label(l1);
+ }
+ tcg_temp_free(t0);
+ }
+ break;
+
+ /************************/
+ /* flags */
+ case 0x9c: /* pushf */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+#ifdef VBOX
+ if (s->vm86 && s->vme && s->iopl != 3)
+ gen_helper_read_eflags_vme(cpu_T[0]);
+ else
+#endif
+ gen_helper_read_eflags(cpu_T[0]);
+ gen_push_T0(s);
+ }
+ break;
+ case 0x9d: /* popf */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_pop_T0(s);
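+ /* the set of writable EFLAGS bits depends on privilege: CPL 0 may change IOPL and IF, CPL <= IOPL may change IF, others neither */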
+ if (s->cpl == 0) {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK)));
+ } else {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff));
+ }
+ } else {
+ if (s->cpl <= s->iopl) {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK)));
+ } else {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff));
+ }
+ } else {
+ if (s->dflag) {
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK)));
+ } else {
+#ifdef VBOX
+ if (s->vm86 && s->vme)
+ gen_helper_write_eflags_vme(cpu_T[0]);
+ else
+#endif
+ gen_helper_write_eflags(cpu_T[0],
+ tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff));
+ }
+ }
+ }
+ gen_pop_update(s);
+ s->cc_op = CC_OP_EFLAGS;
+ /* abort translation because TF flag may change */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 0x9e: /* sahf */
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+ goto illegal_op;
+ gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
+ tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x9f: /* lahf */
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_T[0]);
+ /* Note: gen_compute_eflags() only gives the condition codes */
+ tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+ gen_op_mov_reg_T0(OT_BYTE, R_AH);
+ break;
+ case 0xf5: /* cmc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xf8: /* clc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xf9: /* stc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xfc: /* cld */
+ tcg_gen_movi_i32(cpu_tmp2_i32, 1);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+ break;
+ case 0xfd: /* std */
+ tcg_gen_movi_i32(cpu_tmp2_i32, -1);
+ tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+ break;
+
+ /************************/
+ /* bit operations */
+ case 0x1ba: /* bt/bts/btr/btc Gv, im */
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ op = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ if (mod != 3) {
+ s->rip_offset = 1;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+ /* load shift */
+ val = ldub_code(s->pc++);
+ gen_op_movl_T1_im(val);
+ if (op < 4)
+ goto illegal_op;
+ op -= 4;
+ goto bt_op;
+ case 0x1a3: /* bt Gv, Ev */
+ op = 0;
+ goto do_btx;
+ case 0x1ab: /* bts */
+ op = 1;
+ goto do_btx;
+ case 0x1b3: /* btr */
+ op = 2;
+ goto do_btx;
+ case 0x1bb: /* btc */
+ op = 3;
+ do_btx:
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_TN_reg(OT_LONG, 1, reg);
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* specific case: we need to add a displacement */
+ gen_exts(ot, cpu_T[1]);
+ tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
+ tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
+ tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
+ } else {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ }
+ bt_op:
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
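+ /* T1 now holds the bit index modulo the operand width; the selected bit of T0 becomes CF, and bts/btr/btc set, clear or toggle it */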
+ switch(op) {
+ case 0:
+ tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ break;
+ case 1:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ case 2:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ default:
+ case 3:
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+ tcg_gen_movi_tl(cpu_tmp0, 1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
+ }
+ s->cc_op = CC_OP_SARB + ot;
+ if (op != 0) {
+ if (mod != 3)
+ gen_op_st_T0_A0(ot + s->mem_index);
+ else
+ gen_op_mov_reg_T0(ot, rm);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ }
+ break;
+ case 0x1bc: /* bsf */
+ case 0x1bd: /* bsr */
+ {
+ int label1;
+ TCGv t0;
+
+ ot = dflag + OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(s,modrm, ot, OR_TMP0, 0);
+ gen_extu(ot, cpu_T[0]);
+ t0 = tcg_temp_local_new();
+ tcg_gen_mov_tl(t0, cpu_T[0]);
+ if ((b & 1) && (prefixes & PREFIX_REPZ) &&
+ (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
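+ /* an F3 (rep) prefix together with the ABM feature selects lzcnt instead of bsr */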
+ switch(ot) {
+ case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(16)); break;
+ case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(32)); break;
+ case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
+ tcg_const_i32(64)); break;
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ } else {
+ label1 = gen_new_label();
+ tcg_gen_movi_tl(cpu_cc_dst, 0);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
+ if (b & 1) {
+ gen_helper_bsr(cpu_T[0], t0);
+ } else {
+ gen_helper_bsf(cpu_T[0], t0);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ tcg_gen_movi_tl(cpu_cc_dst, 1);
+ gen_set_label(label1);
+ tcg_gen_discard_tl(cpu_cc_src);
+ s->cc_op = CC_OP_LOGICB + ot;
+ }
+ tcg_temp_free(t0);
+ }
+ break;
+ /************************/
+ /* bcd */
+ case 0x27: /* daa */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_daa();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x2f: /* das */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_das();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x37: /* aaa */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_aaa();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x3f: /* aas */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_helper_aas();
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0xd4: /* aam */
+ if (CODE64(s))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ if (val == 0) {
+ gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+ } else {
+ gen_helper_aam(tcg_const_i32(val));
+ s->cc_op = CC_OP_LOGICB;
+ }
+ break;
+ case 0xd5: /* aad */
+ if (CODE64(s))
+ goto illegal_op;
+ val = ldub_code(s->pc++);
+ gen_helper_aad(tcg_const_i32(val));
+ s->cc_op = CC_OP_LOGICB;
+ break;
+ /************************/
+ /* misc */
+ case 0x90: /* nop */
+ /* XXX: correct lock test for all insn */
+ if (prefixes & PREFIX_LOCK) {
+ goto illegal_op;
+ }
+ /* If REX_B is set, then this is xchg eax, r8d, not a nop. */
+ if (REX_B(s)) {
+ goto do_xchg_reg_eax;
+ }
+ if (prefixes & PREFIX_REPZ) {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE);
+ }
+ break;
+ case 0x9b: /* fwait */
+ if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
+ (HF_MP_MASK | HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fwait();
+ }
+ break;
+ case 0xcc: /* int3 */
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && !s->vme) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else
+#endif
+ gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
+ break;
+ case 0xcd: /* int N */
+ val = ldub_code(s->pc++);
+#ifdef VBOX
+ if (s->vm86 && s->iopl != 3 && !s->vme) {
+#else
+ if (s->vm86 && s->iopl != 3) {
+#endif
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
+ }
+ break;
+ case 0xce: /* into */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_into(tcg_const_i32(s->pc - pc_start));
+ break;
+#ifdef WANT_ICEBP
+ case 0xf1: /* icebp (undocumented, exits to external debugger) */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
+#if 1
+ gen_debug(s, pc_start - s->cs_base);
+#else
+ /* start debug */
+ tb_flush(cpu_single_env);
+ cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+#endif
+ break;
+#endif
+ case 0xfa: /* cli */
+ if (!s->vm86) {
+ if (s->cpl <= s->iopl) {
+ gen_helper_cli();
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ } else {
+ if (s->iopl == 3) {
+ gen_helper_cli();
+#ifdef VBOX
+ } else if (s->iopl != 3 && s->vme) {
+ gen_helper_cli_vme();
+#endif
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ }
+ break;
+ case 0xfb: /* sti */
+ if (!s->vm86) {
+ if (s->cpl <= s->iopl) {
+ gen_sti:
+ gen_helper_sti();
+ /* interrupts are recognized only after the insn following sti */
+ /* If several instructions disable interrupts, only the
+ _first_ does it */
+ if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
+ gen_helper_set_inhibit_irq();
+ /* give a chance to handle pending irqs */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ } else {
+ if (s->iopl == 3) {
+ goto gen_sti;
+#ifdef VBOX
+ } else if (s->iopl != 3 && s->vme) {
+ gen_helper_sti_vme();
+ /* give a chance to handle pending irqs */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+#endif
+ } else {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ }
+ }
+ break;
+ case 0x62: /* bound */
+ if (CODE64(s))
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ gen_op_mov_TN_reg(ot, 0, reg);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ if (ot == OT_WORD)
+ gen_helper_boundw(cpu_A0, cpu_tmp2_i32);
+ else
+ gen_helper_boundl(cpu_A0, cpu_tmp2_i32);
+ break;
+ case 0x1c8 ... 0x1cf: /* bswap reg */
+ reg = (b & 7) | REX_B(s);
+#ifdef TARGET_X86_64
+ if (dflag == 2) {
+ gen_op_mov_TN_reg(OT_QUAD, 0, reg);
+ tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_QUAD, reg);
+ } else
+#endif
+ {
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_LONG, reg);
+ }
+ break;
+ case 0xd6: /* salc */
+ if (CODE64(s))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags_c(cpu_T[0]);
+ tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(OT_BYTE, R_EAX);
+ break;
+ case 0xe0: /* loopnz */
+ case 0xe1: /* loopz */
+ case 0xe2: /* loop */
+ case 0xe3: /* jecxz */
+ {
+ int l1, l2, l3;
+
+ tval = (int8_t)insn_get(s, OT_BYTE);
+ next_eip = s->pc - s->cs_base;
+ tval += next_eip;
+ if (s->dflag == 0)
+ tval &= 0xffff;
+
+ l1 = gen_new_label();
+ l2 = gen_new_label();
+ l3 = gen_new_label();
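+ /* l1: branch taken, l3: loopz/loopnz fall-through when ECX reaches zero, l2: common exit */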
+ b &= 3;
+ switch(b) {
+ case 0: /* loopnz */
+ case 1: /* loopz */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_op_add_reg_im(s->aflag, R_ECX, -1);
+ gen_op_jz_ecx(s->aflag, l3);
+ gen_compute_eflags(cpu_tmp0);
+ tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
+ if (b == 0) {
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
+ } else {
+ tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
+ }
+ break;
+ case 2: /* loop */
+ gen_op_add_reg_im(s->aflag, R_ECX, -1);
+ gen_op_jnz_ecx(s->aflag, l1);
+ break;
+ default:
+ case 3: /* jcxz */
+ gen_op_jz_ecx(s->aflag, l1);
+ break;
+ }
+
+ gen_set_label(l3);
+ gen_jmp_im(next_eip);
+ tcg_gen_br(l2);
+
+ gen_set_label(l1);
+ gen_jmp_im(tval);
+ gen_set_label(l2);
+ gen_eob(s);
+ }
+ break;
+ case 0x130: /* wrmsr */
+ case 0x132: /* rdmsr */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (b & 2) {
+ gen_helper_rdmsr();
+ } else {
+ gen_helper_wrmsr();
+ }
+ }
+ break;
+ case 0x131: /* rdtsc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (use_icount)
+ gen_io_start();
+ gen_helper_rdtsc();
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ case 0x133: /* rdpmc */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_rdpmc();
+ break;
+ case 0x134: /* sysenter */
+#ifndef VBOX
+ /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
+ if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+#else
+ if ( !(cpu_single_env->cpuid_features & CPUID_SEP)
+ || ( IS_LONG_MODE(s)
+ && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL))
+#endif
+ goto illegal_op;
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysenter();
+ gen_eob(s);
+ }
+ break;
+ case 0x135: /* sysexit */
+#ifndef VBOX
+ /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
+ if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+#else
+ if ( !(cpu_single_env->cpuid_features & CPUID_SEP)
+ || ( IS_LONG_MODE(s)
+ && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL))
+#endif
+ goto illegal_op;
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysexit(tcg_const_i32(dflag));
+ gen_eob(s);
+ }
+ break;
+#ifdef TARGET_X86_64
+ case 0x105: /* syscall */
+ /* XXX: is it usable in real mode ? */
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_syscall(tcg_const_i32(s->pc - pc_start));
+ gen_eob(s);
+ break;
+ case 0x107: /* sysret */
+ if (!s->pe) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_sysret(tcg_const_i32(s->dflag));
+ /* condition codes are modified only in long mode */
+ if (s->lma)
+ s->cc_op = CC_OP_EFLAGS;
+ gen_eob(s);
+ }
+ break;
+#endif
+ case 0x1a2: /* cpuid */
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_cpuid();
+ break;
+ case 0xf4: /* hlt */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_hlt(tcg_const_i32(s->pc - pc_start));
+ s->is_jmp = DISAS_TB_JUMP;
+ }
+ break;
+ case 0x100:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* sldt */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
+ ot = OT_WORD;
+ if (mod == 3)
+ ot += s->dflag;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+ case 2: /* lldt */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_lldt(cpu_tmp2_i32);
+ }
+ break;
+ case 1: /* str */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
+ ot = OT_WORD;
+ if (mod == 3)
+ ot += s->dflag;
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+ break;
+ case 3: /* ltr */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_jmp_im(pc_start - s->cs_base);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_helper_ltr(cpu_tmp2_i32);
+ }
+ break;
+ case 4: /* verr */
+ case 5: /* verw */
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ if (op == 4)
+ gen_helper_verr(cpu_T[0]);
+ else
+ gen_helper_verw(cpu_T[0]);
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x101:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ rm = modrm & 7;
+ switch(op) {
+ case 0: /* sgdt */
+ if (mod == 3)
+ goto illegal_op;
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
+ gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ break;
+ case 1:
+ if (mod == 3) {
+ switch (rm) {
+ case 0: /* monitor */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+ s->cpl != 0)
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+#ifdef TARGET_X86_64
+ if (s->aflag == 2) {
+ gen_op_movq_A0_reg(R_EAX);
+ } else
+#endif
+ {
+ gen_op_movl_A0_reg(R_EAX);
+ if (s->aflag == 0)
+ gen_op_andl_A0_ffff();
+ }
+ gen_add_A0_ds_seg(s);
+ gen_helper_monitor(cpu_A0);
+ break;
+ case 1: /* mwait */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+ s->cpl != 0)
+ goto illegal_op;
+ gen_update_cc_op(s);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_mwait(tcg_const_i32(s->pc - pc_start));
+ gen_eob(s);
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else { /* sidt */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
+ gen_op_st_T0_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
+ gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ }
+ break;
+ case 2: /* lgdt */
+ case 3: /* lidt */
+ if (mod == 3) {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ switch(rm) {
+ case 0: /* VMRUN */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmrun(tcg_const_i32(s->aflag),
+ tcg_const_i32(s->pc - pc_start));
+ tcg_gen_exit_tb(0);
+ s->is_jmp = DISAS_TB_JUMP;
+ }
+ break;
+ case 1: /* VMMCALL */
+ if (!(s->flags & HF_SVME_MASK))
+ goto illegal_op;
+ gen_helper_vmmcall();
+ break;
+ case 2: /* VMLOAD */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmload(tcg_const_i32(s->aflag));
+ }
+ break;
+ case 3: /* VMSAVE */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_vmsave(tcg_const_i32(s->aflag));
+ }
+ break;
+ case 4: /* STGI */
+ if ((!(s->flags & HF_SVME_MASK) &&
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_stgi();
+ }
+ break;
+ case 5: /* CLGI */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_clgi();
+ }
+ break;
+ case 6: /* SKINIT */
+ if ((!(s->flags & HF_SVME_MASK) &&
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ !s->pe)
+ goto illegal_op;
+ gen_helper_skinit();
+ break;
+ case 7: /* INVLPGA */
+ if (!(s->flags & HF_SVME_MASK) || !s->pe)
+ goto illegal_op;
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ break;
+ } else {
+ gen_helper_invlpga(tcg_const_i32(s->aflag));
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ } else if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start,
+ op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_T1_A0(OT_WORD + s->mem_index);
+ gen_add_A0_im(s, 2);
+ gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
+ if (!s->dflag)
+ gen_op_andl_T0_im(0xffffff);
+ if (op == 2) {
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
+ tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
+ } else {
+ tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
+ tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
+ }
+ }
+ break;
+ case 4: /* smsw */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
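+ /* smsw reads the low 32 bits of CR0; on big-endian hosts they live at offset +4 within the 64-bit field */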
+#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
+#else
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
+#endif
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+ break;
+ case 6: /* lmsw */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ gen_helper_lmsw(cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ case 7:
+ if (mod != 3) { /* invlpg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_helper_invlpg(cpu_A0);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ } else {
+ switch (rm) {
+ case 0: /* swapgs */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
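+ /* swapgs: exchange GS.base with the kernelgsbase value */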
+ tcg_gen_ld_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,segs[R_GS].base));
+ tcg_gen_ld_tl(cpu_T[1], cpu_env,
+ offsetof(CPUX86State,kernelgsbase));
+ tcg_gen_st_tl(cpu_T[1], cpu_env,
+ offsetof(CPUX86State,segs[R_GS].base));
+ tcg_gen_st_tl(cpu_T[0], cpu_env,
+ offsetof(CPUX86State,kernelgsbase));
+ }
+ } else
+#endif
+ {
+ goto illegal_op;
+ }
+ break;
+ case 1: /* rdtscp */
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
+ goto illegal_op;
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (use_icount)
+ gen_io_start();
+ gen_helper_rdtscp();
+ if (use_icount) {
+ gen_io_end();
+ gen_jmp(s, s->pc - s->cs_base);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x108: /* invd */
+ case 0x109: /* wbinvd */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
+ /* nothing to do */
+ }
+ break;
+ case 0x63: /* arpl or movslS (x86_64) */
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
+ int d_ot;
+ /* d_ot is the size of destination */
+ d_ot = dflag + OT_WORD;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+ rm = (modrm & 7) | REX_B(s);
+
+ if (mod == 3) {
+ gen_op_mov_TN_reg(OT_LONG, 0, rm);
+ /* sign extend */
+ if (d_ot == OT_QUAD)
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ gen_op_mov_reg_T0(d_ot, reg);
+ } else {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (d_ot == OT_QUAD) {
+ gen_op_lds_T0_A0(OT_LONG + s->mem_index);
+ } else {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ }
+ gen_op_mov_reg_T0(d_ot, reg);
+ }
+ } else
+#endif
+ {
+ int label1;
+ TCGv t0, t1, t2, a0;
+
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ t0 = tcg_temp_local_new();
+ t1 = tcg_temp_local_new();
+ t2 = tcg_temp_local_new();
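+ /* arpl: if the destination RPL (t0 & 3) is below the source RPL (t1 & 3), raise it and set ZF; t2 carries the new ZF bit merged into cc_src */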
+ ot = OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = (modrm >> 3) & 7;
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+ if (mod != 3) {
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
+ a0 = tcg_temp_local_new();
+ tcg_gen_mov_tl(a0, cpu_A0);
+ } else {
+ gen_op_mov_v_reg(ot, t0, rm);
+ TCGV_UNUSED(a0);
+ }
+ gen_op_mov_v_reg(ot, t1, reg);
+ tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+ tcg_gen_andi_tl(t1, t1, 3);
+ tcg_gen_movi_tl(t2, 0);
+ label1 = gen_new_label();
+ tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+ tcg_gen_andi_tl(t0, t0, ~3);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_movi_tl(t2, CC_Z);
+ gen_set_label(label1);
+ if (mod != 3) {
+ gen_op_st_v(ot + s->mem_index, t0, a0);
+ tcg_temp_free(a0);
+ } else {
+ gen_op_mov_reg_v(ot, rm, t0);
+ }
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
+ s->cc_op = CC_OP_EFLAGS;
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ }
+ break;
+ case 0x102: /* lar */
+ case 0x103: /* lsl */
+ {
+ int label1;
+ TCGv t0;
+ if (!s->pe || s->vm86)
+ goto illegal_op;
+ ot = dflag ? OT_LONG : OT_WORD;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+ t0 = tcg_temp_local_new();
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ if (b == 0x102)
+ gen_helper_lar(t0, cpu_T[0]);
+ else
+ gen_helper_lsl(t0, cpu_T[0]);
+ tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
+ label1 = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+ gen_op_mov_reg_v(ot, reg, t0);
+ gen_set_label(label1);
+ s->cc_op = CC_OP_EFLAGS;
+ tcg_temp_free(t0);
+ }
+ break;
+ case 0x118:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* prefetchnta */
+ case 1: /* prefetcht0 */
+ case 2: /* prefetcht1 */
+ case 3: /* prefetcht2 */
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* nothing more to do */
+ break;
+ default: /* nop (multi byte) */
+ gen_nop_modrm(s, modrm);
+ break;
+ }
+ break;
+ case 0x119 ... 0x11f: /* nop (multi byte) */
+ modrm = ldub_code(s->pc++);
+ gen_nop_modrm(s, modrm);
+ break;
+ case 0x120: /* mov reg, crN */
+ case 0x122: /* mov crN, reg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ modrm = ldub_code(s->pc++);
+#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */
+ if ((modrm & 0xc0) != 0xc0)
+ goto illegal_op;
+#endif
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (CODE64(s))
+ ot = OT_QUAD;
+ else
+ ot = OT_LONG;
+ if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
+ (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
+ reg = 8;
+ }
+ switch(reg) {
+ case 0:
+ case 2:
+ case 3:
+ case 4:
+ case 8:
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ if (b & 2) {
+ gen_op_mov_TN_reg(ot, 0, rm);
+ gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg));
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ }
+ break;
+ case 0x121: /* mov reg, drN */
+ case 0x123: /* mov drN, reg */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ modrm = ldub_code(s->pc++);
+#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */
+ if ((modrm & 0xc0) != 0xc0)
+ goto illegal_op;
+#endif
+ rm = (modrm & 7) | REX_B(s);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ if (CODE64(s))
+ ot = OT_QUAD;
+ else
+ ot = OT_LONG;
+ /* XXX: do it dynamically with CR4.DE bit */
+ if (reg == 4 || reg == 5 || reg >= 8)
+ goto illegal_op;
+ if (b & 2) {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
+ gen_op_mov_TN_reg(ot, 0, rm);
+ gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
+ tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
+ gen_op_mov_reg_T0(ot, rm);
+ }
+ }
+ break;
+ case 0x106: /* clts */
+ if (s->cpl != 0) {
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+ } else {
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+ gen_helper_clts();
+ /* abort block because static cpu state changed */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob(s);
+ }
+ break;
+ /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
+ case 0x1c3: /* MOVNTI reg, mem */
+ if (!(s->cpuid_features & CPUID_SSE2))
+ goto illegal_op;
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ /* generate a generic store */
+ gen_ldst_modrm(s, modrm, ot, reg, 1);
+ break;
+ case 0x1ae:
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ op = (modrm >> 3) & 7;
+ switch(op) {
+ case 0: /* fxsave */
+ if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+ (s->prefix & PREFIX_LOCK))
+ goto illegal_op;
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2)));
+ break;
+ case 1: /* fxrstor */
+ if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
+ (s->prefix & PREFIX_LOCK))
+ goto illegal_op;
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (s->cc_op != CC_OP_DYNAMIC)
+ gen_op_set_cc_op(s->cc_op);
+ gen_jmp_im(pc_start - s->cs_base);
+ gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2)));
+ break;
+ case 2: /* ldmxcsr */
+ case 3: /* stmxcsr */
+ if (s->flags & HF_TS_MASK) {
+ gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+ break;
+ }
+ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
+ mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ if (op == 2) {
+ gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+ tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+ } else {
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+ gen_op_st_T0_A0(OT_LONG + s->mem_index);
+ }
+ break;
+ case 5: /* lfence */
+ case 6: /* mfence */
+ if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
+ goto illegal_op;
+ break;
+ case 7: /* sfence / clflush */
+ if ((modrm & 0xc7) == 0xc0) {
+ /* sfence */
+ /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
+ if (!(s->cpuid_features & CPUID_SSE))
+ goto illegal_op;
+ } else {
+ /* clflush */
+ if (!(s->cpuid_features & CPUID_CLFLUSH))
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ }
+ break;
+ default:
+ goto illegal_op;
+ }
+ break;
+ case 0x10d: /* 3DNow! prefetch(w) */
+ modrm = ldub_code(s->pc++);
+ mod = (modrm >> 6) & 3;
+ if (mod == 3)
+ goto illegal_op;
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ /* ignore for now */
+ break;
+ case 0x1aa: /* rsm */
+ gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
+ if (!(s->flags & HF_SMM_MASK))
+ goto illegal_op;
+ gen_update_cc_op(s);
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_helper_rsm();
+ gen_eob(s);
+ break;
+ case 0x1b8: /* SSE4.2 popcnt */
+ if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
+ PREFIX_REPZ)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
+ goto illegal_op;
+
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7);
+
+ if (s->prefix & PREFIX_DATA)
+ ot = OT_WORD;
+ else if (s->dflag != 2)
+ ot = OT_LONG;
+ else
+ ot = OT_QUAD;
+
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot));
+ gen_op_mov_reg_T0(ot, reg);
+
+ s->cc_op = CC_OP_EFLAGS;
+ break;
+ case 0x10e ... 0x10f:
+ /* 3DNow! instructions, ignore prefixes */
+ s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
+ case 0x110 ... 0x117:
+ case 0x128 ... 0x12f:
+ case 0x138 ... 0x13a:
+ case 0x150 ... 0x179:
+ case 0x17c ... 0x17f:
+ case 0x1c2:
+ case 0x1c4 ... 0x1c6:
+ case 0x1d0 ... 0x1fe:
+ gen_sse(s, b, pc_start, rex_r);
+ break;
+ default:
+ goto illegal_op;
+ }
+ /* lock generation */
+ if (s->prefix & PREFIX_LOCK)
+ gen_helper_unlock();
+ return s->pc;
+ illegal_op:
+ if (s->prefix & PREFIX_LOCK)
+ gen_helper_unlock();
+ /* XXX: ensure that no lock was generated */
+ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ return s->pc;
+}
+
+void optimize_flags_init(void)
+{
+#if TCG_TARGET_REG_BITS == 32
+ assert(sizeof(CCTable) == (1 << 3));
+#else
+ assert(sizeof(CCTable) == (1 << 4));
+#endif
+ cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+ cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, cc_op), "cc_op");
+ cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src),
+ "cc_src");
+ cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst),
+ "cc_dst");
+ cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
+ "cc_tmp");
+
+#ifdef TARGET_X86_64
+ cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EAX]), "rax");
+ cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ECX]), "rcx");
+ cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDX]), "rdx");
+ cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBX]), "rbx");
+ cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESP]), "rsp");
+ cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBP]), "rbp");
+ cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESI]), "rsi");
+ cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDI]), "rdi");
+ cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[8]), "r8");
+ cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[9]), "r9");
+ cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[10]), "r10");
+ cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[11]), "r11");
+ cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[12]), "r12");
+ cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[13]), "r13");
+ cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[14]), "r14");
+ cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, regs[15]), "r15");
+#else
+ cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EAX]), "eax");
+ cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ECX]), "ecx");
+ cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDX]), "edx");
+ cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBX]), "ebx");
+ cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESP]), "esp");
+ cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EBP]), "ebp");
+ cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_ESI]), "esi");
+ cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[R_EDI]), "edi");
+#endif
+
+ /* register helpers */
+#define GEN_HELPER 2
+#include "helper.h"
+}
+
+/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
+ basic block 'tb'. If search_pc is TRUE, also generate PC
+ information for each intermediate instruction. */
+static inline void gen_intermediate_code_internal(CPUState *env,
+ TranslationBlock *tb,
+ int search_pc)
+{
+ DisasContext dc1, *dc = &dc1;
+ target_ulong pc_ptr;
+ uint16_t *gen_opc_end;
+ CPUBreakpoint *bp;
+ int j, lj;
+ uint64_t flags;
+ target_ulong pc_start;
+ target_ulong cs_base;
+ int num_insns;
+ int max_insns;
+#ifdef VBOX
+ int const singlestep = env->state & CPU_EMULATE_SINGLE_STEP;
+#endif
+
+ /* generate intermediate code */
+ pc_start = tb->pc;
+ cs_base = tb->cs_base;
+ flags = tb->flags;
+
+ dc->pe = (flags >> HF_PE_SHIFT) & 1;
+ dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
+ dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
+ dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
+ dc->f_st = 0;
+ dc->vm86 = (flags >> VM_SHIFT) & 1;
+#ifdef VBOX
+ dc->vme = !!(env->cr[4] & CR4_VME_MASK);
+ dc->pvi = !!(env->cr[4] & CR4_PVI_MASK);
+# ifdef VBOX_WITH_CALL_RECORD
+ if ( !(env->state & CPU_RAW_RING0)
+ && (env->cr[0] & CR0_PG_MASK)
+ && !(env->eflags & X86_EFL_IF)
+ && dc->code32)
+ dc->record_call = 1;
+ else
+ dc->record_call = 0;
+# endif
+#endif /* VBOX */
+ dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
+ dc->iopl = (flags >> IOPL_SHIFT) & 3;
+ dc->tf = (flags >> TF_SHIFT) & 1;
+ dc->singlestep_enabled = env->singlestep_enabled;
+ dc->cc_op = CC_OP_DYNAMIC;
+ dc->cs_base = cs_base;
+ dc->tb = tb;
+ dc->popl_esp_hack = 0;
+ /* select memory access functions */
+ dc->mem_index = 0;
+ if (flags & HF_SOFTMMU_MASK) {
+ if (dc->cpl == 3)
+ dc->mem_index = 2 * 4;
+ else
+ dc->mem_index = 1 * 4;
+ }
+ dc->cpuid_features = env->cpuid_features;
+ dc->cpuid_ext_features = env->cpuid_ext_features;
+ dc->cpuid_ext2_features = env->cpuid_ext2_features;
+ dc->cpuid_ext3_features = env->cpuid_ext3_features;
+#ifdef TARGET_X86_64
+ dc->lma = (flags >> HF_LMA_SHIFT) & 1;
+ dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
+#endif
+ dc->flags = flags;
+ dc->jmp_opt = !(dc->tf || env->singlestep_enabled ||
+ (flags & HF_INHIBIT_IRQ_MASK)
+#ifndef CONFIG_SOFTMMU
+ || (flags & HF_SOFTMMU_MASK)
+#endif
+ );
+#if 0
+ /* check addseg logic */
+ if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
+ printf("ERROR addseg\n");
+#endif
+
+ cpu_T[0] = tcg_temp_new();
+ cpu_T[1] = tcg_temp_new();
+ cpu_A0 = tcg_temp_new();
+ cpu_T3 = tcg_temp_new();
+
+ cpu_tmp0 = tcg_temp_new();
+ cpu_tmp1_i64 = tcg_temp_new_i64();
+ cpu_tmp2_i32 = tcg_temp_new_i32();
+ cpu_tmp3_i32 = tcg_temp_new_i32();
+ cpu_tmp4 = tcg_temp_new();
+ cpu_tmp5 = tcg_temp_new();
+ cpu_ptr0 = tcg_temp_new_ptr();
+ cpu_ptr1 = tcg_temp_new_ptr();
+
+ gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+
+ dc->is_jmp = DISAS_NEXT;
+ pc_ptr = pc_start;
+ lj = -1;
+ num_insns = 0;
+ max_insns = tb->cflags & CF_COUNT_MASK;
+ if (max_insns == 0)
+ max_insns = CF_COUNT_MASK;
+
+ gen_icount_start();
+ for(;;) {
+ if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
+ QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+ if (bp->pc == pc_ptr &&
+ !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
+ gen_debug(dc, pc_ptr - dc->cs_base);
+ break;
+ }
+ }
+ }
+ if (search_pc) {
+ j = gen_opc_ptr - gen_opc_buf;
+ if (lj < j) {
+ lj++;
+ while (lj < j)
+ gen_opc_instr_start[lj++] = 0;
+ }
+ gen_opc_pc[lj] = pc_ptr;
+ gen_opc_cc_op[lj] = dc->cc_op;
+ gen_opc_instr_start[lj] = 1;
+ gen_opc_icount[lj] = num_insns;
+ }
+ if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+ gen_io_start();
+
+ pc_ptr = disas_insn(dc, pc_ptr);
+ num_insns++;
+ /* stop translation if indicated */
+ if (dc->is_jmp)
+ break;
+#ifdef VBOX
+# ifdef DEBUG
+/*
+ if(cpu_check_code_raw(env, pc_ptr, env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK))) == ERROR_SUCCESS)
+ {
+ //should never happen as the jump to the patch code terminates the translation block
+ dprintf(("QEmu is about to execute instructions in our patch block at %08X!!\n", pc_ptr));
+ }
+*/
+# endif /* DEBUG */
+ if (env->state & CPU_EMULATE_SINGLE_INSTR)
+ {
+ env->state &= ~CPU_EMULATE_SINGLE_INSTR;
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+#endif /* VBOX */
+
+ /* if single step mode, we generate only one instruction and
+ generate an exception */
+ /* if irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+ the flag and abort the translation to give the irqs a
+ chance to happen */
+ if (dc->tf || dc->singlestep_enabled ||
+ (flags & HF_INHIBIT_IRQ_MASK)) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ /* if the translation gets too long, stop generating as well */
+ if (gen_opc_ptr >= gen_opc_end ||
+ (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
+ num_insns >= max_insns) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ if (singlestep) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+ }
+ if (tb->cflags & CF_LAST_IO)
+ gen_io_end();
+ gen_icount_end(tb, num_insns);
+ *gen_opc_ptr = INDEX_op_end;
+ /* don't forget to fill the last values */
+ if (search_pc) {
+ j = gen_opc_ptr - gen_opc_buf;
+ lj++;
+ while (lj <= j)
+ gen_opc_instr_start[lj++] = 0;
+ }
+
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ int disas_flags;
+ qemu_log("----------------\n");
+ qemu_log("IN: %s\n", lookup_symbol(pc_start));
+#ifdef TARGET_X86_64
+ if (dc->code64)
+ disas_flags = 2;
+ else
+#endif
+ disas_flags = !dc->code32;
+ log_target_disas(pc_start, pc_ptr - pc_start, disas_flags);
+ qemu_log("\n");
+ }
+#endif
+
+ if (!search_pc) {
+ tb->size = pc_ptr - pc_start;
+ tb->icount = num_insns;
+ }
+}
+
+void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+{
+ gen_intermediate_code_internal(env, tb, 0);
+}
+
+void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+{
+ gen_intermediate_code_internal(env, tb, 1);
+}
+
+void gen_pc_load(CPUState *env, TranslationBlock *tb,
+ uintptr_t searched_pc, int pc_pos, void *puc)
+{
+ int cc_op;
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
+ int i;
+ qemu_log("RESTORE:\n");
+ for(i = 0;i <= pc_pos; i++) {
+ if (gen_opc_instr_start[i]) {
+ qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
+ }
+ }
+ qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+ searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+ (uint32_t)tb->cs_base);
+ }
+#endif
+ env->eip = gen_opc_pc[pc_pos] - tb->cs_base;
+ cc_op = gen_opc_cc_op[pc_pos];
+ if (cc_op != CC_OP_DYNAMIC)
+ env->cc_op = cc_op;
+}
diff --git a/src/recompiler/targphys.h b/src/recompiler/targphys.h
new file mode 100644
index 00000000..95648d68
--- /dev/null
+++ b/src/recompiler/targphys.h
@@ -0,0 +1,21 @@
+/* Define target_phys_addr_t if TARGET_PHYS_ADDR_BITS is defined. */
+
+#ifndef TARGPHYS_H
+#define TARGPHYS_H
+
+#ifdef TARGET_PHYS_ADDR_BITS
+/* target_phys_addr_t is the type of a physical address (its size can
+ be different from 'target_ulong'). */
+
+#if TARGET_PHYS_ADDR_BITS == 32
+typedef uint32_t target_phys_addr_t;
+#define TARGET_PHYS_ADDR_MAX UINT32_MAX
+#define TARGET_FMT_plx "%08x"
+#elif TARGET_PHYS_ADDR_BITS == 64
+typedef uint64_t target_phys_addr_t;
+#define TARGET_PHYS_ADDR_MAX UINT64_MAX
+#define TARGET_FMT_plx "%016" PRIx64
+#endif
+#endif
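+
+/* Usage sketch (illustrative only): TARGET_FMT_plx lets callers print a
+ * physical address without caring about its width, e.g.
+ *
+ *     target_phys_addr_t addr = 0x1000;
+ *     printf("phys addr = " TARGET_FMT_plx "\n", addr);
+ */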
+
+#endif
diff --git a/src/recompiler/tcg-runtime.c b/src/recompiler/tcg-runtime.c
new file mode 100644
index 00000000..ab2df5f0
--- /dev/null
+++ b/src/recompiler/tcg-runtime.c
@@ -0,0 +1,89 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef VBOX
+#include <stdint.h>
+#else
+# include <VBox/types.h>
+#endif
+
+#include "tcg/tcg-runtime.h"
+
+/* 32-bit helpers: out-of-line fallbacks for operations that the TCG backend may not implement natively */
+
+int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2)
+{
+ return arg1 / arg2;
+}
+
+int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2)
+{
+ return arg1 % arg2;
+}
+
+uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2)
+{
+ return arg1 / arg2;
+}
+
+uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2)
+{
+ return arg1 % arg2;
+}
+
+/* 64-bit helpers: mainly used on 32-bit hosts, where 64-bit shifts and divisions have no native TCG ops */
+
+int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2)
+{
+ return arg1 << arg2;
+}
+
+int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2)
+{
+ return (uint64_t)arg1 >> arg2;
+}
+
+int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2)
+{
+ return arg1 >> arg2;
+}
+
+int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2)
+{
+ return arg1 / arg2;
+}
+
+int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2)
+{
+ return arg1 % arg2;
+}
+
+uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2)
+{
+ return arg1 / arg2;
+}
+
+uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2)
+{
+ return arg1 % arg2;
+}
diff --git a/src/recompiler/tcg/LICENSE b/src/recompiler/tcg/LICENSE
new file mode 100644
index 00000000..be817fa1
--- /dev/null
+++ b/src/recompiler/tcg/LICENSE
@@ -0,0 +1,3 @@
+All the files in this directory and subdirectories are released under
+a BSD like license (see header in each file). No other license is
+accepted.
diff --git a/src/recompiler/tcg/Makefile.kup b/src/recompiler/tcg/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/tcg/Makefile.kup
diff --git a/src/recompiler/tcg/README b/src/recompiler/tcg/README
new file mode 100644
index 00000000..8d6fe059
--- /dev/null
+++ b/src/recompiler/tcg/README
@@ -0,0 +1,497 @@
+Tiny Code Generator - Fabrice Bellard.
+
+1) Introduction
+
+TCG (Tiny Code Generator) began as a generic backend for a C
+compiler. It was simplified to be used in QEMU. It also has its roots
+in the QOP code generator written by Paul Brook.
+
+2) Definitions
+
+The TCG "target" is the architecture for which we generate the
+code. It is of course not the same as the "target" of QEMU which is
+the emulated architecture. As TCG started as a generic C backend used
+for cross compiling, it is assumed that the TCG target is different
+from the host, although for QEMU the TCG target is always the same as
+the host.
+
+A TCG "function" corresponds to a QEMU Translated Block (TB).
+
+A TCG "temporary" is a variable only live in a basic
+block. Temporaries are allocated explicitly in each function.
+
+A TCG "local temporary" is a variable only live in a function. Local
+temporaries are allocated explicitly in each function.
+
+A TCG "global" is a variable which is live in all the functions
+(equivalent of a C global variable). They are defined before the
+functions are defined. A TCG global can be a memory location (e.g. a QEMU
+CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
+or a memory location which is stored in a register outside QEMU TBs
+(not implemented yet).
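+
+As a rough sketch (the exact types and the CPUState field are only
+examples; the i386 translator's optimize_flags_init() does something very
+similar), the three kinds of variables are created like this:
+
+  TCGv_i32 tmp  = tcg_temp_new_i32();        /* temporary */
+  TCGv_i32 ltmp = tcg_temp_local_new_i32();  /* local temporary */
+  /* global backed by a CPU state field */
+  TCGv_i32 cc_op = tcg_global_mem_new_i32(TCG_AREG0,
+                                          offsetof(CPUState, cc_op), "cc_op");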
+
+A TCG "basic block" corresponds to a list of instructions terminated
+by a branch instruction.
+
+3) Intermediate representation
+
+3.1) Introduction
+
+TCG instructions operate on variables which are temporaries, local
+temporaries or globals. TCG instructions and variables are strongly
+typed. Two types are supported: 32 bit integers and 64 bit
+integers. Pointers are defined as an alias to 32 bit or 64 bit
+integers depending on the TCG target word size.
+
+Each instruction has a fixed number of output variable operands, input
+variable operands and always-constant operands.
+
+The notable exception is the call instruction which has a variable
+number of outputs and inputs.
+
+In the textual form, output operands usually come first, followed by
+input operands, followed by constant operands. The output type is
+included in the instruction name. Constants are prefixed with a '$'.
+
+add_i32 t0, t1, t2 (t0 <- t1 + t2)
+
+3.2) Assumptions
+
+* Basic blocks
+
+- Basic blocks end after branches (e.g. brcond_i32 instruction),
+ goto_tb and exit_tb instructions.
+- Basic blocks start after the end of a previous basic block, or at a
+ set_label instruction.
+
+After the end of a basic block, the content of temporaries is
+destroyed, but local temporaries and globals are preserved.
+
+* Floating point types are not supported yet
+
+* Pointers: depending on the TCG target, pointer size is 32 bit or 64
+ bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or
+ TCG_TYPE_I64.
+
+* Helpers:
+
+Using the tcg_gen_helper_x_y functions it is possible to call any
+function taking i32, i64 or pointer types. By default, before calling
+a helper, all globals are stored at their canonical location and it
+is assumed that the function can modify them. This can be overridden
+by the TCG_CALL_CONST function modifier. By default, the helper is
+allowed to modify the CPU state or raise an exception. This can be
+overridden by the TCG_CALL_PURE function modifier, in which case the
+call to the function is removed if the return value is not used.
+
+On some TCG targets (e.g. x86), several calling conventions are
+supported.
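+
+In target code a helper is typically reached through the DEF_HELPER_*
+wrappers rather than by calling tcg_gen_helper_x_y directly. A minimal
+sketch, with a made-up helper name 'myop' (declaration in the target's
+helper.h, C implementation in a helper source file, call from the
+translator; dst, src1 and src2 are assumed TCGv_i32 values):
+
+  DEF_HELPER_2(myop, i32, i32, i32)
+
+  uint32_t helper_myop(uint32_t a, uint32_t b)
+  {
+      return a + b;
+  }
+
+  gen_helper_myop(dst, src1, src2);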
+
+* Branches:
+
+Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
+explicit address. Conditional branches can only jump to labels.
+
+3.3) Code Optimizations
+
+When generating instructions, you can count on at least the following
+optimizations:
+
+- Single instructions are simplified, e.g.
+
+ and_i32 t0, t0, $0xffffffff
+
+ is suppressed.
+
+- A liveness analysis is done at the basic block level. The
+ information is used to suppress moves from a dead variable to
+ another one. It is also used to remove instructions which compute
+ dead results. The latter is especially useful for condition code
+ optimization in QEMU.
+
+ In the following example:
+
+ add_i32 t0, t1, t2
+ add_i32 t0, t0, $1
+ mov_i32 t0, $1
+
+ only the last instruction is kept.
+
+3.4) Instruction Reference
+
+********* Function call
+
+* call <ret> <params> ptr
+
+call function 'ptr' (pointer type)
+
+<ret> optional 32 bit or 64 bit return value
+<params> optional 32 bit or 64 bit parameters
+
+********* Jumps/Labels
+
+* jmp t0
+
+Absolute jump to address t0 (pointer type).
+
+* set_label $label
+
+Define label 'label' at the current program point.
+
+* br $label
+
+Jump to label.
+
+* brcond_i32/i64 cond, t0, t1, label
+
+Conditional jump if t0 cond t1 is true. cond can be:
+ TCG_COND_EQ
+ TCG_COND_NE
+ TCG_COND_LT /* signed */
+ TCG_COND_GE /* signed */
+ TCG_COND_LE /* signed */
+ TCG_COND_GT /* signed */
+ TCG_COND_LTU /* unsigned */
+ TCG_COND_GEU /* unsigned */
+ TCG_COND_LEU /* unsigned */
+ TCG_COND_GTU /* unsigned */
+
+********* Arithmetic
+
+* add_i32/i64 t0, t1, t2
+
+t0=t1+t2
+
+* sub_i32/i64 t0, t1, t2
+
+t0=t1-t2
+
+* neg_i32/i64 t0, t1
+
+t0=-t1 (two's complement)
+
+* mul_i32/i64 t0, t1, t2
+
+t0=t1*t2
+
+* div_i32/i64 t0, t1, t2
+
+t0=t1/t2 (signed). Undefined behavior if division by zero or overflow.
+
+* divu_i32/i64 t0, t1, t2
+
+t0=t1/t2 (unsigned). Undefined behavior if division by zero.
+
+* rem_i32/i64 t0, t1, t2
+
+t0=t1%t2 (signed). Undefined behavior if division by zero or overflow.
+
+* remu_i32/i64 t0, t1, t2
+
+t0=t1%t2 (unsigned). Undefined behavior if division by zero.
+
+********* Logical
+
+* and_i32/i64 t0, t1, t2
+
+t0=t1&t2
+
+* or_i32/i64 t0, t1, t2
+
+t0=t1|t2
+
+* xor_i32/i64 t0, t1, t2
+
+t0=t1^t2
+
+* not_i32/i64 t0, t1
+
+t0=~t1
+
+* andc_i32/i64 t0, t1, t2
+
+t0=t1&~t2
+
+* eqv_i32/i64 t0, t1, t2
+
+t0=~(t1^t2), or equivalently, t0=t1^~t2
+
+* nand_i32/i64 t0, t1, t2
+
+t0=~(t1&t2)
+
+* nor_i32/i64 t0, t1, t2
+
+t0=~(t1|t2)
+
+* orc_i32/i64 t0, t1, t2
+
+t0=t1|~t2
+
+********* Shifts/Rotates
+
+* shl_i32/i64 t0, t1, t2
+
+t0=t1 << t2. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* shr_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (unsigned). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* sar_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (signed). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotl_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the left. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotr_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the right. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+********* Misc
+
+* mov_i32/i64 t0, t1
+
+t0 = t1
+
+Move t1 to t0 (both operands must have the same type).
+
+* ext8s_i32/i64 t0, t1
+ext8u_i32/i64 t0, t1
+ext16s_i32/i64 t0, t1
+ext16u_i32/i64 t0, t1
+ext32s_i64 t0, t1
+ext32u_i64 t0, t1
+
+8, 16 or 32 bit sign/zero extension (both operands must have the same type)
+
+* bswap16_i32/i64 t0, t1
+
+16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order
+bytes are set to zero.
+
+* bswap32_i32/i64 t0, t1
+
+32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that
+the four high order bytes are set to zero.
+
+* bswap64_i64 t0, t1
+
+64 bit byte swap
+
+* discard_i32/i64 t0
+
+Indicate that the value of t0 won't be used later. It is useful to
+force dead code elimination.
+
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+********* Type conversions
+
+* ext_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) with sign extension
+
+* extu_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) with zero extension
+
+* trunc_i64_i32 t0, t1
+Truncate t1 (64 bit) to t0 (32 bit)
+
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
+* concat32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half
+from t2 (64 bit).
+
+********* Load/Store
+
+* ld_i32/i64 t0, t1, offset
+ld8s_i32/i64 t0, t1, offset
+ld8u_i32/i64 t0, t1, offset
+ld16s_i32/i64 t0, t1, offset
+ld16u_i32/i64 t0, t1, offset
+ld32s_i64 t0, t1, offset
+ld32u_i64 t0, t1, offset
+
+t0 = read(t1 + offset)
+Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
+offset must be a constant.
+
+* st_i32/i64 t0, t1, offset
+st8_i32/i64 t0, t1, offset
+st16_i32/i64 t0, t1, offset
+st32_i64 t0, t1, offset
+
+write(t0, t1 + offset)
+Write 8, 16, 32 or 64 bits to host memory.
+
+********* 64-bit target on 32-bit host support
+
+The following opcodes are internal to TCG. Thus they are to be implemented by
+32-bit host code generators, but are not to be emitted by guest translators.
+They are emitted as needed by inline functions within "tcg-op.h".
+
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
+* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the 64-bit inputs T1 and T2 are
+formed from two 32-bit arguments, and the 64-bit output T0
+is returned in two 32-bit outputs.
+
+* mulu2_i32 t0_low, t0_high, t1, t2
+
+Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
+the full 64-bit product T0. The latter is returned in two 32-bit outputs.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
+********* QEMU specific operations
+
+* exit_tb t0
+
+Exit the current TB and return the value t0 (word type).
+
+* goto_tb index
+
+Exit the current TB and jump to the TB index 'index' (constant) if the
+current TB was linked to this TB. Otherwise execute the next
+instructions.
+
+* qemu_ld8u t0, t1, flags
+qemu_ld8s t0, t1, flags
+qemu_ld16u t0, t1, flags
+qemu_ld16s t0, t1, flags
+qemu_ld32 t0, t1, flags
+qemu_ld32u t0, t1, flags
+qemu_ld32s t0, t1, flags
+qemu_ld64 t0, t1, flags
+
+Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address
+type. 'flags' contains, for example, the QEMU memory index (which selects
+user or kernel access).
+
+Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and
+"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits.
+
+* qemu_st8 t0, t1, flags
+qemu_st16 t0, t1, flags
+qemu_st32 t0, t1, flags
+qemu_st64 t0, t1, flags
+
+Store the data t0 at the QEMU CPU address t1. t1 has the QEMU CPU
+address type. 'flags' contains, for example, the QEMU memory index
+(which selects user or kernel access).
+
+Note 1: Some shortcuts are defined when the last operand is known to be
+a constant (e.g. addi for add, movi for mov).
+
+Note 2: When using TCG, the opcodes must never be generated directly
+as some of them may not be available as "real" opcodes. Always use the
+function tcg_gen_xxx(args).
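+
+For example, a translator fragment built purely from the generator
+functions (the temporaries here are only for illustration) might look like:
+
+  TCGv_i32 a = tcg_temp_new_i32();
+  TCGv_i32 b = tcg_temp_new_i32();
+  tcg_gen_movi_i32(a, 42);       /* 'movi' shortcut for a constant operand */
+  tcg_gen_addi_i32(b, a, 1);     /* 'addi' shortcut for add with immediate */
+  tcg_gen_add_i32(a, a, b);      /* plain register-register add */
+  tcg_temp_free_i32(b);
+  tcg_temp_free_i32(a);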
+
+4) Backend
+
+tcg-target.h contains the target specific definitions. tcg-target.c
+contains the target specific code.
+
+4.1) Assumptions
+
+The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
+64 bit. It is expected that the pointer has the same size as the word.
+
+On a 32 bit target, all 64 bit operations are converted to 32 bits. A
+few specific operations must be implemented to allow it (see add2_i32,
+sub2_i32, brcond2_i32).
+
+Floating point operations are not supported in this version. A
+previous incarnation of the code generator had full support of them,
+but it is better to concentrate on integer operations first.
+
+On a 64 bit target, no assumption is made in TCG about the storage of
+the 32 bit values in 64 bit registers.
+
+4.2) Constraints
+
+GCC like constraints are used to define the constraints of every
+instruction. Memory constraints are not supported in this
+version. Aliases are specified in the input operands as for GCC.
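+
+For instance, entries in a backend's op-definition table might look like the
+following sketch (constraint letters are backend specific; 'c' here stands
+for %ecx on x86):
+
+  { INDEX_op_add_i32, { "r", "0", "ri" } },  /* dest: any reg, tied to input 0 */
+                                             /* input 1: register or immediate */
+  { INDEX_op_shl_i32, { "r", "0", "ci" } },  /* shift count in %ecx or constant */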
+
+The same register may be used for both an input and an output, even when
+they are not explicitly aliased. If an op expands to multiple target
+instructions then care must be taken to avoid clobbering input values.
+GCC style "early clobber" outputs are not currently supported.
+
+A target can define specific register or constant constraints. If an
+operation uses a constant input constraint which does not allow all
+constants, it must also accept registers in order to have a fallback.
+
+The movi_i32 and movi_i64 operations must accept any constants.
+
+The mov_i32 and mov_i64 operations must accept any registers of the
+same type.
+
+The ld/st instructions must accept signed 32 bit constant offsets. This
+can be implemented by reserving a specific register to compute the
+address if the offset is too big.
+
+The ld/st instructions must accept any destination (ld) or source (st)
+register.
+
+4.3) Function call assumptions
+
+- The only supported types for parameters and return value are: 32 and
+ 64 bit integers and pointer.
+- The stack grows downwards.
+- The first N parameters are passed in registers.
+- The next parameters are passed on the stack by storing them as words.
+- Some registers are clobbered during the call.
+- The function can return 0 or 1 value in registers. On a 32 bit
+ target, functions must be able to return 2 values in registers for
+ 64 bit return type.
+
+5) Recommended coding rules for best performance
+
+- Use globals to represent the parts of the QEMU CPU state which are
+ often modified, e.g. the integer registers and the condition
+ codes. TCG will be able to use host registers to store them.
+
+- Avoid globals stored in fixed registers. They must be used only to
+ store the pointer to the CPU state and possibly to store a pointer
+ to a register window.
+
+- Use temporaries. Use local temporaries only when really needed,
+ e.g. when you need to use a value after a jump. Local temporaries
+ introduce a performance hit in the current TCG implementation: their
+ content is saved to memory at the end of each basic block.
+
+- Free temporaries and local temporaries when they are no longer used
+ (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
+ should free it after it is used (see the short sketch after this
+ list). Freeing temporaries does not yield better generated code, but
+ it reduces the memory usage of TCG and improves the speed of the
+ translation.
+
+- Don't hesitate to use helpers for complicated or seldom used target
+ instructions. There is little performance advantage in using TCG to
+ implement target instructions taking more than about twenty TCG
+ instructions.
+
+- Use the 'discard' instruction if you know that TCG won't be able to
+ prove that a given global is "dead" at a given program point. The
+ x86 target uses it to improve the condition codes optimisation.
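+
+A short sketch of the tcg_const_x() rule above (dst and src are assumed to
+be existing i32 temporaries):
+
+  TCGv_i32 four = tcg_const_i32(4);   /* tcg_const_i32 allocates a temporary */
+  tcg_gen_shl_i32(dst, src, four);
+  tcg_temp_free_i32(four);            /* free it once it is no longer needed */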
diff --git a/src/recompiler/tcg/TODO b/src/recompiler/tcg/TODO
new file mode 100644
index 00000000..2bc2785d
--- /dev/null
+++ b/src/recompiler/tcg/TODO
@@ -0,0 +1,14 @@
+- Add new instructions such as: clz, ctz, popcnt.
+
+- See if it is worth exporting mul2, mulu2, div2, divu2.
+
+- Support of globals saved in fixed registers between TBs.
+
+Ideas:
+
+- Move the slow part of the qemu_ld/st ops after the end of the TB.
+
+- Change exception syntax to get closer to QOP system (exception
+ parameters given with a specific instruction).
+
+- Add float and vector support.
diff --git a/src/recompiler/tcg/i386/Makefile.kup b/src/recompiler/tcg/i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/tcg/i386/Makefile.kup
diff --git a/src/recompiler/tcg/i386/tcg-target.c b/src/recompiler/tcg/i386/tcg-target.c
new file mode 100644
index 00000000..0943d54e
--- /dev/null
+++ b/src/recompiler/tcg/i386/tcg-target.c
@@ -0,0 +1,2231 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+#if TCG_TARGET_REG_BITS == 64
+ "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+#else
+ "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R10,
+ TCG_REG_R11,
+# if !defined(VBOX) || !defined(__MINGW64__)
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+# endif
+ TCG_REG_RSI,
+ TCG_REG_RDI,
+# if defined(VBOX) && defined(__MINGW64__)
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_RDX,
+ TCG_REG_RCX,
+# endif
+ TCG_REG_RAX,
+#else
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+ TCG_REG_EBP,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EAX,
+#endif
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+# if defined(VBOX) && defined(__MINGW64__)
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+# else
+ TCG_REG_RDI,
+ TCG_REG_RSI,
+ TCG_REG_RDX,
+ TCG_REG_RCX,
+# endif
+ TCG_REG_R8,
+ TCG_REG_R9,
+#else
+ TCG_REG_EAX,
+ TCG_REG_EDX,
+ TCG_REG_ECX
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[2] = {
+ TCG_REG_EAX,
+ TCG_REG_EDX
+};
+
+static uint8_t *tb_ret_addr;
+
+static void patch_reloc(uint8_t *code_ptr, int type,
+ tcg_target_long value, tcg_target_long addend)
+{
+ value += addend;
+ switch(type) {
+ case R_386_PC32:
+ value -= (uintptr_t)code_ptr;
+ if (value != (int32_t)value) {
+ tcg_abort();
+ }
+ *(uint32_t *)code_ptr = value;
+ break;
+ case R_386_PC8:
+ value -= (uintptr_t)code_ptr;
+ if (value != (int8_t)value) {
+ tcg_abort();
+ }
+ *(uint8_t *)code_ptr = value;
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+#ifdef VBOX
+/* emits stack alignment checks for strict builds. */
+DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
+{
+# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
+ tcg_out8(s, 0xf7); tcg_out8(s, 0xc4); /* test %esp, 1fh */
+ tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);
+ tcg_out8(s, 0x74); /* jz imm8 */
+ tcg_out8(s, 1); /* $+3 (over int3) */
+ tcg_out8(s, 0xcc); /* int3 */
+# else
+ NOREF(s);
+# endif
+}
+#endif /* VBOX */
+
+/* maximum number of registers used for input function arguments */
+static inline int tcg_target_get_call_iarg_regs_count(int flags)
+{
+ if (TCG_TARGET_REG_BITS == 64) {
+ return 6;
+ }
+
+ flags &= TCG_CALL_TYPE_MASK;
+ switch(flags) {
+ case TCG_CALL_TYPE_STD:
+ return 0;
+ case TCG_CALL_TYPE_REGPARM_1:
+ case TCG_CALL_TYPE_REGPARM_2:
+ case TCG_CALL_TYPE_REGPARM:
+ return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch(ct_str[0]) {
+ case 'a':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
+ break;
+ case 'b':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
+ break;
+ case 'c':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
+ break;
+ case 'd':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
+ break;
+ case 'S':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
+ break;
+ case 'D':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
+ break;
+ case 'q':
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xf);
+ }
+ break;
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xff);
+ }
+ break;
+
+ /* qemu_ld/st address constraint */
+ case 'L':
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+#if defined(VBOX) && defined(__MINGW64__)
+ tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+ tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+ tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
+#else
+ /** @todo figure why RDX isn't mentioned here. */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+#endif
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
+ }
+ break;
+
+ case 'e':
+ ct->ct |= TCG_CT_CONST_S32;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_U32;
+ break;
+
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ }
+ return 0;
+}
+
+#if TCG_TARGET_REG_BITS == 64
+# define LOWREGMASK(x) ((x) & 7)
+#else
+# define LOWREGMASK(x) (x)
+#endif
+
+#define P_EXT 0x100 /* 0x0f opcode prefix */
+#define P_DATA16 0x200 /* 0x66 opcode prefix */
+#if TCG_TARGET_REG_BITS == 64
+# define P_ADDR32 0x400 /* 0x67 opcode prefix */
+# define P_REXW 0x800 /* Set REX.W = 1 */
+# define P_REXB_R 0x1000 /* REG field as byte register */
+# define P_REXB_RM 0x2000 /* R/M field as byte register */
+#else
+# define P_ADDR32 0
+# define P_REXW 0
+# define P_REXB_R 0
+# define P_REXB_RM 0
+#endif
+
+#define OPC_ARITH_EvIz (0x81)
+#define OPC_ARITH_EvIb (0x83)
+#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
+#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSWAP (0xc8 | P_EXT)
+#define OPC_CALL_Jz (0xe8)
+#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32 (0x48)
+#define OPC_IMUL_GvEv (0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb (0x6b)
+#define OPC_IMUL_GvEvIz (0x69)
+#define OPC_INC_r32 (0x40)
+#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
+#define OPC_JCC_short (0x70) /* ... plus condition code */
+#define OPC_JMP_long (0xe9)
+#define OPC_JMP_short (0xeb)
+#define OPC_LEA (0x8d)
+#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
+#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
+#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_MOVL_EvIz (0xc7)
+#define OPC_MOVL_Iv (0xb8)
+#define OPC_MOVSBL (0xbe | P_EXT)
+#define OPC_MOVSWL (0xbf | P_EXT)
+#define OPC_MOVSLQ (0x63 | P_REXW)
+#define OPC_MOVZBL (0xb6 | P_EXT)
+#define OPC_MOVZWL (0xb7 | P_EXT)
+#define OPC_POP_r32 (0x58)
+#define OPC_PUSH_r32 (0x50)
+#define OPC_PUSH_Iv (0x68)
+#define OPC_PUSH_Ib (0x6a)
+#define OPC_RET (0xc3)
+#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
+#define OPC_SHIFT_1 (0xd1)
+#define OPC_SHIFT_Ib (0xc1)
+#define OPC_SHIFT_cl (0xd3)
+#define OPC_TESTL (0x85)
+#define OPC_XCHG_ax_r32 (0x90)
+
+#define OPC_GRP3_Ev (0xf7)
+#define OPC_GRP5 (0xff)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+ These are also used as modifiers for OPC_ARITH. */
+#define ARITH_ADD 0
+#define ARITH_OR 1
+#define ARITH_ADC 2
+#define ARITH_SBB 3
+#define ARITH_AND 4
+#define ARITH_SUB 5
+#define ARITH_XOR 6
+#define ARITH_CMP 7
+
+/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
+#define SHIFT_ROL 0
+#define SHIFT_ROR 1
+#define SHIFT_SHL 4
+#define SHIFT_SHR 5
+#define SHIFT_SAR 7
+
+/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
+#define EXT3_NOT 2
+#define EXT3_NEG 3
+#define EXT3_MUL 4
+#define EXT3_IMUL 5
+#define EXT3_DIV 6
+#define EXT3_IDIV 7
+
+/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
+#define EXT5_INC_Ev 0
+#define EXT5_DEC_Ev 1
+#define EXT5_CALLN_Ev 2
+#define EXT5_JMPN_Ev 4
+
+/* Condition codes to be added to OPC_JCC_{long,short}. */
+#define JCC_JMP (-1)
+#define JCC_JO 0x0
+#define JCC_JNO 0x1
+#define JCC_JB 0x2
+#define JCC_JAE 0x3
+#define JCC_JE 0x4
+#define JCC_JNE 0x5
+#define JCC_JBE 0x6
+#define JCC_JA 0x7
+#define JCC_JS 0x8
+#define JCC_JNS 0x9
+#define JCC_JP 0xa
+#define JCC_JNP 0xb
+#define JCC_JL 0xc
+#define JCC_JGE 0xd
+#define JCC_JLE 0xe
+#define JCC_JG 0xf
+
+static const uint8_t tcg_cond_to_jcc[10] = {
+ [TCG_COND_EQ] = JCC_JE,
+ [TCG_COND_NE] = JCC_JNE,
+ [TCG_COND_LT] = JCC_JL,
+ [TCG_COND_GE] = JCC_JGE,
+ [TCG_COND_LE] = JCC_JLE,
+ [TCG_COND_GT] = JCC_JG,
+ [TCG_COND_LTU] = JCC_JB,
+ [TCG_COND_GEU] = JCC_JAE,
+ [TCG_COND_LEU] = JCC_JBE,
+ [TCG_COND_GTU] = JCC_JA,
+};
+
+#if defined(VBOX)
+/* Calc the size of the tcg_out_opc() result. */
+static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x)
+{
+ unsigned char len = 1;
+# if TCG_TARGET_REG_BITS == 64
+ unsigned rex;
+ rex = 0;
+ rex |= (opc & P_REXW) >> 8; /* REX.W */
+ rex |= (r & 8) >> 1; /* REX.R */
+ rex |= (x & 8) >> 2; /* REX.X */
+ rex |= (rm & 8) >> 3; /* REX.B */
+ rex |= opc & (r >= 4 ? P_REXB_R : 0);
+ rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
+ if (rex) len++;
+ if (opc & P_ADDR32) len++;
+# endif
+ if (opc & P_DATA16) len++;
+ if (opc & P_EXT) len++;
+
+ return len;
+}
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
+{
+ int rex;
+
+ if (opc & P_DATA16) {
+ /* We should never be asking for both 16 and 64-bit operation. */
+ assert((opc & P_REXW) == 0);
+ tcg_out8(s, 0x66);
+ }
+ if (opc & P_ADDR32) {
+ tcg_out8(s, 0x67);
+ }
+
+ rex = 0;
+ rex |= (opc & P_REXW) >> 8; /* REX.W */
+ rex |= (r & 8) >> 1; /* REX.R */
+ rex |= (x & 8) >> 2; /* REX.X */
+ rex |= (rm & 8) >> 3; /* REX.B */
+
+ /* P_REXB_{R,RM} indicates that the given register is the low byte.
+ For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
+ as otherwise the encoding indicates %[abcd]h. Note that the values
+ that are ORed in merely indicate that the REX byte must be present;
+ those bits get discarded in output. */
+ rex |= opc & (r >= 4 ? P_REXB_R : 0);
+ rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
+
+ if (rex) {
+ tcg_out8(s, (uint8_t)(rex | 0x40));
+ }
+
+ if (opc & P_EXT) {
+ tcg_out8(s, 0x0f);
+ }
+ tcg_out8(s, opc);
+}
+#else
+static void tcg_out_opc(TCGContext *s, int opc)
+{
+ if (opc & P_DATA16) {
+ tcg_out8(s, 0x66);
+ }
+ if (opc & P_EXT) {
+ tcg_out8(s, 0x0f);
+ }
+ tcg_out8(s, opc);
+}
+/* Discard the register arguments to tcg_out_opc early, so as not to penalize
+ the 32-bit compilation paths. This method works with all versions of gcc,
+ whereas relying on optimization may not be able to exclude them. */
+#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
+#endif
+
+static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
+{
+ tcg_out_opc(s, opc, r, rm, 0);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
+ A missing RM or INDEX is indicated by a negative value. In 64-bit
+ mode for absolute addresses, ~RM is the size of the immediate operand
+ that will follow the instruction. */
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+ int index, int shift,
+ tcg_target_long offset)
+{
+ int mod, len;
+
+ if (index < 0 && rm < 0) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* Try for a rip-relative addressing mode. This has replaced
+ the 32-bit-mode absolute addressing encoding. */
+#ifdef VBOX
+ tcg_target_long pc = (tcg_target_long)s->code_ptr
+ + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4;
+#else
+ tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
+#endif
+ tcg_target_long disp = offset - pc;
+ if (disp == (int32_t)disp) {
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
+ tcg_out32(s, disp);
+#ifdef VBOX
+ Assert(pc == (tcg_target_long)s->code_ptr);
+#endif
+ return;
+ }
+
+ /* Try for an absolute address encoding. This requires the
+ use of the MODRM+SIB encoding and is therefore larger than
+ rip-relative addressing. */
+ if (offset == (int32_t)offset) {
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (4 << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+
+ /* ??? The memory isn't directly addressable. */
+ tcg_abort();
+ } else {
+ /* Absolute address. */
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (r << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+ }
+
+ /* Find the length of the immediate addend. Note that the encoding
+ that would be used for (%ebp) indicates absolute addressing. */
+ if (rm < 0) {
+ mod = 0, len = 4, rm = 5;
+ } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+ mod = 0, len = 0;
+ } else if (offset == (int8_t)offset) {
+ mod = 0x40, len = 1;
+ } else {
+ mod = 0x80, len = 4;
+ }
+
+ /* Use a single byte MODRM format if possible. Note that the encoding
+ that would be used for %esp is the escape to the two byte form. */
+ if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+ /* Single byte MODRM format. */
+ tcg_out_opc(s, opc, r, rm, 0);
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+ } else {
+ /* Two byte MODRM+SIB format. */
+
+ /* Note that the encoding that would place %esp into the index
+ field indicates no index register. In 64-bit mode, the REX.X
+ bit counts, so %r12 can be used as the index. */
+ if (index < 0) {
+ index = 4;
+ } else {
+ assert(index != TCG_REG_ESP);
+ }
+
+ tcg_out_opc(s, opc, r, rm, index);
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
+ }
+
+ if (len == 1) {
+ tcg_out8(s, offset);
+ } else if (len == 4) {
+ tcg_out32(s, offset);
+ }
+}
+
+/* A simplification of the above with no index or shift. */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
+ int rm, tcg_target_long offset)
+{
+ tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
+}
+
+/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+ /* Propagate an opcode prefix, such as P_REXW. */
+ int ext = subop & ~0x7;
+ subop &= 0x7;
+
+ tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
+{
+ if (arg != ret) {
+ int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm(s, opc, ret, arg);
+ }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ int ret, tcg_target_long arg)
+{
+ if (arg == 0) {
+ tgen_arithr(s, ARITH_XOR, ret, ret);
+ return;
+ } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+ tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out32(s, arg);
+ } else if (arg == (int32_t)arg) {
+ tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
+ tcg_out32(s, arg);
+ } else {
+ tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out32(s, arg);
+ tcg_out32(s, arg >> 31 >> 1);
+ }
+}
+
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+ if (val == (int8_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
+ tcg_out8(s, val);
+ } else if (val == (int32_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
+ tcg_out32(s, val);
+ } else {
+ tcg_abort();
+ }
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
+ int arg1, tcg_target_long arg2)
+{
+ int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
+ int arg1, tcg_target_long arg2)
+{
+ int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
+}
+
+static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
+{
+ /* Propagate an opcode prefix, such as P_DATA16. */
+ int ext = subopc & ~0x7;
+ subopc &= 0x7;
+
+ if (count == 1) {
+ tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
+ tcg_out8(s, count);
+ }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_rolw_8(TCGContext *s, int reg)
+{
+ tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+ /* movzbl */
+ assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
+}
+
+static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsbl */
+ assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
+{
+ /* movzwl */
+ tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsw[lq] */
+ tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
+{
+ /* 32-bit mov zero extends. */
+ tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
+}
+
+static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
+{
+ tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
+}
+
+static inline void tcg_out_bswap64(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static void tgen_arithi(TCGContext *s, int c, int r0,
+ tcg_target_long val, int cf)
+{
+ int rexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ rexw = c & -8;
+ c &= 7;
+ }
+
+ /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
+ partial flags update stalls on Pentium4 and are not recommended
+ by current Intel optimization manuals. */
+ if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+ int is_inc = (c == ARITH_ADD) != (val < 0); /* VBox: cppcheck: "xor" for bools is "not-equals" */
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* The single-byte increment encodings are re-tasked as the
+ REX prefixes. Use the MODRM encoding. */
+ tcg_out_modrm(s, OPC_GRP5 + rexw,
+ (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
+ } else {
+ tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
+ }
+ return;
+ }
+
+ if (c == ARITH_AND) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (val == 0xffffffffu) {
+ tcg_out_ext32u(s, r0, r0);
+ return;
+ }
+ if (val == (uint32_t)val) {
+ /* AND with no high bits set can use a 32-bit operation. */
+ rexw = 0;
+ }
+ }
+ if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+ tcg_out_ext8u(s, r0, r0);
+ return;
+ }
+ if (val == 0xffffu) {
+ tcg_out_ext16u(s, r0, r0);
+ return;
+ }
+ }
+
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
+ tcg_out8(s, val);
+ return;
+ }
+ if (rexw == 0 || val == (int32_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
+ tcg_out32(s, val);
+ return;
+ }
+
+ tcg_abort();
+}
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+ if (val != 0) {
+ tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
+ }
+}
+
+#if defined(VBOX) && defined(RT_OS_DARWIN) && ARCH_BITS == 32
+# define VBOX_16_BYTE_STACK_ALIGN
+#endif
+#ifdef VBOX_16_BYTE_STACK_ALIGN
+static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
+{
+ if (val != 0) {
+ tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
+ }
+}
+#endif
+
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
+{
+ int32_t val, val1;
+ TCGLabel *l = &s->labels[label_index];
+
+ if (l->has_value) {
+ val = l->u.value - (tcg_target_long)s->code_ptr;
+ val1 = val - 2;
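+        /* A short jcc/jmp is 2 bytes long and its displacement is relative
+           to the end of the instruction, hence the -2. */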
+ if ((int8_t)val1 == val1) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out8(s, val1);
+ } else {
+ if (small) {
+ tcg_abort();
+ }
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ tcg_out32(s, val - 5);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ tcg_out32(s, val - 6);
+ }
+ }
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+ s->code_ptr += 1;
+ } else {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
+ s->code_ptr += 4;
+ }
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int rexw)
+{
+ if (const_arg2) {
+ if (arg2 == 0) {
+ /* test r, r */
+ tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
+ } else {
+ tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
+ }
+ } else {
+ tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
+ }
+}
+
+static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
+}
+#else
+/* XXX: we implement it at the target level to avoid having to
+   handle cross-basic-block temporaries */
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
+{
+ int label_next;
+ label_next = gen_new_label();
+ switch(args[4]) {
+ case TCG_COND_EQ:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+ args[5], small);
+ break;
+ case TCG_COND_NE:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ args[5], small);
+ tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
+ args[5], small);
+ break;
+ case TCG_COND_LT:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_LE:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_GT:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_GE:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
+ break;
+ default:
+ tcg_abort();
+ }
+ tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+}
+#endif
+
+static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+#else
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ int label_true, label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_true;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_over;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ }
+}
+#endif
+
+static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
+{
+#ifdef VBOX
+ tcg_target_long disp = dest - (tcg_target_long)s->code_ptr
+ - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0)
+ - 4;
+#else
+ tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
+#endif
+
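+    /* A direct call/jmp rel32 is 5 bytes and its displacement is relative to
+       the end of the instruction.  If the target is out of rel32 range (only
+       possible on 64-bit hosts), go through R10 instead. */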
+ if (disp == (int32_t)disp) {
+ tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
+ tcg_out32(s, disp);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
+ tcg_out_modrm(s, OPC_GRP5,
+ call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
+ }
+}
+
+static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
+{
+#ifdef VBOX
+ tcg_gen_stack_alignment_check(s);
+#endif
+ tcg_out_branch(s, 1, dest);
+}
+
+static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
+{
+ tcg_out_branch(s, 0, dest);
+}
+
+#if defined(CONFIG_SOFTMMU)
+
+#include "../../softmmu_defs.h"
+
+static void *qemu_ld_helpers[4] = {
+ __ldb_mmu,
+ __ldw_mmu,
+ __ldl_mmu,
+ __ldq_mmu,
+};
+
+static void *qemu_st_helpers[4] = {
+ __stb_mmu,
+ __stw_mmu,
+ __stl_mmu,
+ __stq_mmu,
+};
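+
+/* The index into these helper tables is the log2 of the access size:
+   0 = byte, 1 = word, 2 = long, 3 = quad. */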
+
+/* Perform the TLB load and compare.
+
+ Inputs:
+ ADDRLO_IDX contains the index into ARGS of the low part of the
+   address; the high part of the address is at ADDRLO_IDX+1.
+
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ This should be offsetof addr_read or addr_write.
+
+ Outputs:
+   LABEL_PTR is filled with one (for 32-bit addresses) or two (for 64-bit
+   addresses) entries: the positions of the displacement bytes of the
+   forward jumps to the TLB miss case.
+
+ First argument register is loaded with the low part of the address.
+ In the TLB hit case, it has been adjusted as indicated by the TLB
+ and so is a host address. In the TLB miss case, it continues to
+ hold a guest address.
+
+ Second argument register is clobbered. */
+
+static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
+ int mem_index, int s_bits,
+ const TCGArg *args,
+ uint8_t **label_ptr, int which)
+{
+ const int addrlo = args[addrlo_idx];
+ const int r0 = tcg_target_call_iarg_regs[0];
+ const int r1 = tcg_target_call_iarg_regs[1];
+ TCGType type = TCG_TYPE_I32;
+ int rexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
+ type = TCG_TYPE_I64;
+ rexw = P_REXW;
+ }
+
+ tcg_out_mov(s, type, r1, addrlo);
+ tcg_out_mov(s, type, r0, addrlo);
+
+ tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tgen_arithi(s, ARITH_AND + rexw, r0,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+ tgen_arithi(s, ARITH_AND + rexw, r1,
+ (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
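+    /* r1 now holds the byte offset of this page's TLB entry, i.e.
+       ((addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1)) << CPU_TLB_ENTRY_BITS,
+       while r0 keeps the page number plus the low alignment bits, so that an
+       insufficiently aligned access fails the tag compare below. */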
+
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+ offsetof(CPUState, tlb_table[mem_index][0])
+ + which);
+
+ /* cmp 0(r1), r0 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+
+ tcg_out_mov(s, type, r0, addrlo);
+
+ /* jne label1 */
+ tcg_out8(s, OPC_JCC_short + JCC_JNE);
+ label_ptr[0] = s->code_ptr;
+ s->code_ptr++;
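+    /* label_ptr[0] records where the 8-bit displacement byte will live; the
+       callers patch it once the TLB-miss code has been emitted. */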
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(r1), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+
+ /* jne label1 */
+ tcg_out8(s, OPC_JCC_short + JCC_JNE);
+ label_ptr[1] = s->code_ptr;
+ s->code_ptr++;
+ }
+
+ /* TLB Hit. */
+
+ /* add addend(r1), r0 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+ offsetof(CPUTLBEntry, addend) - which);
+}
+#endif
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
+ int base, tcg_target_long ofs, int sizeop)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ const int bswap = 1;
+#else
+ const int bswap = 0;
+#endif
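+    /* The low two bits of sizeop give the log2 of the access size; bit 2
+       selects a sign-extending load (the "| 4" cases below). */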
+ switch (sizeop) {
+ case 0:
+ tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ break;
+ case 0 | 4:
+ tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ break;
+ case 1:
+ tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ if (bswap) {
+ tcg_out_rolw_8(s, datalo);
+ }
+ break;
+ case 1 | 4:
+ if (bswap) {
+ tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_rolw_8(s, datalo);
+ tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ }
+ break;
+ case 2:
+ tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ if (bswap) {
+ tcg_out_bswap32(s, datalo);
+ }
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case 2 | 4:
+ if (bswap) {
+ tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_bswap32(s, datalo);
+ tcg_out_ext32s(s, datalo, datalo);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ }
+ break;
+#endif
+ case 3:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ if (bswap) {
+ tcg_out_bswap64(s, datalo);
+ }
+ } else {
+ if (bswap) {
+ int t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ if (base != datalo) {
+ tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ }
+ if (bswap) {
+ tcg_out_bswap32(s, datalo);
+ tcg_out_bswap32(s, datahi);
+ }
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)
+
+static void * const vbox_ld_helpers[] = {
+ __ldub_vbox_phys,
+ __lduw_vbox_phys,
+ __ldul_vbox_phys,
+ __ldq_vbox_phys,
+ __ldb_vbox_phys,
+ __ldw_vbox_phys,
+ __ldl_vbox_phys,
+ __ldq_vbox_phys,
+};
+
+static void * const vbox_st_helpers[] = {
+ __stb_vbox_phys,
+ __stw_vbox_phys,
+ __stl_vbox_phys,
+ __stq_vbox_phys
+};
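+
+/* The low two bits of the helper index select the access size; load entries
+   4-7 are the sign-extending variants.  64-bit accesses ((index & 3) == 3)
+   use a register pair, hence useReg2 in the callers below. */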
+
+DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst)
+{
+ intptr_t disp;
+# ifdef VBOX
+ tcg_gen_stack_alignment_check(s);
+# endif
+ disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
+ tcg_out8(s, 0xe8); /* call disp32 */
+ tcg_out32(s, disp); /* disp32 */
+}
+
+static void tcg_out_vbox_phys_read(TCGContext *s, int index,
+ int addr_reg,
+ int data_reg, int data_reg2)
+{
+ int useReg2 = ((index & 3) == 3);
+
+ /** @todo should we make phys address accessors fastcalls - probably not a big deal */
+ /* out parameter (address), note that phys address is always 64-bit */
+ AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
+
+# if 0
+ tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
+ tcg_out_push(s, addr_reg);
+# else
+ /* mov addr_reg, %eax */
+    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
+# endif
+
+ tcg_out_long_call(s, vbox_ld_helpers[index]);
+
+ /* mov %eax, data_reg */
+    tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+
+ /* returned 64-bit value */
+ if (useReg2)
+        tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
+}
+
+static void tcg_out_vbox_phys_write(TCGContext *s, int index,
+                                    int addr_reg,
+                                    int val_reg, int val_reg2)
+{
+ int useReg2 = ((index & 3) == 3);
+
+# if 0
+ /* out parameter (value2) */
+ if (useReg2)
+ tcg_out_push(s, val_reg2);
+ /* out parameter (value) */
+ tcg_out_push(s, val_reg);
+ /* out parameter (address), note that phys address is always 64-bit */
+ AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
+ tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */
+ tcg_out_push(s, addr_reg);
+# else
+ Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
+ /* mov addr_reg, %eax */
+    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EAX, addr_reg);
+    Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
+    /* mov val_reg, %edx */
+    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, val_reg);
+    if (useReg2)
+        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, val_reg2);
+
+# endif
+ /* call it */
+ tcg_out_long_call(s, vbox_st_helpers[index]);
+
+ /* clean stack after us */
+# if 0
+ tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
+# endif
+}
+
+#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */
+
+/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
+   EAX.  That will become useful once fixed-register globals are less
+   common. */
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
+ int opc)
+{
+ int data_reg, data_reg2 = 0;
+ int addrlo_idx;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index, s_bits, arg_idx;
+ uint8_t *label_ptr[3];
+#endif
+
+ data_reg = args[0];
+ addrlo_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ data_reg2 = args[1];
+ addrlo_idx = 2;
+ }
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+ s_bits = opc & 3;
+
+ tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+ label_ptr, offsetof(CPUTLBEntry, addr_read));
+
+ /* TLB Hit. */
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
+ tcg_target_call_iarg_regs[0], 0, opc);
+
+ /* jmp label2 */
+ tcg_out8(s, OPC_JMP_short);
+ label_ptr[2] = s->code_ptr;
+ s->code_ptr++;
+
+ /* TLB Miss. */
+
+ /* label1: */
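+    /* Patch the 8-bit displacements of the forward jumps emitted by
+       tcg_out_tlb_load; they are relative to the byte following the
+       displacement itself, hence the -1. */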
+ *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+ }
+
+    /* XXX: move that code to the end of the TB */
+ /* The first argument is already loaded with addrlo. */
+ arg_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+ args[addrlo_idx + 1]);
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
+ mem_index);
+ tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
+
+ switch(opc) {
+ case 0 | 4:
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 1 | 4:
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 0:
+ tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 1:
+ tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 2:
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case 2 | 4:
+ tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+ break;
+#endif
+ case 3:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+ } else if (data_reg == TCG_REG_EDX) {
+ /* xchg %edx, %eax */
+ tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+
+ /* label2: */
+ *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+#else
+# if defined(VBOX) && defined(__MINGW64__)
+# error port me
+# endif
+ {
+ int32_t offset = GUEST_BASE;
+ int base = args[addrlo_idx];
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* ??? We assume all operations have left us with register
+ contents that are zero extended. So far this appears to
+ be true. If we want to enforce this, we can either do
+ an explicit zero-extension here, or (if GUEST_BASE == 0)
+ use the ADDR32 prefix. For now, do nothing. */
+
+ if (offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+ base = TCG_REG_RDI, offset = 0;
+ }
+ }
+
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ }
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
+ int base, tcg_target_long ofs, int sizeop)
+{
+#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+#ifdef TARGET_WORDS_BIGENDIAN
+ const int bswap = 1;
+#else
+ const int bswap = 0;
+#endif
+ /* ??? Ideally we wouldn't need a scratch register. For user-only,
+ we could perform the bswap twice to restore the original value
+ instead of moving to the scratch. But as it is, the L constraint
+ means that the second argument reg is definitely free here. */
+ int scratch = tcg_target_call_iarg_regs[1];
+
+ switch (sizeop) {
+ case 0:
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ break;
+ case 1:
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_rolw_8(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ break;
+ case 2:
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_bswap32(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ break;
+ case 3:
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
+ tcg_out_bswap64(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ } else if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
+ tcg_out_bswap32(s, scratch);
+ tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_bswap32(s, scratch);
+ tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+#else /* VBOX */
+# error "broken"
+    /* This path is disabled by the #error above; assuming the write helper
+       with this function's parameters is what is wanted here. */
+    tcg_out_vbox_phys_write(s, sizeop, base, datalo, datahi);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
+ int opc)
+{
+ int data_reg, data_reg2 = 0;
+ int addrlo_idx;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index, s_bits;
+ int stack_adjust;
+ uint8_t *label_ptr[3];
+#endif
+
+ data_reg = args[0];
+ addrlo_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ data_reg2 = args[1];
+ addrlo_idx = 2;
+ }
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+ s_bits = opc;
+
+ tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+ label_ptr, offsetof(CPUTLBEntry, addr_write));
+
+ /* TLB Hit. */
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2,
+ tcg_target_call_iarg_regs[0], 0, opc);
+
+ /* jmp label2 */
+ tcg_out8(s, OPC_JMP_short);
+ label_ptr[2] = s->code_ptr;
+ s->code_ptr++;
+
+ /* TLB Miss. */
+
+ /* label1: */
+ *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+ }
+
+# if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
+
+    /* XXX: move that code to the end of the TB */
+ if (TCG_TARGET_REG_BITS == 64) {
+# if defined(VBOX) && defined(__MINGW64__)
+ tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ tcg_target_call_iarg_regs[1], data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+# else
+ tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ TCG_REG_RSI, data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+# endif
+ stack_adjust = 0;
+ } else if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
+ if (opc == 3) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
+# ifdef VBOX_16_BYTE_STACK_ALIGN
+ tcg_out_subi(s, TCG_REG_ESP, 12);
+# endif
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 4;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
+ stack_adjust = 0;
+ }
+ } else {
+ if (opc == 3) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
+# ifdef VBOX_16_BYTE_STACK_ALIGN
+ tcg_out_pushi(s, 0);
+# endif
+ tcg_out_pushi(s, mem_index);
+ tcg_out_push(s, data_reg2);
+ tcg_out_push(s, data_reg);
+ stack_adjust = 12;
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
+ switch(opc) {
+ case 0:
+ tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
+ break;
+ case 1:
+ tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
+ break;
+ case 2:
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
+ break;
+ }
+# ifdef VBOX_16_BYTE_STACK_ALIGN
+ tcg_out_subi(s, TCG_REG_ESP, 12);
+# endif
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 4;
+ }
+ }
+
+ tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+# ifdef VBOX_16_BYTE_STACK_ALIGN
+ if (stack_adjust != 0) {
+ tcg_out_addi(s, TCG_REG_ESP, RT_ALIGN(stack_adjust, 16));
+ }
+# else
+ if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+ /* Pop and discard. This is 2 bytes smaller than the add. */
+ tcg_out_pop(s, TCG_REG_ECX);
+ } else if (stack_adjust != 0) {
+ tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+ }
+# endif
+
+# else /* VBOX && REM_PHYS_ADDR_IN_TLB */
+# error Borked
+ tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
+# endif /* VBOX && REM_PHYS_ADDR_IN_TLB */
+
+ /* label2: */
+ *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+#else
+# if defined(VBOX) && defined(__MINGW64__)
+# error port me
+# endif
+ {
+ int32_t offset = GUEST_BASE;
+ int base = args[addrlo_idx];
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* ??? We assume all operations have left us with register
+ contents that are zero extended. So far this appears to
+ be true. If we want to enforce this, we can either do
+ an explicit zero-extension here, or (if GUEST_BASE == 0)
+ use the ADDR32 prefix. For now, do nothing. */
+
+ if (offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+ base = TCG_REG_RDI, offset = 0;
+ }
+ }
+
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+ }
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ int c, rexw = 0;
+
+#if TCG_TARGET_REG_BITS == 64
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i64): \
+ rexw = P_REXW; /* FALLTHRU */ \
+ case glue(glue(INDEX_op_, x), _i32)
+#else
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i32)
+#endif
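+/* OP_32_64(x) expands to the _i64 case (which sets rexw = P_REXW and falls
+   through) followed by the _i32 case on 64-bit hosts, and to the _i32 case
+   alone on 32-bit hosts. */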
+
+ switch(opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+ tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* direct jump method */
+ tcg_out8(s, OPC_JMP_long); /* jmp im */
+ s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+ tcg_out32(s, 0);
+ } else {
+ /* indirect jump method */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
+ (tcg_target_long)(s->tb_next + args[0]));
+ }
+ s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+ break;
+ case INDEX_op_call:
+ if (const_args[0]) {
+ tcg_out_calli(s, args[0]);
+ } else {
+ /* call *reg */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
+ }
+ break;
+ case INDEX_op_jmp:
+ if (const_args[0]) {
+ tcg_out_jmp(s, args[0]);
+ } else {
+ /* jmp *reg */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
+ }
+ break;
+ case INDEX_op_br:
+ tcg_out_jxx(s, JCC_JMP, args[0], 0);
+ break;
+ case INDEX_op_movi_i32:
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ OP_32_64(ld8u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld8s):
+ tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld16u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld16s):
+ tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_ld32u_i64:
+#endif
+ case INDEX_op_ld_i32:
+ tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ OP_32_64(st8):
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+ args[0], args[1], args[2]);
+ break;
+ OP_32_64(st16):
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+ args[0], args[1], args[2]);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_st32_i64:
+#endif
+ case INDEX_op_st_i32:
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ OP_32_64(add):
+ /* For 3-operand addition, use LEA. */
+ if (args[0] != args[1]) {
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+ if (const_args[2]) {
+ c3 = a2, a2 = -1;
+ } else if (a0 == a2) {
+ /* Watch out for dest = src + dest, since we've removed
+ the matching constraint on the add. */
+ tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
+ break;
+ }
+
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
+ break;
+ }
+ c = ARITH_ADD;
+ goto gen_arith;
+ OP_32_64(sub):
+ c = ARITH_SUB;
+ goto gen_arith;
+ OP_32_64(and):
+ c = ARITH_AND;
+ goto gen_arith;
+ OP_32_64(or):
+ c = ARITH_OR;
+ goto gen_arith;
+ OP_32_64(xor):
+ c = ARITH_XOR;
+ goto gen_arith;
+ gen_arith:
+ if (const_args[2]) {
+ tgen_arithi(s, c + rexw, args[0], args[2], 0);
+ } else {
+ tgen_arithr(s, c + rexw, args[0], args[2]);
+ }
+ break;
+
+ OP_32_64(mul):
+ if (const_args[2]) {
+ int32_t val;
+ val = args[2];
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
+ tcg_out8(s, val);
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
+ tcg_out32(s, val);
+ }
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
+ }
+ break;
+
+ OP_32_64(div2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
+ break;
+ OP_32_64(divu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
+ break;
+
+ OP_32_64(shl):
+ c = SHIFT_SHL;
+ goto gen_shift;
+ OP_32_64(shr):
+ c = SHIFT_SHR;
+ goto gen_shift;
+ OP_32_64(sar):
+ c = SHIFT_SAR;
+ goto gen_shift;
+ OP_32_64(rotl):
+ c = SHIFT_ROL;
+ goto gen_shift;
+ OP_32_64(rotr):
+ c = SHIFT_ROR;
+ goto gen_shift;
+ gen_shift:
+ if (const_args[2]) {
+ tcg_out_shifti(s, c + rexw, args[0], args[2]);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
+ args[3], 0);
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond32(s, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+
+ OP_32_64(bswap16):
+ tcg_out_rolw_8(s, args[0]);
+ break;
+ OP_32_64(bswap32):
+ tcg_out_bswap32(s, args[0]);
+ break;
+
+ OP_32_64(neg):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+ break;
+ OP_32_64(not):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
+ break;
+
+ OP_32_64(ext8s):
+ tcg_out_ext8s(s, args[0], args[1], rexw);
+ break;
+ OP_32_64(ext16s):
+ tcg_out_ext16s(s, args[0], args[1], rexw);
+ break;
+ OP_32_64(ext8u):
+ tcg_out_ext8u(s, args[0], args[1]);
+ break;
+ OP_32_64(ext16u):
+ tcg_out_ext16u(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_qemu_ld8u:
+ tcg_out_qemu_ld(s, args, 0);
+ break;
+ case INDEX_op_qemu_ld8s:
+ tcg_out_qemu_ld(s, args, 0 | 4);
+ break;
+ case INDEX_op_qemu_ld16u:
+ tcg_out_qemu_ld(s, args, 1);
+ break;
+ case INDEX_op_qemu_ld16s:
+ tcg_out_qemu_ld(s, args, 1 | 4);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_qemu_ld32u:
+#endif
+ case INDEX_op_qemu_ld32:
+ tcg_out_qemu_ld(s, args, 2);
+ break;
+ case INDEX_op_qemu_ld64:
+ tcg_out_qemu_ld(s, args, 3);
+ break;
+
+ case INDEX_op_qemu_st8:
+ tcg_out_qemu_st(s, args, 0);
+ break;
+ case INDEX_op_qemu_st16:
+ tcg_out_qemu_st(s, args, 1);
+ break;
+ case INDEX_op_qemu_st32:
+ tcg_out_qemu_st(s, args, 2);
+ break;
+ case INDEX_op_qemu_st64:
+ tcg_out_qemu_st(s, args, 3);
+ break;
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args, 0);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+ case INDEX_op_mulu2_i32:
+ tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
+ break;
+ case INDEX_op_add2_i32:
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+ }
+ break;
+ case INDEX_op_sub2_i32:
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+ }
+ break;
+#else /* TCG_TARGET_REG_BITS == 64 */
+ case INDEX_op_movi_i64:
+ tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_qemu_ld32s:
+ tcg_out_qemu_ld(s, args, 2 | 4);
+ break;
+
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
+ args[3], 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond64(s, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, args[0]);
+ break;
+ case INDEX_op_ext32u_i64:
+ tcg_out_ext32u(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext32s_i64:
+ tcg_out_ext32s(s, args[0], args[1]);
+ break;
+#endif
+
+ default:
+ tcg_abort();
+ }
+
+#undef OP_32_64
+}
+
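+/* Operand constraints, parsed by target_parse_constraint() earlier in this
+   file.  Roughly: "r" is any register, "q" a byte-addressable register,
+   "a"/"d" the fixed EAX/EDX, "0"/"1" an input tied to that output, "i" any
+   immediate, "e"/"Z" constants that fit in signed/unsigned 32 bits (64-bit
+   hosts only), "ci" ECX or an immediate (shift counts), and "L" the
+   registers usable by the qemu_ld/st slow path. */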
+static const TCGTargetOpDef x86_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_call, { "ri" } },
+ { INDEX_op_jmp, { "ri" } },
+ { INDEX_op_br, { } },
+ { INDEX_op_mov_i32, { "r", "r" } },
+ { INDEX_op_movi_i32, { "r" } },
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "q", "r" } },
+ { INDEX_op_st16_i32, { "r", "r" } },
+ { INDEX_op_st_i32, { "r", "r" } },
+
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "0", "ri" } },
+ { INDEX_op_mul_i32, { "r", "0", "ri" } },
+ { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_and_i32, { "r", "0", "ri" } },
+ { INDEX_op_or_i32, { "r", "0", "ri" } },
+ { INDEX_op_xor_i32, { "r", "0", "ri" } },
+
+ { INDEX_op_shl_i32, { "r", "0", "ci" } },
+ { INDEX_op_shr_i32, { "r", "0", "ci" } },
+ { INDEX_op_sar_i32, { "r", "0", "ci" } },
+ { INDEX_op_rotl_i32, { "r", "0", "ci" } },
+ { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+
+ { INDEX_op_brcond_i32, { "r", "ri" } },
+
+ { INDEX_op_bswap16_i32, { "r", "0" } },
+ { INDEX_op_bswap32_i32, { "r", "0" } },
+
+ { INDEX_op_neg_i32, { "r", "0" } },
+
+ { INDEX_op_not_i32, { "r", "0" } },
+
+ { INDEX_op_ext8s_i32, { "r", "q" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_ext8u_i32, { "r", "q" } },
+ { INDEX_op_ext16u_i32, { "r", "r" } },
+
+ { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
+ { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+ { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+ { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+ { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+#else
+ { INDEX_op_mov_i64, { "r", "r" } },
+ { INDEX_op_movi_i64, { "r" } },
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "r", "r" } },
+ { INDEX_op_st16_i64, { "r", "r" } },
+ { INDEX_op_st32_i64, { "r", "r" } },
+ { INDEX_op_st_i64, { "r", "r" } },
+
+ { INDEX_op_add_i64, { "r", "0", "re" } },
+ { INDEX_op_mul_i64, { "r", "0", "re" } },
+ { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_sub_i64, { "r", "0", "re" } },
+ { INDEX_op_and_i64, { "r", "0", "reZ" } },
+ { INDEX_op_or_i64, { "r", "0", "re" } },
+ { INDEX_op_xor_i64, { "r", "0", "re" } },
+
+ { INDEX_op_shl_i64, { "r", "0", "ci" } },
+ { INDEX_op_shr_i64, { "r", "0", "ci" } },
+ { INDEX_op_sar_i64, { "r", "0", "ci" } },
+ { INDEX_op_rotl_i64, { "r", "0", "ci" } },
+ { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+
+ { INDEX_op_brcond_i64, { "r", "re" } },
+ { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+ { INDEX_op_bswap16_i64, { "r", "0" } },
+ { INDEX_op_bswap32_i64, { "r", "0" } },
+ { INDEX_op_bswap64_i64, { "r", "0" } },
+ { INDEX_op_neg_i64, { "r", "0" } },
+ { INDEX_op_not_i64, { "r", "0" } },
+
+ { INDEX_op_ext8s_i64, { "r", "r" } },
+ { INDEX_op_ext16s_i64, { "r", "r" } },
+ { INDEX_op_ext32s_i64, { "r", "r" } },
+ { INDEX_op_ext8u_i64, { "r", "r" } },
+ { INDEX_op_ext16u_i64, { "r", "r" } },
+ { INDEX_op_ext32u_i64, { "r", "r" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld32u, { "r", "L" } },
+ { INDEX_op_qemu_ld32s, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "cb", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L" } },
+#else
+ { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
+
+ { INDEX_op_qemu_st8, { "cb", "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
+#endif
+ { -1 },
+};
+
+static int tcg_target_callee_save_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+# if defined(VBOX) && defined(__MINGW64__)
+ TCG_REG_RSI,
+ TCG_REG_RDI,
+# endif
+ TCG_REG_R12,
+ TCG_REG_R13,
+ /* TCG_REG_R14, */ /* Currently used for the global env. */
+ TCG_REG_R15,
+#else
+# ifndef VBOX
+ /* TCG_REG_EBP, */ /* Currently used for the global env. */
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+# else
+ TCG_REG_EBP,
+ TCG_REG_EBX,
+ /* TCG_REG_ESI, */ /* Currently used for the global env. */
+ TCG_REG_EDI,
+# endif
+#endif
+};
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i, frame_size, push_size, stack_addend;
+
+ /* TB prologue */
+
+ /* Save all callee saved registers. */
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_push(s, tcg_target_callee_save_regs[i]);
+ }
+# if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
+ tcg_out8(s, 0x31); /* xor ebp, ebp */
+ tcg_out8(s, 0xed);
+# endif
+
+ /* Reserve some stack space. */
+ push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
+ push_size *= TCG_TARGET_REG_BITS / 8;
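+    /* The extra slot in push_size accounts for the return address already
+       pushed by our caller; it has to be counted when aligning the frame. */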
+
+ frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
+#if defined(VBOX) && defined(__MINGW64__)
+ frame_size += TCG_TARGET_CALL_STACK_OFFSET;
+#endif
+ frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ stack_addend = frame_size - push_size;
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+
+# ifdef VBOX
+    tcg_gen_stack_alignment_check(s);
+# endif
+
+    /* jmp *tb. */
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
+
+ /* TB epilogue */
+ tb_ret_addr = s->code_ptr;
+
+ tcg_out_addi(s, TCG_REG_ESP, stack_addend);
+
+ for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+ tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+ }
+ tcg_out_opc(s, OPC_RET, 0, 0, 0);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#if !defined(CONFIG_USER_ONLY)
+ /* fail safe */
+ if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
+ tcg_abort();
+#endif
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
+ } else {
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
+ }
+
+ tcg_regset_clear(tcg_target_call_clobber_regs);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
+ if (TCG_TARGET_REG_BITS == 64) {
+# if !defined(VBOX) || !defined(__MINGW64__)
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+# endif
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+ }
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
+
+ tcg_add_target_add_op_defs(x86_op_defs);
+}
diff --git a/src/recompiler/tcg/i386/tcg-target.h b/src/recompiler/tcg/i386/tcg-target.h
new file mode 100644
index 00000000..e812bc58
--- /dev/null
+++ b/src/recompiler/tcg/i386/tcg-target.h
@@ -0,0 +1,134 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define TCG_TARGET_I386 1
+
+#if defined(__x86_64__)
+# define TCG_TARGET_REG_BITS 64
+#else
+# define TCG_TARGET_REG_BITS 32
+#endif
+//#define TCG_TARGET_WORDS_BIGENDIAN
+
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_TARGET_NB_REGS 16
+#else
+# define TCG_TARGET_NB_REGS 8
+#endif
+
+enum {
+ TCG_REG_EAX = 0,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EBX,
+ TCG_REG_ESP,
+ TCG_REG_EBP,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+
+ /* 64-bit registers; always define the symbols to avoid
+ too much if-deffing. */
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_RAX = TCG_REG_EAX,
+ TCG_REG_RCX = TCG_REG_ECX,
+ TCG_REG_RDX = TCG_REG_EDX,
+ TCG_REG_RBX = TCG_REG_EBX,
+ TCG_REG_RSP = TCG_REG_ESP,
+ TCG_REG_RBP = TCG_REG_EBP,
+ TCG_REG_RSI = TCG_REG_ESI,
+ TCG_REG_RDI = TCG_REG_EDI,
+};
+
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_ESP
+#define TCG_TARGET_STACK_ALIGN 16
+#if defined(VBOX) && defined(__MINGW64__)
+# define TCG_TARGET_CALL_STACK_OFFSET 32 /* 4 qword argument/register spill zone */
+#else
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32
+#define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_ext8u_i32
+#define TCG_TARGET_HAS_ext16u_i32
+#define TCG_TARGET_HAS_bswap16_i32
+#define TCG_TARGET_HAS_bswap32_i32
+#define TCG_TARGET_HAS_neg_i32
+#define TCG_TARGET_HAS_not_i32
+// #define TCG_TARGET_HAS_andc_i32
+// #define TCG_TARGET_HAS_orc_i32
+// #define TCG_TARGET_HAS_eqv_i32
+// #define TCG_TARGET_HAS_nand_i32
+// #define TCG_TARGET_HAS_nor_i32
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_div2_i64
+#define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+#define TCG_TARGET_HAS_ext32s_i64
+#define TCG_TARGET_HAS_ext8u_i64
+#define TCG_TARGET_HAS_ext16u_i64
+#define TCG_TARGET_HAS_ext32u_i64
+#define TCG_TARGET_HAS_bswap16_i64
+#define TCG_TARGET_HAS_bswap32_i64
+#define TCG_TARGET_HAS_bswap64_i64
+#define TCG_TARGET_HAS_neg_i64
+#define TCG_TARGET_HAS_not_i64
+// #define TCG_TARGET_HAS_andc_i64
+// #define TCG_TARGET_HAS_orc_i64
+// #define TCG_TARGET_HAS_eqv_i64
+// #define TCG_TARGET_HAS_nand_i64
+// #define TCG_TARGET_HAS_nor_i64
+#endif
+
+#define TCG_TARGET_HAS_GUEST_BASE
+
+/* Note: must be synced with dyngen-exec.h */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_AREG0 TCG_REG_R14
+#else
+# ifndef VBOX /* we're using ESI instead of EBP, probably due to frame pointer opt issues */
+# define TCG_AREG0 TCG_REG_EBP
+# else /* VBOX */
+# define TCG_AREG0 TCG_REG_ESI
+# endif /* VBOX */
+#endif
+
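+/* x86 keeps the instruction cache coherent with stores, so there is nothing
+   to flush here. */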
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+}
diff --git a/src/recompiler/tcg/tcg-dyngen.c b/src/recompiler/tcg/tcg-dyngen.c
new file mode 100644
index 00000000..b068a0a7
--- /dev/null
+++ b/src/recompiler/tcg/tcg-dyngen.c
@@ -0,0 +1,437 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef VBOX
+#include <assert.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#else
+# include <stdio.h>
+# include "osdep.h"
+#endif
+
+#include "config.h"
+#include "osdep.h"
+
+#include "tcg.h"
+
+int __op_param1, __op_param2, __op_param3;
+#if defined(__sparc__) || defined(__arm__)
+ void __op_gen_label1(){}
+ void __op_gen_label2(){}
+ void __op_gen_label3(){}
+#else
+ int __op_gen_label1, __op_gen_label2, __op_gen_label3;
+#endif
+int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3;
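+
+/* Placeholder symbols referenced by relocations in dyngen-generated code;
+   most of the remainder of this file is compiled out (#if 0 below and the
+   CONFIG_DYNGEN_OP block at the end). */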
+
+#if 0
+#if defined(__s390__)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+}
+#elif defined(__ia64__)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ while (start < stop) {
+ asm volatile ("fc %0" :: "r"(start));
+ start += 32;
+ }
+ asm volatile (";;sync.i;;srlz.i;;");
+}
+#elif defined(__powerpc__)
+
+#define MIN_CACHE_LINE_SIZE 8 /* conservative value */
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ uintptr_t p;
+
+ start &= ~(MIN_CACHE_LINE_SIZE - 1);
+ stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
+
+ for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+ asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+ for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
+ asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+ asm volatile ("isync" : : : "memory");
+}
+#elif defined(__alpha__)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ asm ("imb");
+}
+#elif defined(__sparc__)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ uintptr_t p;
+
+ p = start & ~(8UL - 1UL);
+ stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL);
+
+ for (; p < stop; p += 8)
+ __asm__ __volatile__("flush\t%0" : : "r" (p));
+}
+#elif defined(__arm__)
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ register uintptr_t _beg __asm ("a1") = start;
+ register uintptr_t _end __asm ("a2") = stop;
+ register uintptr_t _flg __asm ("a3") = 0;
+ __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
+}
+#elif defined(__mc68000)
+
+# include <asm/cachectl.h>
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ cacheflush(start,FLUSH_SCOPE_LINE,FLUSH_CACHE_BOTH,stop-start+16);
+}
+#elif defined(__mips__)
+
+#include <sys/cachectl.h>
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ _flush_cache ((void *)start, stop - start, BCACHE);
+}
+#else
+#error unsupported CPU
+#endif
+
+#ifdef __alpha__
+
+register int gp asm("$29");
+
+static inline void immediate_ldah(void *p, int val) {
+ uint32_t *dest = p;
+ long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff;
+
+ *dest &= ~0xffff;
+ *dest |= high;
+ *dest |= 31 << 16;
+}
+static inline void immediate_lda(void *dest, int val) {
+ *(uint16_t *) dest = val;
+}
+void fix_bsr(void *p, int offset) {
+ uint32_t *dest = p;
+ *dest &= ~((1 << 21) - 1);
+ *dest |= (offset >> 2) & ((1 << 21) - 1);
+}
+
+#endif /* __alpha__ */
+
+#ifdef __ia64
+
+/* Patch instruction with "val" where "mask" has 1 bits. */
+static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val)
+{
+ uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16);
+# define insn_mask ((1UL << 41) - 1)
+ uintptr_t shift;
+
+ b0 = b[0]; b1 = b[1];
+ shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */
+ if (shift >= 64) {
+ m1 = mask << (shift - 64);
+ v1 = val << (shift - 64);
+ } else {
+ m0 = mask << shift; m1 = mask >> (64 - shift);
+ v0 = val << shift; v1 = val >> (64 - shift);
+ b[0] = (b0 & ~m0) | (v0 & m0);
+ }
+ b[1] = (b1 & ~m1) | (v1 & m1);
+}
+
+static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val)
+{
+ ia64_patch(insn_addr,
+ 0x011ffffe000UL,
+ ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
+ | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
+ ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
+}
+
+static inline void ia64_imm64 (void *insn, uint64_t val)
+{
+ /* Ignore the slot number of the relocation; GCC and Intel
+ toolchains differed for some time on whether IMM64 relocs are
+ against slot 1 (Intel) or slot 2 (GCC). */
+ uint64_t insn_addr = (uint64_t) insn & ~3UL;
+
+ ia64_patch(insn_addr + 2,
+ 0x01fffefe000UL,
+ ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
+ | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
+ | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
+ | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
+ | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)
+ );
+ ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
+}
+
+static inline void ia64_imm60b (void *insn, uint64_t val)
+{
+ /* Ignore the slot number of the relocation; GCC and Intel
+ toolchains differed for some time on whether IMM64 relocs are
+ against slot 1 (Intel) or slot 2 (GCC). */
+ uint64_t insn_addr = (uint64_t) insn & ~3UL;
+
+ if (val + ((uint64_t) 1 << 59) >= (1UL << 60))
+ fprintf(stderr, "%s: value %ld out of IMM60 range\n",
+ __FUNCTION__, (int64_t) val);
+ ia64_patch_imm60(insn_addr + 2, val);
+}
+
+static inline void ia64_imm22 (void *insn, uint64_t val)
+{
+ if (val + (1 << 21) >= (1 << 22))
+ fprintf(stderr, "%s: value %li out of IMM22 range\n",
+ __FUNCTION__, (int64_t)val);
+ ia64_patch((uint64_t) insn, 0x01fffcfe000UL,
+ ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+ | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */
+ | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */
+ | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */));
+}
+
+/* Like ia64_imm22(), but also clear bits 20-21. For addl, this has
+ the effect of turning "addl rX=imm22,rY" into "addl
+ rX=imm22,r0". */
+static inline void ia64_imm22_r0 (void *insn, uint64_t val)
+{
+ if (val + (1 << 21) >= (1 << 22))
+ fprintf(stderr, "%s: value %li out of IMM22 range\n",
+ __FUNCTION__, (int64_t)val);
+ ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20),
+ ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
+ | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */
+ | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */
+ | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */));
+}
+
+static inline void ia64_imm21b (void *insn, uint64_t val)
+{
+ if (val + (1 << 20) >= (1 << 21))
+ fprintf(stderr, "%s: value %li out of IMM21b range\n",
+ __FUNCTION__, (int64_t)val);
+ ia64_patch((uint64_t) insn, 0x11ffffe000UL,
+ ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
+ | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */));
+}
+
+static inline void ia64_nop_b (void *insn)
+{
+ ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37);
+}
+
+static inline void ia64_ldxmov(void *insn, uint64_t val)
+{
+ if (val + (1 << 21) < (1 << 22))
+ ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37);
+}
+
+static inline int ia64_patch_ltoff(void *insn, uint64_t val,
+ int relaxable)
+{
+ if (relaxable && (val + (1 << 21) < (1 << 22))) {
+ ia64_imm22_r0(insn, val);
+ return 0;
+ }
+ return 1;
+}
+
+struct ia64_fixup {
+ struct ia64_fixup *next;
+ void *addr; /* address that needs to be patched */
+ long value;
+};
+
+#define IA64_PLT(insn, plt_index) \
+do { \
+ struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \
+ fixup->next = plt_fixes; \
+ plt_fixes = fixup; \
+ fixup->addr = (insn); \
+ fixup->value = (plt_index); \
+ plt_offset[(plt_index)] = 1; \
+} while (0)
+
+#define IA64_LTOFF(insn, val, relaxable) \
+do { \
+ if (ia64_patch_ltoff(insn, val, relaxable)) { \
+ struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \
+ fixup->next = ltoff_fixes; \
+ ltoff_fixes = fixup; \
+ fixup->addr = (insn); \
+ fixup->value = (val); \
+ } \
+} while (0)
+
+static inline void ia64_apply_fixes (uint8_t **gen_code_pp,
+ struct ia64_fixup *ltoff_fixes,
+ uint64_t gp,
+ struct ia64_fixup *plt_fixes,
+ int num_plts,
+ uintptr_t *plt_target,
+ unsigned int *plt_offset)
+{
+ static const uint8_t plt_bundle[] = {
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; movl r1=GP */
+ 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60,
+
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; brl IP */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0
+ };
+ uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start;
+ uint64_t *vp;
+ struct ia64_fixup *fixup;
+ unsigned int offset = 0;
+ struct fdesc {
+ long ip;
+ long gp;
+ } *fdesc;
+ int i;
+
+ if (plt_fixes) {
+ plt_start = gen_code_ptr;
+
+ for (i = 0; i < num_plts; ++i) {
+ if (plt_offset[i]) {
+ plt_offset[i] = offset;
+ offset += sizeof(plt_bundle);
+
+ fdesc = (struct fdesc *) plt_target[i];
+ memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle));
+ ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp);
+ ia64_imm60b(gen_code_ptr + 0x12,
+ (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4);
+ gen_code_ptr += sizeof(plt_bundle);
+ }
+ }
+
+ for (fixup = plt_fixes; fixup; fixup = fixup->next)
+ ia64_imm21b(fixup->addr,
+ ((long) plt_start + plt_offset[fixup->value]
+ - ((long) fixup->addr & ~0xf)) >> 4);
+ }
+
+ got_start = gen_code_ptr;
+
+ /* First, create the GOT: */
+ for (fixup = ltoff_fixes; fixup; fixup = fixup->next) {
+ /* first check if we already have this value in the GOT: */
+ for (vp = (uint64_t *) got_start; vp < (uint64_t *) gen_code_ptr; ++vp)
+ if (*vp == fixup->value)
+ break;
+ if (vp == (uint64_t *) gen_code_ptr) {
+ /* Nope, we need to put the value in the GOT: */
+ *vp = fixup->value;
+ gen_code_ptr += 8;
+ }
+ ia64_imm22(fixup->addr, (long) vp - gp);
+ }
+ /* Keep code ptr aligned. */
+ if ((long) gen_code_ptr & 15)
+ gen_code_ptr += 8;
+ *gen_code_pp = gen_code_ptr;
+}
+#endif
+#endif
+
+#ifdef CONFIG_DYNGEN_OP
+
+#if defined __hppa__
+struct hppa_branch_stub {
+ uint32_t *location;
+ long target;
+ struct hppa_branch_stub *next;
+};
+
+#define HPPA_RECORD_BRANCH(LIST, LOC, TARGET) \
+do { \
+ struct hppa_branch_stub *stub = alloca(sizeof(struct hppa_branch_stub)); \
+ stub->location = LOC; \
+ stub->target = TARGET; \
+ stub->next = LIST; \
+ LIST = stub; \
+} while (0)
+
+static inline void hppa_process_stubs(struct hppa_branch_stub *stub,
+ uint8_t **gen_code_pp)
+{
+ uint32_t *s = (uint32_t *)*gen_code_pp;
+ uint32_t *p = s + 1;
+
+ if (!stub) return;
+
+ for (; stub != NULL; stub = stub->next) {
+ uintptr_t l = (uintptr_t)p;
+ /* stub:
+ * ldil L'target, %r1
+ * be,n R'target(%sr4,%r1)
+ */
+ *p++ = 0x20200000 | reassemble_21(lrsel(stub->target, 0));
+ *p++ = 0xe0202002 | (reassemble_17(rrsel(stub->target, 0) >> 2));
+ hppa_patch17f(stub->location, l, 0);
+ }
+ /* b,l,n stub,%r0 */
+ *s = 0xe8000002 | reassemble_17((p - s) - 2);
+ *gen_code_pp = (uint8_t *)p;
+}
+#endif /* __hppa__ */
+
+const TCGArg *dyngen_op(TCGContext *s, int opc, const TCGArg *opparam_ptr)
+{
+ uint8_t *gen_code_ptr;
+
+#ifdef __hppa__
+ struct hppa_branch_stub *hppa_stubs = NULL;
+#endif
+
+ gen_code_ptr = s->code_ptr;
+ switch(opc) {
+
+/* op.h is dynamically generated by dyngen.c from op.c */
+#include "op.h"
+
+ default:
+ tcg_abort();
+ }
+
+#ifdef __hppa__
+ hppa_process_stubs(hppa_stubs, &gen_code_ptr);
+#endif
+
+ s->code_ptr = gen_code_ptr;
+ return opparam_ptr;
+}
+#endif
diff --git a/src/recompiler/tcg/tcg-op.h b/src/recompiler/tcg/tcg-op.h
new file mode 100644
index 00000000..b3890804
--- /dev/null
+++ b/src/recompiler/tcg/tcg-op.h
@@ -0,0 +1,2469 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "tcg.h"
+
+int gen_new_label(void);
+
+static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+}
+
+static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+}
+
+static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = arg1;
+}
+
+static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+}
+
+static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+}
+
+static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = arg2;
+}
+
+static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = arg2;
+}
+
+static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = arg1;
+ *gen_opparam_ptr++ = arg2;
+}
+
+static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+}
+
+static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+}
+
+static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1,
+ TCGv_i32 arg2, TCGArg arg3)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = arg3;
+}
+
+static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1,
+ TCGv_i64 arg2, TCGArg arg3)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = arg3;
+}
+
+static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
+ TCGv_ptr base, TCGArg offset)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(val);
+ *gen_opparam_ptr++ = GET_TCGV_PTR(base);
+ *gen_opparam_ptr++ = offset;
+}
+
+static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
+ TCGv_ptr base, TCGArg offset)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *gen_opparam_ptr++ = GET_TCGV_PTR(base);
+ *gen_opparam_ptr++ = offset;
+}
+
+static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val,
+ TCGv_i32 addr, TCGArg mem_index)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *gen_opparam_ptr++ = GET_TCGV_I32(addr);
+ *gen_opparam_ptr++ = mem_index;
+}
+
+static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val,
+ TCGv_i64 addr, TCGArg mem_index)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *gen_opparam_ptr++ = GET_TCGV_I64(addr);
+ *gen_opparam_ptr++ = mem_index;
+}
+
+static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+}
+
+static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+}
+
+static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGArg arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = arg4;
+}
+
+static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGArg arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = arg4;
+}
+
+static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGArg arg3, TCGArg arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = arg3;
+ *gen_opparam_ptr++ = arg4;
+}
+
+static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGArg arg3, TCGArg arg4)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = arg3;
+ *gen_opparam_ptr++ = arg4;
+}
+
+static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+}
+
+static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+}
+
+static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = arg5;
+}
+
+static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = arg5;
+}
+
+static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5,
+ TCGv_i32 arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg6);
+}
+
+static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5,
+ TCGv_i64 arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
+}
+
+static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4,
+ TCGv_i32 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4,
+ TCGv_i64 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1,
+ TCGv_i32 arg2, TCGv_i32 arg3,
+ TCGv_i32 arg4, TCGArg arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = arg5;
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1,
+ TCGv_i64 arg2, TCGv_i64 arg3,
+ TCGv_i64 arg4, TCGArg arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = arg5;
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void gen_set_label(int n)
+{
+ tcg_gen_op1i(INDEX_op_set_label, n);
+}
+
+static inline void tcg_gen_br(int label)
+{
+ tcg_gen_op1i(INDEX_op_br, label);
+}
+
+static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (!TCGV_EQUAL_I32(ret, arg))
+ tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg);
+}
+
+static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
+{
+ tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg);
+}
+
+/* A version of dh_sizemask from def-helper.h that doesn't rely on
+ preprocessor magic. */
+static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed)
+{
+ return (is_64bit << n*2) | (is_signed << (n*2 + 1));
+}
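+
+/* A worked example (illustration only, not used by the code below):
+ * for a signed 32-bit helper with two signed 32-bit arguments, slot 0
+ * describes the return value and slots 1..2 the arguments, so
+ *
+ *     tcg_gen_sizemask(0, 0, 1)  ->  0x02   (ret:  32-bit, signed)
+ *     tcg_gen_sizemask(1, 0, 1)  ->  0x08   (arg1: 32-bit, signed)
+ *     tcg_gen_sizemask(2, 0, 1)  ->  0x20   (arg2: 32-bit, signed)
+ *
+ * and the combined sizemask is 0x2a.  This is exactly the pattern used
+ * by the helper-based tcg_gen_div_i32() fallback further down.
+ */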
+
+/* helper calls */
+static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
+ TCGArg ret, int nargs, TCGArg *args)
+{
+ TCGv_ptr fn;
+ fn = tcg_const_ptr((tcg_target_long)func);
+ tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret,
+ nargs, args);
+ tcg_temp_free_ptr(fn);
+}
+
+/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
+ reserved for helpers in tcg-runtime.c. These helpers are all const
+ and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
+ TCG_CALL_PURE. This may need to be adjusted if these functions
+ start to be used with other helpers. */
+static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
+ TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_ptr fn;
+ TCGArg args[2];
+ fn = tcg_const_ptr((tcg_target_long)func);
+ args[0] = GET_TCGV_I32(a);
+ args[1] = GET_TCGV_I32(b);
+ tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
+ GET_TCGV_I32(ret), 2, args);
+ tcg_temp_free_ptr(fn);
+}
+
+static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
+ TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_ptr fn;
+ TCGArg args[2];
+ fn = tcg_const_ptr((tcg_target_long)func);
+ args[0] = GET_TCGV_I64(a);
+ args[1] = GET_TCGV_I64(b);
+ tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
+ GET_TCGV_I64(ret), 2, args);
+ tcg_temp_free_ptr(fn);
+}
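+
+/* Rough usage sketch for tcg_gen_helperN() (helper_foo is a hypothetical
+ * name, shown for illustration only): a front end calling an
+ * arbitrary-arity helper packs the raw TCGArg values itself, e.g. for
+ * two i32 inputs and an i32 result:
+ *
+ *     TCGArg args[2];
+ *     int sizemask = tcg_gen_sizemask(0, 0, 1)
+ *                  | tcg_gen_sizemask(1, 0, 1)
+ *                  | tcg_gen_sizemask(2, 0, 1);
+ *     args[0] = GET_TCGV_I32(a);
+ *     args[1] = GET_TCGV_I32(b);
+ *     tcg_gen_helperN(helper_foo, 0, sizemask, GET_TCGV_I32(ret), 2, args);
+ *
+ * (flags 0 means an ordinary call).  tcg_gen_helper32() and
+ * tcg_gen_helper64() above are just fixed-arity wrappers around the
+ * same mechanism.
+ */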
+
+/* 32 bit ops */
+
+static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_add_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg1);
+ tcg_gen_sub_i32(ret, t0, arg2);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCGV_EQUAL_I32(arg1, arg2)) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_movi_i32(ret, 0);
+ } else if (arg2 == 0xffffffff) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCGV_EQUAL_I32(arg1, arg2)) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0xffffffff) {
+ tcg_gen_movi_i32(ret, 0xffffffff);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_or_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCGV_EQUAL_I32(arg1, arg2)) {
+ tcg_gen_movi_i32(ret, 0);
+ } else {
+ tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_xor_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_shl_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_shr_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_sar_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
+ TCGv_i32 arg2, int label_index)
+{
+ tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+}
+
+static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
+ int32_t arg2, int label_index)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_setcond_i32(cond, ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_mul_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+}
+
+#ifdef TCG_TARGET_HAS_div_i32
+static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+}
+#elif defined(TCG_TARGET_HAS_div2_i32)
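+/* The div2/divu2 fallbacks below rely on the target providing a
+ * combined divide op.  As used here, the first two operands receive
+ * the quotient and the remainder, and the inputs are the low word of
+ * the dividend, its sign- (or zero-) extension in the high word, and
+ * the divisor.  tcg_gen_div_i32() keeps the quotient while
+ * tcg_gen_rem_i32() keeps the remainder and discards the other result
+ * into a temporary.
+ */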
+static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+}
+
+static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+}
+#else
+static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 32-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 0, 1);
+ sizemask |= tcg_gen_sizemask(1, 0, 1);
+ sizemask |= tcg_gen_sizemask(2, 0, 1);
+
+ tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 32-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 0, 1);
+ sizemask |= tcg_gen_sizemask(1, 0, 1);
+ sizemask |= tcg_gen_sizemask(2, 0, 1);
+
+ tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 32-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 0, 0);
+ sizemask |= tcg_gen_sizemask(1, 0, 0);
+ sizemask |= tcg_gen_sizemask(2, 0, 0);
+
+ tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 32-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 0, 0);
+ sizemask |= tcg_gen_sizemask(1, 0, 0);
+ sizemask |= tcg_gen_sizemask(2, 0, 0);
+
+ tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2);
+}
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+
+static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (!TCGV_EQUAL_I64(ret, arg)) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ }
+}
+
+static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_movi_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+}
+
+static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ /* since arg2 and ret have different types, they cannot be the
+ same temporary */
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
+#else
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
+#endif
+}
+
+static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
+#else
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
+#endif
+}
+
+static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+}
+
+static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+}
+
+static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+}
+
+/* XXX: use generic code once basic block handling is OK, or use
+ CPU specific code (x86) */
+static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
+}
+
+static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
+}
+
+static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
+}
+
+static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
+ TCGv_i64 arg2, int label_index)
+{
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2), cond, label_index);
+}
+
+static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ TCGv_i32 t1;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+
+ tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+ tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+
+ tcg_gen_mov_i64(ret, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i32(t1);
+}
+
+static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 1, 0);
+ sizemask |= tcg_gen_sizemask(1, 1, 0);
+ sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+ tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 1, 0);
+ sizemask |= tcg_gen_sizemask(1, 1, 0);
+ sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+ tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
+}
+
+#else
+
+static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (!TCGV_EQUAL_I64(ret, arg))
+ tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg);
+}
+
+static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg);
+}
+
+static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_i64 arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_i64 arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_i64 arg2, tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCGV_EQUAL_I64(arg1, arg2)) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_and_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCGV_EQUAL_I64(arg1, arg2)) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCGV_EQUAL_I64(arg1, arg2)) {
+ tcg_gen_movi_i64(ret, 0);
+ } else {
+ tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2);
+ }
+}
+
+static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_xor_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_shl_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_shr_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_sar_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
+ TCGv_i64 arg2, int label_index)
+{
+ tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+}
+
+static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
+}
+
+#ifdef TCG_TARGET_HAS_div_i64
+static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+}
+#elif defined(TCG_TARGET_HAS_div2_i64)
+static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+}
+#else
+static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and signed. */
+ sizemask |= tcg_gen_sizemask(0, 1, 1);
+ sizemask |= tcg_gen_sizemask(1, 1, 1);
+ sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+ tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 1, 0);
+ sizemask |= tcg_gen_sizemask(1, 1, 0);
+ sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+ tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ int sizemask = 0;
+ /* Return value and both arguments are 64-bit and unsigned. */
+ sizemask |= tcg_gen_sizemask(0, 1, 0);
+ sizemask |= tcg_gen_sizemask(1, 1, 0);
+ sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+ tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
+}
+#endif
+
+#endif
+
+static inline void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_add_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg1);
+ tcg_gen_sub_i64(ret, t0, arg2);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
+ int64_t arg2, int label_index)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_setcond_i64(cond, ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+static inline void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_mul_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+
+/***************************************/
+/* optional operations */
+
+static inline void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_ext8s_i32
+ tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
+#else
+ tcg_gen_shli_i32(ret, arg, 24);
+ tcg_gen_sari_i32(ret, ret, 24);
+#endif
+}
+
+static inline void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_ext16s_i32
+ tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
+#else
+ tcg_gen_shli_i32(ret, arg, 16);
+ tcg_gen_sari_i32(ret, ret, 16);
+#endif
+}
+
+static inline void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_ext8u_i32
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+#else
+ tcg_gen_andi_i32(ret, arg, 0xffu);
+#endif
+}
+
+static inline void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_ext16u_i32
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+#else
+ tcg_gen_andi_i32(ret, arg, 0xffffu);
+#endif
+}
+
+/* Note: we assume the two high bytes are set to zero */
+static inline void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_bswap16_i32
+ tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
+#else
+ TCGv_i32 t0 = tcg_temp_new_i32();
+
+ tcg_gen_ext8u_i32(t0, arg);
+ tcg_gen_shli_i32(t0, t0, 8);
+ tcg_gen_shri_i32(ret, arg, 8);
+ tcg_gen_or_i32(ret, ret, t0);
+ tcg_temp_free_i32(t0);
+#endif
+}
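+
+/* Worked example for the fallback above, assuming the two high bytes
+ * really are zero as the note requires: arg = 0x0000c3d4 gives
+ * t0 = 0x0000d400 and arg >> 8 = 0x000000c3, so ret = 0x0000d4c3.
+ * If the high bytes are not zero they leak into the result.
+ */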
+
+static inline void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_bswap32_i32
+ tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
+#else
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(t0, arg, 24);
+
+ tcg_gen_andi_i32(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i32(t1, t1, 8);
+ tcg_gen_or_i32(t0, t0, t1);
+
+ tcg_gen_shri_i32(t1, arg, 8);
+ tcg_gen_andi_i32(t1, t1, 0x0000ff00);
+ tcg_gen_or_i32(t0, t0, t1);
+
+ tcg_gen_shri_i32(t1, arg, 24);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#endif
+}
+
+#if TCG_TARGET_REG_BITS == 32
+static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(ret, TCGV_LOW(arg));
+}
+
+static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+/* Note: we assume the six high bytes are set to zero */
+static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+}
+
+/* Note: we assume the four high bytes are set to zero */
+static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+}
+
+static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
+ tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
+ tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+#else
+
+static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext8s_i64
+ tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
+#else
+ tcg_gen_shli_i64(ret, arg, 56);
+ tcg_gen_sari_i64(ret, ret, 56);
+#endif
+}
+
+static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext16s_i64
+ tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
+#else
+ tcg_gen_shli_i64(ret, arg, 48);
+ tcg_gen_sari_i64(ret, ret, 48);
+#endif
+}
+
+static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext32s_i64
+ tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
+#else
+ tcg_gen_shli_i64(ret, arg, 32);
+ tcg_gen_sari_i64(ret, ret, 32);
+#endif
+}
+
+static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext8u_i64
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+#else
+ tcg_gen_andi_i64(ret, arg, 0xffu);
+#endif
+}
+
+static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext16u_i64
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+#else
+ tcg_gen_andi_i64(ret, arg, 0xffffu);
+#endif
+}
+
+static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_ext32u_i64
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+#else
+ tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+#endif
+}
+
+/* Note: we assume the target supports move between 32 and 64 bit
+ registers. This will probably break MIPS64 targets. */
+static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg)));
+}
+
+/* Note: we assume the target supports move between 32 and 64 bit
+ registers */
+static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+}
+
+/* Note: we assume the target supports move between 32 and 64 bit
+ registers */
+static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+}
+
+/* Note: we assume the six high bytes are set to zero */
+static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_bswap16_i64
+ tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
+#else
+ TCGv_i64 t0 = tcg_temp_new_i64();
+
+ tcg_gen_ext8u_i64(t0, arg);
+ tcg_gen_shli_i64(t0, t0, 8);
+ tcg_gen_shri_i64(ret, arg, 8);
+ tcg_gen_or_i64(ret, ret, t0);
+ tcg_temp_free_i64(t0);
+#endif
+}
+
+/* Note: we assume the four high bytes are set to zero */
+static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_bswap32_i64
+ tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
+#else
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t0, arg, 24);
+ tcg_gen_ext32u_i64(t0, t0);
+
+ tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i64(t1, t1, 8);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 8);
+ tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 24);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+#endif
+}
+
+static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_bswap64_i64
+ tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
+#else
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t0, arg, 56);
+
+ tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i64(t1, t1, 40);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_andi_i64(t1, arg, 0x00ff0000);
+ tcg_gen_shli_i64(t1, t1, 24);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_andi_i64(t1, arg, 0xff000000);
+ tcg_gen_shli_i64(t1, t1, 8);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 8);
+ tcg_gen_andi_i64(t1, t1, 0xff000000);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 24);
+ tcg_gen_andi_i64(t1, t1, 0x00ff0000);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 40);
+ tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 56);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+#endif
+}
+
+#endif
+
+static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_neg_i32
+ tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
+#else
+ TCGv_i32 t0 = tcg_const_i32(0);
+ tcg_gen_sub_i32(ret, t0, arg);
+ tcg_temp_free_i32(t0);
+#endif
+}
+
+static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_neg_i64
+ tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
+#else
+ TCGv_i64 t0 = tcg_const_i64(0);
+ tcg_gen_sub_i64(ret, t0, arg);
+ tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+#ifdef TCG_TARGET_HAS_not_i32
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
+#else
+ tcg_gen_xori_i32(ret, arg, -1);
+#endif
+}
+
+static inline void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+#ifdef TCG_TARGET_HAS_not_i64
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+#else
+ tcg_gen_xori_i64(ret, arg, -1);
+#endif
+}
+
+static inline void tcg_gen_discard_i32(TCGv_i32 arg)
+{
+ tcg_gen_op1_i32(INDEX_op_discard, arg);
+}
+
+#if TCG_TARGET_REG_BITS == 32
+static inline void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+ tcg_gen_discard_i32(TCGV_LOW(arg));
+ tcg_gen_discard_i32(TCGV_HIGH(arg));
+}
+#else
+static inline void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+ tcg_gen_op1_i64(INDEX_op_discard, arg);
+}
+#endif
+
+static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_LOW(dest), low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ /* This extension is only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_extu_i32_i64(dest, low);
+ tcg_gen_or_i64(dest, dest, tmp);
+ tcg_temp_free_i64(tmp);
+#endif
+}
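+
+/* Example: with low = 0x11111111 and high = 0x22222222 the result is
+ * dest = 0x2222222211111111, i.e. dest = ((uint64_t)high << 32) | low,
+ * regardless of which of the two code paths above is taken.
+ */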
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high));
+#else
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(dest, low);
+ tcg_gen_shli_i64(tmp, high, 32);
+ tcg_gen_or_i64(dest, dest, tmp);
+ tcg_temp_free_i64(tmp);
+#endif
+}
+
+static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_andc_i32
+ tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
+#else
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+#endif
+}
+
+static inline void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_andc_i64
+ tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_andc_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_and_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_eqv_i32
+ tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+#else
+ tcg_gen_xor_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_eqv_i64
+ tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
+ tcg_gen_xor_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_nand_i32
+ tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+#else
+ tcg_gen_and_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_nand_i64
+ tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
+ tcg_gen_and_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_nor_i32
+ tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+#else
+ tcg_gen_or_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_nor_i64
+ tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
+ tcg_gen_or_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+#endif
+}
+
+static inline void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_orc_i32
+ tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
+#else
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_or_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+#endif
+}
+
+static inline void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_orc_i64
+ tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_orc_i32) && TCG_TARGET_REG_BITS == 32
+ tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+#endif
+}
+
+static inline void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_rot_i32
+ tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
+#else
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shl_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shr_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#endif
+}
+
+static inline void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_rot_i64
+ tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
+#else
+ TCGv_i64 t0, t1;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shl_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shr_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+#endif
+}
+
+static inline void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+#ifdef TCG_TARGET_HAS_rot_i32
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_rotl_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+#else
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shli_i32(t0, arg1, arg2);
+ tcg_gen_shri_i32(t1, arg1, 32 - arg2);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#endif
+ }
+}
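+
+/* The open-coded fallback implements the usual identity
+ * rotl32(x, n) == (x << n) | (x >> (32 - n)) for 0 < n < 32, e.g.
+ * rotl32(0x80000001, 1) == 0x00000003; n == 0 is handled separately
+ * above because a shift by 32 would be undefined.
+ */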
+
+static inline void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+#ifdef TCG_TARGET_HAS_rot_i64
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_rotl_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+#else
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shli_i64(t0, arg1, arg2);
+ tcg_gen_shri_i64(t1, arg1, 64 - arg2);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+#endif
+ }
+}
+
+static inline void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+#ifdef TCG_TARGET_HAS_rot_i32
+ tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
+#else
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shr_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shl_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#endif
+}
+
+static inline void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifdef TCG_TARGET_HAS_rot_i64
+ tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
+#else
+ TCGv_i64 t0, t1;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shr_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shl_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+#endif
+}
+
+static inline void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
+ }
+}
+
+static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
+ }
+}
+
+/***************************************/
+/* QEMU specific operations. Their type depends on the QEMU CPU
+   type. */
+#ifndef TARGET_LONG_BITS
+#error must include QEMU headers
+#endif
+
+#if TARGET_LONG_BITS == 32
+#define TCGv TCGv_i32
+#define tcg_temp_new() tcg_temp_new_i32()
+#define tcg_global_reg_new tcg_global_reg_new_i32
+#define tcg_global_mem_new tcg_global_mem_new_i32
+#define tcg_temp_local_new() tcg_temp_local_new_i32()
+#define tcg_temp_free tcg_temp_free_i32
+#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32
+#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32
+#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x)
+#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b)
+#else
+#define TCGv TCGv_i64
+#define tcg_temp_new() tcg_temp_new_i64()
+#define tcg_global_reg_new tcg_global_reg_new_i64
+#define tcg_global_mem_new tcg_global_mem_new_i64
+#define tcg_temp_local_new() tcg_temp_local_new_i64()
+#define tcg_temp_free tcg_temp_free_i64
+#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64
+#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64
+#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x)
+#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b)
+#endif
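+
+/* For example, a guest with TARGET_LONG_BITS == 32 resolves TCGv to TCGv_i32
+   and tcg_temp_new() to tcg_temp_new_i32(), so translator code written in
+   terms of TCGv works unchanged for 32-bit and 64-bit guests. */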
+
+/* debug info: write the PC of the corresponding QEMU CPU instruction */
+static inline void tcg_gen_debug_insn_start(uint64_t pc)
+{
+ /* XXX: must really use a 32 bit size for TCGArg in all cases */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ tcg_gen_op2ii(INDEX_op_debug_insn_start,
+ (uint32_t)(pc), (uint32_t)(pc >> 32));
+#else
+ tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
+#endif
+}
+
+static inline void tcg_gen_exit_tb(tcg_target_long val)
+{
+ tcg_gen_op1i(INDEX_op_exit_tb, val);
+}
+
+static inline void tcg_gen_goto_tb(int idx)
+{
+ tcg_gen_op1i(INDEX_op_goto_tb, idx);
+}
+
+#if TCG_TARGET_REG_BITS == 32
+static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld8u, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld8u, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld8s, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld8s, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld16u, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld16u, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld16s, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld16s, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op4i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), addr, mem_index);
+#else
+ tcg_gen_op5i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_st8, arg, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_st8, TCGV_LOW(arg), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_st16, arg, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_st16, TCGV_LOW(arg), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(INDEX_op_qemu_st32, arg, addr, mem_index);
+#else
+ tcg_gen_op4i_i32(INDEX_op_qemu_st32, TCGV_LOW(arg), TCGV_LOW(addr),
+ TCGV_HIGH(addr), mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op4i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), addr,
+ mem_index);
+#else
+ tcg_gen_op5i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg),
+ TCGV_LOW(addr), TCGV_HIGH(addr), mem_index);
+#endif
+}
+
+#define tcg_gen_ld_ptr tcg_gen_ld_i32
+#define tcg_gen_discard_ptr tcg_gen_discard_i32
+
+#else /* TCG_TARGET_REG_BITS == 32 */
+
+static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8u, ret, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8s, ret, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16u, ret, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16s, ret, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
+{
+#if TARGET_LONG_BITS == 32
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index);
+#endif
+}
+
+static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_ld64, ret, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_st8, arg, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_st16, arg, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op(INDEX_op_qemu_st32, arg, addr, mem_index);
+}
+
+static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index);
+}
+
+#define tcg_gen_ld_ptr tcg_gen_ld_i64
+#define tcg_gen_discard_ptr tcg_gen_discard_i64
+
+#endif /* TCG_TARGET_REG_BITS != 32 */
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_movi_tl tcg_gen_movi_i64
+#define tcg_gen_mov_tl tcg_gen_mov_i64
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64
+#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64
+#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64
+#define tcg_gen_ld_tl tcg_gen_ld_i64
+#define tcg_gen_st8_tl tcg_gen_st8_i64
+#define tcg_gen_st16_tl tcg_gen_st16_i64
+#define tcg_gen_st32_tl tcg_gen_st32_i64
+#define tcg_gen_st_tl tcg_gen_st_i64
+#define tcg_gen_add_tl tcg_gen_add_i64
+#define tcg_gen_addi_tl tcg_gen_addi_i64
+#define tcg_gen_sub_tl tcg_gen_sub_i64
+#define tcg_gen_neg_tl tcg_gen_neg_i64
+#define tcg_gen_subfi_tl tcg_gen_subfi_i64
+#define tcg_gen_subi_tl tcg_gen_subi_i64
+#define tcg_gen_and_tl tcg_gen_and_i64
+#define tcg_gen_andi_tl tcg_gen_andi_i64
+#define tcg_gen_or_tl tcg_gen_or_i64
+#define tcg_gen_ori_tl tcg_gen_ori_i64
+#define tcg_gen_xor_tl tcg_gen_xor_i64
+#define tcg_gen_xori_tl tcg_gen_xori_i64
+#define tcg_gen_not_tl tcg_gen_not_i64
+#define tcg_gen_shl_tl tcg_gen_shl_i64
+#define tcg_gen_shli_tl tcg_gen_shli_i64
+#define tcg_gen_shr_tl tcg_gen_shr_i64
+#define tcg_gen_shri_tl tcg_gen_shri_i64
+#define tcg_gen_sar_tl tcg_gen_sar_i64
+#define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_brcond_tl tcg_gen_brcond_i64
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
+#define tcg_gen_mul_tl tcg_gen_mul_i64
+#define tcg_gen_muli_tl tcg_gen_muli_i64
+#define tcg_gen_div_tl tcg_gen_div_i64
+#define tcg_gen_rem_tl tcg_gen_rem_i64
+#define tcg_gen_divu_tl tcg_gen_divu_i64
+#define tcg_gen_remu_tl tcg_gen_remu_i64
+#define tcg_gen_discard_tl tcg_gen_discard_i64
+#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64
+#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64
+#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64
+#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64
+#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64
+#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
+#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
+#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
+#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_andc_tl tcg_gen_andc_i64
+#define tcg_gen_eqv_tl tcg_gen_eqv_i64
+#define tcg_gen_nand_tl tcg_gen_nand_i64
+#define tcg_gen_nor_tl tcg_gen_nor_i64
+#define tcg_gen_orc_tl tcg_gen_orc_i64
+#define tcg_gen_rotl_tl tcg_gen_rotl_i64
+#define tcg_gen_rotli_tl tcg_gen_rotli_i64
+#define tcg_gen_rotr_tl tcg_gen_rotr_i64
+#define tcg_gen_rotri_tl tcg_gen_rotri_i64
+#define tcg_const_tl tcg_const_i64
+#define tcg_const_local_tl tcg_const_local_i64
+#else
+#define tcg_gen_movi_tl tcg_gen_movi_i32
+#define tcg_gen_mov_tl tcg_gen_mov_i32
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32
+#define tcg_gen_ld32u_tl tcg_gen_ld_i32
+#define tcg_gen_ld32s_tl tcg_gen_ld_i32
+#define tcg_gen_ld_tl tcg_gen_ld_i32
+#define tcg_gen_st8_tl tcg_gen_st8_i32
+#define tcg_gen_st16_tl tcg_gen_st16_i32
+#define tcg_gen_st32_tl tcg_gen_st_i32
+#define tcg_gen_st_tl tcg_gen_st_i32
+#define tcg_gen_add_tl tcg_gen_add_i32
+#define tcg_gen_addi_tl tcg_gen_addi_i32
+#define tcg_gen_sub_tl tcg_gen_sub_i32
+#define tcg_gen_neg_tl tcg_gen_neg_i32
+#define tcg_gen_subfi_tl tcg_gen_subfi_i32
+#define tcg_gen_subi_tl tcg_gen_subi_i32
+#define tcg_gen_and_tl tcg_gen_and_i32
+#define tcg_gen_andi_tl tcg_gen_andi_i32
+#define tcg_gen_or_tl tcg_gen_or_i32
+#define tcg_gen_ori_tl tcg_gen_ori_i32
+#define tcg_gen_xor_tl tcg_gen_xor_i32
+#define tcg_gen_xori_tl tcg_gen_xori_i32
+#define tcg_gen_not_tl tcg_gen_not_i32
+#define tcg_gen_shl_tl tcg_gen_shl_i32
+#define tcg_gen_shli_tl tcg_gen_shli_i32
+#define tcg_gen_shr_tl tcg_gen_shr_i32
+#define tcg_gen_shri_tl tcg_gen_shri_i32
+#define tcg_gen_sar_tl tcg_gen_sar_i32
+#define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_brcond_tl tcg_gen_brcond_i32
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
+#define tcg_gen_mul_tl tcg_gen_mul_i32
+#define tcg_gen_muli_tl tcg_gen_muli_i32
+#define tcg_gen_div_tl tcg_gen_div_i32
+#define tcg_gen_rem_tl tcg_gen_rem_i32
+#define tcg_gen_divu_tl tcg_gen_divu_i32
+#define tcg_gen_remu_tl tcg_gen_remu_i32
+#define tcg_gen_discard_tl tcg_gen_discard_i32
+#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32
+#define tcg_gen_extu_i32_tl tcg_gen_mov_i32
+#define tcg_gen_ext_i32_tl tcg_gen_mov_i32
+#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32
+#define tcg_gen_ext32u_tl tcg_gen_mov_i32
+#define tcg_gen_ext32s_tl tcg_gen_mov_i32
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
+#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
+#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_andc_tl tcg_gen_andc_i32
+#define tcg_gen_eqv_tl tcg_gen_eqv_i32
+#define tcg_gen_nand_tl tcg_gen_nand_i32
+#define tcg_gen_nor_tl tcg_gen_nor_i32
+#define tcg_gen_orc_tl tcg_gen_orc_i32
+#define tcg_gen_rotl_tl tcg_gen_rotl_i32
+#define tcg_gen_rotli_tl tcg_gen_rotli_i32
+#define tcg_gen_rotr_tl tcg_gen_rotr_i32
+#define tcg_gen_rotri_tl tcg_gen_rotri_i32
+#define tcg_const_tl tcg_const_i32
+#define tcg_const_local_tl tcg_const_local_i32
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+#define tcg_gen_add_ptr tcg_gen_add_i32
+#define tcg_gen_addi_ptr tcg_gen_addi_i32
+#define tcg_gen_ext_i32_ptr tcg_gen_mov_i32
+#else /* TCG_TARGET_REG_BITS == 32 */
+#define tcg_gen_add_ptr tcg_gen_add_i64
+#define tcg_gen_addi_ptr tcg_gen_addi_i64
+#define tcg_gen_ext_i32_ptr tcg_gen_ext_i32_i64
+#endif /* TCG_TARGET_REG_BITS != 32 */
diff --git a/src/recompiler/tcg/tcg-opc.h b/src/recompiler/tcg/tcg-opc.h
new file mode 100644
index 00000000..2a98fed9
--- /dev/null
+++ b/src/recompiler/tcg/tcg-opc.h
@@ -0,0 +1,304 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * DEF(name, oargs, iargs, cargs, flags)
+ */
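+/* For example, DEF(add_i32, 1, 2, 0, 0) below describes an op with one
+   output argument, two input arguments, no constant arguments and no
+   special flags. */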
+
+/* predefined ops */
+DEF(end, 0, 0, 0, 0) /* must be kept first */
+DEF(nop, 0, 0, 0, 0)
+DEF(nop1, 0, 0, 1, 0)
+DEF(nop2, 0, 0, 2, 0)
+DEF(nop3, 0, 0, 3, 0)
+DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
+
+DEF(discard, 1, 0, 0, 0)
+
+DEF(set_label, 0, 0, 1, 0)
+DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
+DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+
+DEF(mov_i32, 1, 1, 0, 0)
+DEF(movi_i32, 1, 0, 1, 0)
+DEF(setcond_i32, 1, 2, 1, 0)
+/* load/store */
+DEF(ld8u_i32, 1, 1, 1, 0)
+DEF(ld8s_i32, 1, 1, 1, 0)
+DEF(ld16u_i32, 1, 1, 1, 0)
+DEF(ld16s_i32, 1, 1, 1, 0)
+DEF(ld_i32, 1, 1, 1, 0)
+DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+/* arith */
+DEF(add_i32, 1, 2, 0, 0)
+DEF(sub_i32, 1, 2, 0, 0)
+DEF(mul_i32, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_div_i32
+DEF(div_i32, 1, 2, 0, 0)
+DEF(divu_i32, 1, 2, 0, 0)
+DEF(rem_i32, 1, 2, 0, 0)
+DEF(remu_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_div2_i32
+DEF(div2_i32, 2, 3, 0, 0)
+DEF(divu2_i32, 2, 3, 0, 0)
+#endif
+DEF(and_i32, 1, 2, 0, 0)
+DEF(or_i32, 1, 2, 0, 0)
+DEF(xor_i32, 1, 2, 0, 0)
+/* shifts/rotates */
+DEF(shl_i32, 1, 2, 0, 0)
+DEF(shr_i32, 1, 2, 0, 0)
+DEF(sar_i32, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_rot_i32
+DEF(rotl_i32, 1, 2, 0, 0)
+DEF(rotr_i32, 1, 2, 0, 0)
+#endif
+
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+#if TCG_TARGET_REG_BITS == 32
+DEF(add2_i32, 2, 4, 0, 0)
+DEF(sub2_i32, 2, 4, 0, 0)
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(mulu2_i32, 2, 2, 0, 0)
+DEF(setcond2_i32, 1, 4, 1, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext8s_i32
+DEF(ext8s_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16s_i32
+DEF(ext16s_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext8u_i32
+DEF(ext8u_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16u_i32
+DEF(ext16u_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_bswap16_i32
+DEF(bswap16_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_bswap32_i32
+DEF(bswap32_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_not_i32
+DEF(not_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_neg_i32
+DEF(neg_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_andc_i32
+DEF(andc_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_orc_i32
+DEF(orc_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_eqv_i32
+DEF(eqv_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nand_i32
+DEF(nand_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nor_i32
+DEF(nor_i32, 1, 2, 0, 0)
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+DEF(mov_i64, 1, 1, 0, 0)
+DEF(movi_i64, 1, 0, 1, 0)
+DEF(setcond_i64, 1, 2, 1, 0)
+/* load/store */
+DEF(ld8u_i64, 1, 1, 1, 0)
+DEF(ld8s_i64, 1, 1, 1, 0)
+DEF(ld16u_i64, 1, 1, 1, 0)
+DEF(ld16s_i64, 1, 1, 1, 0)
+DEF(ld32u_i64, 1, 1, 1, 0)
+DEF(ld32s_i64, 1, 1, 1, 0)
+DEF(ld_i64, 1, 1, 1, 0)
+DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+/* arith */
+DEF(add_i64, 1, 2, 0, 0)
+DEF(sub_i64, 1, 2, 0, 0)
+DEF(mul_i64, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_div_i64
+DEF(div_i64, 1, 2, 0, 0)
+DEF(divu_i64, 1, 2, 0, 0)
+DEF(rem_i64, 1, 2, 0, 0)
+DEF(remu_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_div2_i64
+DEF(div2_i64, 2, 3, 0, 0)
+DEF(divu2_i64, 2, 3, 0, 0)
+#endif
+DEF(and_i64, 1, 2, 0, 0)
+DEF(or_i64, 1, 2, 0, 0)
+DEF(xor_i64, 1, 2, 0, 0)
+/* shifts/rotates */
+DEF(shl_i64, 1, 2, 0, 0)
+DEF(shr_i64, 1, 2, 0, 0)
+DEF(sar_i64, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_rot_i64
+DEF(rotl_i64, 1, 2, 0, 0)
+DEF(rotr_i64, 1, 2, 0, 0)
+#endif
+
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+#ifdef TCG_TARGET_HAS_ext8s_i64
+DEF(ext8s_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16s_i64
+DEF(ext16s_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext32s_i64
+DEF(ext32s_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext8u_i64
+DEF(ext8u_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16u_i64
+DEF(ext16u_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext32u_i64
+DEF(ext32u_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_bswap16_i64
+DEF(bswap16_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_bswap32_i64
+DEF(bswap32_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_bswap64_i64
+DEF(bswap64_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_not_i64
+DEF(not_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_neg_i64
+DEF(neg_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_andc_i64
+DEF(andc_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_orc_i64
+DEF(orc_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_eqv_i64
+DEF(eqv_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nand_i64
+DEF(nand_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nor_i64
+DEF(nor_i64, 1, 2, 0, 0)
+#endif
+#endif
+
+/* QEMU specific */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+DEF(debug_insn_start, 0, 0, 2, 0)
+#else
+DEF(debug_insn_start, 0, 0, 1, 0)
+#endif
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
+ constants must be defined */
+#if TCG_TARGET_REG_BITS == 32
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+
+#if TARGET_LONG_BITS == 32
+DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+#if TARGET_LONG_BITS == 32
+DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#else
+DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+#endif
+
+#else /* TCG_TARGET_REG_BITS == 32 */
+
+DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+
+DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+
+#endif /* TCG_TARGET_REG_BITS != 32 */
+
+#undef DEF
diff --git a/src/recompiler/tcg/tcg-runtime.h b/src/recompiler/tcg/tcg-runtime.h
new file mode 100644
index 00000000..5615b133
--- /dev/null
+++ b/src/recompiler/tcg/tcg-runtime.h
@@ -0,0 +1,18 @@
+#ifndef TCG_RUNTIME_H
+#define TCG_RUNTIME_H
+
+/* tcg-runtime.c */
+int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2);
+int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2);
+uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2);
+uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2);
+
+int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2);
+int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2);
+uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2);
+uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2);
+
+#endif
diff --git a/src/recompiler/tcg/tcg.c b/src/recompiler/tcg/tcg.c
new file mode 100644
index 00000000..4e636099
--- /dev/null
+++ b/src/recompiler/tcg/tcg.c
@@ -0,0 +1,2212 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* define it to use liveness analysis (better code) */
+#define USE_LIVENESS_ANALYSIS
+
+#include "config.h"
+
+#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
+/* define it to suppress various consistency checks (faster) */
+#define NDEBUG
+#endif
+
+#ifndef VBOX
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#ifdef _WIN32
+#include <malloc.h>
+#endif
+#ifdef _AIX
+#include <alloca.h>
+#endif
+#else /* VBOX */
+# include <stdio.h>
+# include "osdep.h"
+#endif /* VBOX */
+
+#include "qemu-common.h"
+#include "cache-utils.h"
+#include "host-utils.h"
+#include "qemu-timer.h"
+
+/* Note: the long-term plan is to reduce the dependencies on the QEMU
+   CPU definitions. Currently they are used for the qemu_ld/st
+   instructions. */
+#define NO_CPU_IO_DEFS
+#include "cpu.h"
+#include "exec-all.h"
+
+#include "tcg-op.h"
+#include "elf.h"
+
+#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
+#error GUEST_BASE not supported on this host.
+#endif
+
+#ifdef VBOX
+/*
+ * Liveness analysis doesn't seem to work well with 32-bit hosts and 64-bit
+ * targets: the second element of the register pair used to store a 64-bit
+ * value is apparently considered dead.
+ */
+ /** @todo re-test this */
+# if defined(TARGET_X86_64) && (TCG_TARGET_REG_BITS == 32)
+# undef USE_LIVENESS_ANALYSIS
+# endif
+#endif /* VBOX */
+
+static void tcg_target_init(TCGContext *s);
+static void tcg_target_qemu_prologue(TCGContext *s);
+static void patch_reloc(uint8_t *code_ptr, int type,
+ tcg_target_long value, tcg_target_long addend);
+
+static TCGOpDef tcg_op_defs[] = {
+#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
+#include "tcg-opc.h"
+#undef DEF
+};
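+/* Each DEF(name, oargs, iargs, cargs, flags) line in tcg-opc.h expands to one
+   table entry; e.g. DEF(add_i32, 1, 2, 0, 0) becomes
+   { "add_i32", 1, 2, 0, 3, 0 }, the 3 being the total argument count. */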
+
+static TCGRegSet tcg_target_available_regs[2];
+static TCGRegSet tcg_target_call_clobber_regs;
+
+/* XXX: move that inside the context */
+uint16_t *gen_opc_ptr;
+TCGArg *gen_opparam_ptr;
+
+static inline void tcg_out8(TCGContext *s, uint8_t v)
+{
+ *s->code_ptr++ = v;
+}
+
+static inline void tcg_out16(TCGContext *s, uint16_t v)
+{
+ *(uint16_t *)s->code_ptr = v;
+ s->code_ptr += 2;
+}
+
+static inline void tcg_out32(TCGContext *s, uint32_t v)
+{
+ *(uint32_t *)s->code_ptr = v;
+ s->code_ptr += 4;
+}
+
+/* label relocation processing */
+
+static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
+ int label_index, tcg_target_long addend)
+{
+ TCGLabel *l;
+ TCGRelocation *r;
+
+ l = &s->labels[label_index];
+ if (l->has_value) {
+ /* FIXME: This may break relocations on RISC targets that
+ modify instruction fields in place. The caller may not have
+ written the initial value. */
+ patch_reloc(code_ptr, type, l->u.value, addend);
+ } else {
+ /* add a new relocation entry */
+ r = tcg_malloc(sizeof(TCGRelocation));
+ r->type = type;
+ r->ptr = code_ptr;
+ r->addend = addend;
+ r->next = l->u.first_reloc;
+ l->u.first_reloc = r;
+ }
+}
+
+static void tcg_out_label(TCGContext *s, int label_index,
+ tcg_target_long value)
+{
+ TCGLabel *l;
+ TCGRelocation *r;
+
+ l = &s->labels[label_index];
+ if (l->has_value)
+ tcg_abort();
+ r = l->u.first_reloc;
+ while (r != NULL) {
+ patch_reloc(r->ptr, r->type, value, r->addend);
+ r = r->next;
+ }
+ l->has_value = 1;
+ l->u.value = value;
+}
+
+int gen_new_label(void)
+{
+ TCGContext *s = &tcg_ctx;
+ int idx;
+ TCGLabel *l;
+
+ if (s->nb_labels >= TCG_MAX_LABELS)
+ tcg_abort();
+ idx = s->nb_labels++;
+ l = &s->labels[idx];
+ l->has_value = 0;
+ l->u.first_reloc = NULL;
+ return idx;
+}
+
+#include "tcg-target.c"
+
+/* pool based memory allocation */
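+/* tcg_malloc()'d blocks are carved out of per-context pools; nothing is freed
+   individually. tcg_pool_reset() (called from tcg_func_start() below) rewinds
+   the allocator so the pools are reused for the next translation. */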
+void *tcg_malloc_internal(TCGContext *s, int size)
+{
+ TCGPool *p;
+ int pool_size;
+
+ if (size > TCG_POOL_CHUNK_SIZE) {
+ /* big malloc: insert a new pool (XXX: could optimize) */
+ p = qemu_malloc(sizeof(TCGPool) + size);
+ p->size = size;
+ if (s->pool_current)
+ s->pool_current->next = p;
+ else
+ s->pool_first = p;
+ p->next = s->pool_current;
+ } else {
+ p = s->pool_current;
+ if (!p) {
+ p = s->pool_first;
+ if (!p)
+ goto new_pool;
+ } else {
+ if (!p->next) {
+ new_pool:
+ pool_size = TCG_POOL_CHUNK_SIZE;
+ p = qemu_malloc(sizeof(TCGPool) + pool_size);
+ p->size = pool_size;
+ p->next = NULL;
+ if (s->pool_current)
+ s->pool_current->next = p;
+ else
+ s->pool_first = p;
+ } else {
+ p = p->next;
+ }
+ }
+ }
+ s->pool_current = p;
+ s->pool_cur = p->data + size;
+ s->pool_end = p->data + p->size;
+ return p->data;
+}
+
+void tcg_pool_reset(TCGContext *s)
+{
+ s->pool_cur = s->pool_end = NULL;
+ s->pool_current = NULL;
+}
+
+void tcg_context_init(TCGContext *s)
+{
+ int op, total_args, n;
+ TCGOpDef *def;
+ TCGArgConstraint *args_ct;
+ int *sorted_args;
+
+ memset(s, 0, sizeof(*s));
+ s->temps = s->static_temps;
+ s->nb_globals = 0;
+
+ /* Count total number of arguments and allocate the corresponding
+ space */
+ total_args = 0;
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ n = def->nb_iargs + def->nb_oargs;
+ total_args += n;
+ }
+
+ args_ct = qemu_malloc(sizeof(TCGArgConstraint) * total_args);
+ sorted_args = qemu_malloc(sizeof(int) * total_args);
+
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ def->args_ct = args_ct;
+ def->sorted_args = sorted_args;
+ n = def->nb_iargs + def->nb_oargs;
+ sorted_args += n;
+ args_ct += n;
+ }
+
+ tcg_target_init(s);
+}
+
+void tcg_prologue_init(TCGContext *s)
+{
+ /* init global prologue and epilogue */
+ s->code_buf = code_gen_prologue;
+ s->code_ptr = s->code_buf;
+ tcg_target_qemu_prologue(s);
+ flush_icache_range((uintptr_t)s->code_buf,
+ (uintptr_t)s->code_ptr);
+}
+
+void tcg_set_frame(TCGContext *s, int reg,
+ tcg_target_long start, tcg_target_long size)
+{
+ s->frame_start = start;
+ s->frame_end = start + size;
+ s->frame_reg = reg;
+}
+
+void tcg_func_start(TCGContext *s)
+{
+ int i;
+ tcg_pool_reset(s);
+ s->nb_temps = s->nb_globals;
+ for(i = 0; i < (TCG_TYPE_COUNT * 2); i++)
+ s->first_free_temp[i] = -1;
+ s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
+ s->nb_labels = 0;
+ s->current_frame_offset = s->frame_start;
+
+ gen_opc_ptr = gen_opc_buf;
+ gen_opparam_ptr = gen_opparam_buf;
+}
+
+static inline void tcg_temp_alloc(TCGContext *s, int n)
+{
+ if (n > TCG_MAX_TEMPS)
+ tcg_abort();
+}
+
+static inline int tcg_global_reg_new_internal(TCGType type, int reg,
+ const char *name)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx;
+
+#if TCG_TARGET_REG_BITS == 32
+ if (type != TCG_TYPE_I32)
+ tcg_abort();
+#endif
+ if (tcg_regset_test_reg(s->reserved_regs, reg))
+ tcg_abort();
+ idx = s->nb_globals;
+ tcg_temp_alloc(s, s->nb_globals + 1);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = type;
+ ts->fixed_reg = 1;
+ ts->reg = reg;
+ ts->name = name;
+ s->nb_globals++;
+ tcg_regset_set_reg(s->reserved_regs, reg);
+ return idx;
+}
+
+TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name)
+{
+ int idx;
+
+ idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name)
+{
+ int idx;
+
+ idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name);
+ return MAKE_TCGV_I64(idx);
+}
+
+static inline int tcg_global_mem_new_internal(TCGType type, int reg,
+ tcg_target_long offset,
+ const char *name)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx;
+
+ idx = s->nb_globals;
+#if TCG_TARGET_REG_BITS == 32
+ if (type == TCG_TYPE_I64) {
+ char buf[64];
+ tcg_temp_alloc(s, s->nb_globals + 2);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+ ts->mem_offset = offset + 4;
+#else
+ ts->mem_offset = offset;
+#endif
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_0");
+ ts->name = strdup(buf);
+ ts++;
+
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+ ts->mem_offset = offset;
+#else
+ ts->mem_offset = offset + 4;
+#endif
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_1");
+ ts->name = strdup(buf);
+
+ s->nb_globals += 2;
+ } else
+#endif
+ {
+ tcg_temp_alloc(s, s->nb_globals + 1);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = type;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+ ts->mem_offset = offset;
+ ts->name = name;
+ s->nb_globals++;
+ }
+ return idx;
+}
+
+TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset,
+ const char *name)
+{
+ int idx;
+
+ idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset,
+ const char *name)
+{
+ int idx;
+
+ idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
+ return MAKE_TCGV_I64(idx);
+}
+
+static inline int tcg_temp_new_internal(TCGType type, int temp_local)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx, k;
+
+ k = type;
+ if (temp_local)
+ k += TCG_TYPE_COUNT;
+ idx = s->first_free_temp[k];
+ if (idx != -1) {
+ /* There is already an available temp with the
+ right type */
+ ts = &s->temps[idx];
+ s->first_free_temp[k] = ts->next_free_temp;
+ ts->temp_allocated = 1;
+ assert(ts->temp_local == temp_local);
+ } else {
+ idx = s->nb_temps;
+#if TCG_TARGET_REG_BITS == 32
+ if (type == TCG_TYPE_I64) {
+ tcg_temp_alloc(s, s->nb_temps + 2);
+ ts = &s->temps[s->nb_temps];
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ ts++;
+ ts->base_type = TCG_TYPE_I32;
+ ts->type = TCG_TYPE_I32;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ s->nb_temps += 2;
+ } else
+#endif
+ {
+ tcg_temp_alloc(s, s->nb_temps + 1);
+ ts = &s->temps[s->nb_temps];
+ ts->base_type = type;
+ ts->type = type;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ s->nb_temps++;
+ }
+ }
+ return idx;
+}
+
+TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
+{
+ int idx;
+
+ idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
+{
+ int idx;
+
+ idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
+ return MAKE_TCGV_I64(idx);
+}
+
+static inline void tcg_temp_free_internal(int idx)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int k;
+
+ assert(idx >= s->nb_globals && idx < s->nb_temps);
+ ts = &s->temps[idx];
+ assert(ts->temp_allocated != 0);
+ ts->temp_allocated = 0;
+ k = ts->base_type;
+ if (ts->temp_local)
+ k += TCG_TYPE_COUNT;
+ ts->next_free_temp = s->first_free_temp[k];
+ s->first_free_temp[k] = idx;
+}
+
+void tcg_temp_free_i32(TCGv_i32 arg)
+{
+ tcg_temp_free_internal(GET_TCGV_I32(arg));
+}
+
+void tcg_temp_free_i64(TCGv_i64 arg)
+{
+ tcg_temp_free_internal(GET_TCGV_I64(arg));
+}
+
+TCGv_i32 tcg_const_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+TCGv_i32 tcg_const_local_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_local_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_local_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_local_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+void tcg_register_helper(void *func, const char *name)
+{
+ TCGContext *s = &tcg_ctx;
+ int n;
+ if ((s->nb_helpers + 1) > s->allocated_helpers) {
+ n = s->allocated_helpers;
+ if (n == 0) {
+ n = 4;
+ } else {
+ n *= 2;
+ }
+#ifdef VBOX
+ s->helpers = qemu_realloc(s->helpers, n * sizeof(TCGHelperInfo));
+#else
+ s->helpers = realloc(s->helpers, n * sizeof(TCGHelperInfo));
+#endif
+ s->allocated_helpers = n;
+ }
+ s->helpers[s->nb_helpers].func = (tcg_target_ulong)func;
+ s->helpers[s->nb_helpers].name = name;
+ s->nb_helpers++;
+}
+
+/* Note: we convert the 64-bit args to 32-bit ones and do some alignment
+   and endian swapping. Maybe it would be better to do the alignment
+   and endian swapping in tcg_reg_alloc_call(). */
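+/* sizemask layout, as used below: bit 0 marks a 64-bit return value; for
+   argument i, bit 2*(i+1) marks a 64-bit argument and bit 2*(i+1)+1 marks it
+   as signed (sign-extended when widening). */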
+void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
+ int sizemask, TCGArg ret, int nargs, TCGArg *args)
+{
+#ifdef TCG_TARGET_I386
+ int call_type;
+#endif
+ int i;
+ int real_args;
+ int nb_rets;
+ TCGArg *nparam;
+
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ int is_signed = sizemask & (2 << (i+1)*2);
+ if (!is_64bit) {
+ TCGv_i64 temp = tcg_temp_new_i64();
+ TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+ if (is_signed) {
+ tcg_gen_ext32s_i64(temp, orig);
+ } else {
+ tcg_gen_ext32u_i64(temp, orig);
+ }
+ args[i] = GET_TCGV_I64(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
+ *gen_opc_ptr++ = INDEX_op_call;
+ nparam = gen_opparam_ptr++;
+#ifdef TCG_TARGET_I386
+ call_type = (flags & TCG_CALL_TYPE_MASK);
+#endif
+ if (ret != TCG_CALL_DUMMY_ARG) {
+#if TCG_TARGET_REG_BITS < 64
+ if (sizemask & 1) {
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+ *gen_opparam_ptr++ = ret + 1;
+ *gen_opparam_ptr++ = ret;
+#else
+ *gen_opparam_ptr++ = ret;
+ *gen_opparam_ptr++ = ret + 1;
+#endif
+ nb_rets = 2;
+ } else
+#endif
+ {
+ *gen_opparam_ptr++ = ret;
+ nb_rets = 1;
+ }
+ } else {
+ nb_rets = 0;
+ }
+ real_args = 0;
+ for (i = 0; i < nargs; i++) {
+#if TCG_TARGET_REG_BITS < 64
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (is_64bit) {
+#ifdef TCG_TARGET_I386
+ /* REGPARM case: if the third parameter is 64 bit, it is
+ allocated on the stack */
+ if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) {
+ call_type = TCG_CALL_TYPE_REGPARM_2;
+ flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type;
+ }
+#endif
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ /* some targets want aligned 64 bit args */
+ if (real_args & 1) {
+ *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+ real_args++;
+ }
+#endif
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
+#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
+ *gen_opparam_ptr++ = args[i] + 1;
+ *gen_opparam_ptr++ = args[i];
+#else
+ *gen_opparam_ptr++ = args[i];
+ *gen_opparam_ptr++ = args[i] + 1;
+#endif
+ real_args += 2;
+ continue;
+ }
+#endif /* TCG_TARGET_REG_BITS < 64 */
+
+ *gen_opparam_ptr++ = args[i];
+ real_args++;
+ }
+ *gen_opparam_ptr++ = GET_TCGV_PTR(func);
+
+ *gen_opparam_ptr++ = flags;
+
+ *nparam = (nb_rets << 16) | (real_args + 1);
+
+ /* total parameters, needed to go backward in the instruction stream */
+ *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (!is_64bit) {
+ TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
+ tcg_temp_free_i64(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+}
+
+#if TCG_TARGET_REG_BITS == 32
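+/* Expand a 64-bit shift by a constant on a 32-bit host: the value lives in a
+   low/high pair of 32-bit temps, 'right' selects a right shift and 'arith'
+   an arithmetic (sign-propagating) one. */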
+void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
+ int c, int right, int arith)
+{
+ if (c == 0) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ } else if (c >= 32) {
+ c -= 32;
+ if (right) {
+ if (arith) {
+ tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
+ } else {
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+ } else {
+ tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ }
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ if (right) {
+ tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
+ if (arith)
+ tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
+ else
+ tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
+ } else {
+ tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
+ /* Note: ret can be the same as arg1, so we use t1 */
+ tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
+ tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
+ tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+#endif
+
+
+static void tcg_reg_alloc_start(TCGContext *s)
+{
+ int i;
+ TCGTemp *ts;
+ for(i = 0; i < s->nb_globals; i++) {
+ ts = &s->temps[i];
+ if (ts->fixed_reg) {
+ ts->val_type = TEMP_VAL_REG;
+ } else {
+ ts->val_type = TEMP_VAL_MEM;
+ }
+ }
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ ts->val_type = TEMP_VAL_DEAD;
+ ts->mem_allocated = 0;
+ ts->fixed_reg = 0;
+ }
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ s->reg_to_temp[i] = -1;
+ }
+}
+
+static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
+ int idx)
+{
+ TCGTemp *ts;
+
+ ts = &s->temps[idx];
+ if (idx < s->nb_globals) {
+ pstrcpy(buf, buf_size, ts->name);
+ } else {
+ if (ts->temp_local)
+ snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
+ else
+ snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
+ }
+ return buf;
+}
+
+char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
+{
+ return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
+}
+
+char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
+{
+ return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
+}
+
+static int helper_cmp(const void *p1, const void *p2)
+{
+ const TCGHelperInfo *th1 = p1;
+ const TCGHelperInfo *th2 = p2;
+ if (th1->func < th2->func)
+ return -1;
+ else if (th1->func == th2->func)
+ return 0;
+ else
+ return 1;
+}
+
+/* find helper definition (Note: A hash table would be better) */
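+/* The helpers array is sorted lazily by function pointer on the first lookup
+   and then searched with a plain binary search. */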
+static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val)
+{
+ int m, m_min, m_max;
+ TCGHelperInfo *th;
+ tcg_target_ulong v;
+
+ if (unlikely(!s->helpers_sorted)) {
+#ifdef VBOX
+ qemu_qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo),
+ helper_cmp);
+#else
+ qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo),
+ helper_cmp);
+#endif
+ s->helpers_sorted = 1;
+ }
+
+ /* binary search */
+ m_min = 0;
+ m_max = s->nb_helpers - 1;
+ while (m_min <= m_max) {
+ m = (m_min + m_max) >> 1;
+ th = &s->helpers[m];
+ v = th->func;
+ if (v == val)
+ return th;
+ else if (val < v) {
+ m_max = m - 1;
+ } else {
+ m_min = m + 1;
+ }
+ }
+ return NULL;
+}
+
+static const char * const cond_name[] =
+{
+ [TCG_COND_EQ] = "eq",
+ [TCG_COND_NE] = "ne",
+ [TCG_COND_LT] = "lt",
+ [TCG_COND_GE] = "ge",
+ [TCG_COND_LE] = "le",
+ [TCG_COND_GT] = "gt",
+ [TCG_COND_LTU] = "ltu",
+ [TCG_COND_GEU] = "geu",
+ [TCG_COND_LEU] = "leu",
+ [TCG_COND_GTU] = "gtu"
+};
+
+void tcg_dump_ops(TCGContext *s, FILE *outfile)
+{
+ const uint16_t *opc_ptr;
+ const TCGArg *args;
+ TCGArg arg;
+ TCGOpcode c;
+ int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
+ const TCGOpDef *def;
+ char buf[128];
+
+ first_insn = 1;
+ opc_ptr = gen_opc_buf;
+ args = gen_opparam_buf;
+ while (opc_ptr < gen_opc_ptr) {
+ c = *opc_ptr++;
+ def = &tcg_op_defs[c];
+ if (c == INDEX_op_debug_insn_start) {
+ uint64_t pc;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ pc = ((uint64_t)args[1] << 32) | args[0];
+#else
+ pc = args[0];
+#endif
+ if (!first_insn)
+ fprintf(outfile, "\n");
+ fprintf(outfile, " ---- 0x%" PRIx64, pc);
+ first_insn = 0;
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+ } else if (c == INDEX_op_call) {
+ TCGArg arg;
+
+ /* variable number of arguments */
+ arg = *args++;
+ nb_oargs = arg >> 16;
+ nb_iargs = arg & 0xffff;
+ nb_cargs = def->nb_cargs;
+
+ fprintf(outfile, " %s ", def->name);
+
+ /* function name */
+ fprintf(outfile, "%s",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + nb_iargs - 1]));
+ /* flags */
+ fprintf(outfile, ",$0x%" TCG_PRIlx,
+ args[nb_oargs + nb_iargs]);
+ /* nb out args */
+ fprintf(outfile, ",$%d", nb_oargs);
+ for(i = 0; i < nb_oargs; i++) {
+ fprintf(outfile, ",");
+ fprintf(outfile, "%s",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i]));
+ }
+ for(i = 0; i < (nb_iargs - 1); i++) {
+ fprintf(outfile, ",");
+ if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) {
+ fprintf(outfile, "<dummy>");
+ } else {
+ fprintf(outfile, "%s",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i]));
+ }
+ }
+ } else if (c == INDEX_op_movi_i32
+#if TCG_TARGET_REG_BITS == 64
+ || c == INDEX_op_movi_i64
+#endif
+ ) {
+ tcg_target_ulong val;
+ TCGHelperInfo *th;
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+ fprintf(outfile, " %s %s,$", def->name,
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
+ val = args[1];
+ th = tcg_find_helper(s, val);
+ if (th) {
+ fprintf(outfile, "%s", th->name);
+ } else {
+ if (c == INDEX_op_movi_i32)
+ fprintf(outfile, "0x%x", (uint32_t)val);
+ else
+ fprintf(outfile, "0x%" PRIx64 , (uint64_t)val);
+ }
+ } else {
+ fprintf(outfile, " %s ", def->name);
+ if (c == INDEX_op_nopn) {
+ /* variable number of arguments */
+ nb_cargs = *args;
+ nb_oargs = 0;
+ nb_iargs = 0;
+ } else {
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+ }
+
+ k = 0;
+ for(i = 0; i < nb_oargs; i++) {
+ if (k != 0)
+ fprintf(outfile, ",");
+ fprintf(outfile, "%s",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
+ }
+ for(i = 0; i < nb_iargs; i++) {
+ if (k != 0)
+ fprintf(outfile, ",");
+ fprintf(outfile, "%s",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
+ }
+ switch (c) {
+ case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
+#endif
+ case INDEX_op_setcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_setcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_setcond_i64:
+#endif
+ if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
+ fprintf(outfile, ",%s", cond_name[args[k++]]);
+ else
+ fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
+ i = 1;
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ for(; i < nb_cargs; i++) {
+ if (k != 0)
+ fprintf(outfile, ",");
+ arg = args[k++];
+ fprintf(outfile, "$0x%" TCG_PRIlx, arg);
+ }
+ }
+ fprintf(outfile, "\n");
+ args += nb_iargs + nb_oargs + nb_cargs;
+ }
+}
+
+/* we give more priority to constraints with less registers */
+static int get_constraint_priority(const TCGOpDef *def, int k)
+{
+ const TCGArgConstraint *arg_ct;
+
+ int i, n;
+ arg_ct = &def->args_ct[k];
+ if (arg_ct->ct & TCG_CT_ALIAS) {
+ /* an alias is equivalent to a single register */
+ n = 1;
+ } else {
+ if (!(arg_ct->ct & TCG_CT_REG))
+ return 0;
+ n = 0;
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (tcg_regset_test_reg(arg_ct->u.regs, i))
+ n++;
+ }
+ }
+ return TCG_TARGET_NB_REGS - n + 1;
+}
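+
+/* Example: an argument aliased to an output counts as a single register and
+   gets the highest priority (TCG_TARGET_NB_REGS), while a constraint allowing
+   any of n registers gets priority TCG_TARGET_NB_REGS - n + 1. */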
+
+/* sort from highest priority to lowest */
+static void sort_constraints(TCGOpDef *def, int start, int n)
+{
+ int i, j, p1, p2, tmp;
+
+ for(i = 0; i < n; i++)
+ def->sorted_args[start + i] = start + i;
+ if (n <= 1)
+ return;
+ for(i = 0; i < n - 1; i++) {
+ for(j = i + 1; j < n; j++) {
+ p1 = get_constraint_priority(def, def->sorted_args[start + i]);
+ p2 = get_constraint_priority(def, def->sorted_args[start + j]);
+ if (p1 < p2) {
+ tmp = def->sorted_args[start + i];
+ def->sorted_args[start + i] = def->sorted_args[start + j];
+ def->sorted_args[start + j] = tmp;
+ }
+ }
+ }
+}
+
+void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
+{
+ TCGOpcode op;
+ TCGOpDef *def;
+ const char *ct_str;
+ int i, nb_args;
+
+ for(;;) {
+ if (tdefs->op == (TCGOpcode)-1)
+ break;
+ op = tdefs->op;
+ assert((unsigned)op < (unsigned)NB_OPS);
+ def = &tcg_op_defs[op];
+#if defined(CONFIG_DEBUG_TCG)
+ /* Duplicate entry in op definitions? */
+ assert(!def->used);
+ def->used = 1;
+#endif
+ nb_args = def->nb_iargs + def->nb_oargs;
+ for(i = 0; i < nb_args; i++) {
+ ct_str = tdefs->args_ct_str[i];
+ /* Incomplete TCGTargetOpDef entry? */
+ assert(ct_str != NULL);
+ tcg_regset_clear(def->args_ct[i].u.regs);
+ def->args_ct[i].ct = 0;
+ if (ct_str[0] >= '0' && ct_str[0] <= '9') {
+ int oarg;
+ oarg = ct_str[0] - '0';
+ assert(oarg < def->nb_oargs);
+ assert(def->args_ct[oarg].ct & TCG_CT_REG);
+ /* TCG_CT_ALIAS is for the output arguments. The input
+ argument is tagged with TCG_CT_IALIAS. */
+ def->args_ct[i] = def->args_ct[oarg];
+ def->args_ct[oarg].ct = TCG_CT_ALIAS;
+ def->args_ct[oarg].alias_index = i;
+ def->args_ct[i].ct |= TCG_CT_IALIAS;
+ def->args_ct[i].alias_index = oarg;
+ } else {
+ for(;;) {
+ if (*ct_str == '\0')
+ break;
+ switch(*ct_str) {
+ case 'i':
+ def->args_ct[i].ct |= TCG_CT_CONST;
+ ct_str++;
+ break;
+ default:
+ if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
+ fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
+ ct_str, i, def->name);
+#ifndef VBOX
+ exit(1);
+#else
+ tcg_exit(1);
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ /* TCGTargetOpDef entry with too much information? */
+ assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+
+ /* sort the constraints (XXX: this is just a heuristic) */
+ sort_constraints(def, 0, def->nb_oargs);
+ sort_constraints(def, def->nb_oargs, def->nb_iargs);
+
+#if 0
+ {
+ int i;
+
+ printf("%s: sorted=", def->name);
+ for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
+ printf(" %d", def->sorted_args[i]);
+ printf("\n");
+ }
+#endif
+ tdefs++;
+ }
+
+#if defined(CONFIG_DEBUG_TCG)
+ i = 0;
+ for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
+ if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) {
+ /* Wrong entry in op definitions? */
+ if (tcg_op_defs[op].used) {
+ fprintf(stderr, "Invalid op definition for %s\n",
+ tcg_op_defs[op].name);
+ i = 1;
+ }
+ } else {
+ /* Missing entry in op definitions? */
+ if (!tcg_op_defs[op].used) {
+ fprintf(stderr, "Missing op definition for %s\n",
+ tcg_op_defs[op].name);
+ i = 1;
+ }
+ }
+ }
+ if (i == 1) {
+ tcg_abort();
+ }
+#endif
+}
+
+#ifdef USE_LIVENESS_ANALYSIS
+
+/* set a nop for an operation using 'nb_args' */
+static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
+ TCGArg *args, int nb_args)
+{
+ if (nb_args == 0) {
+ *opc_ptr = INDEX_op_nop;
+ } else {
+ *opc_ptr = INDEX_op_nopn;
+ args[0] = nb_args;
+ args[nb_args - 1] = nb_args;
+ }
+}
+
+/* liveness analysis: end of function: globals are live, temps are
+ dead. */
+/* XXX: not used at this stage, as there would be little gain because
+   most TBs end with a conditional jump. */
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps)
+{
+ memset(dead_temps, 0, s->nb_globals);
+ memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals);
+}
+
+/* liveness analysis: end of basic block: globals are live, temps are
+ dead, local temps are live. */
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps)
+{
+ int i;
+ TCGTemp *ts;
+
+ memset(dead_temps, 0, s->nb_globals);
+ ts = &s->temps[s->nb_globals];
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ if (ts->temp_local)
+ dead_temps[i] = 0;
+ else
+ dead_temps[i] = 1;
+ ts++;
+ }
+}
+
+/* Liveness analysis: update the op_dead_iargs array to tell whether a
+   given input argument is dead. Instructions updating dead
+   temporaries are removed. */
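+/* The pass walks the opcode stream backwards from the final 'end' op,
+   tracking for every temp whether it is still needed; ops whose outputs are
+   all dead (and that have no side effects) are turned into nops. */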
+static void tcg_liveness_analysis(TCGContext *s)
+{
+ int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
+ TCGOpcode op;
+ TCGArg *args;
+ const TCGOpDef *def;
+ uint8_t *dead_temps;
+ unsigned int dead_iargs;
+
+ gen_opc_ptr++; /* skip end */
+
+ nb_ops = gen_opc_ptr - gen_opc_buf;
+
+ s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
+
+ dead_temps = tcg_malloc(s->nb_temps);
+ memset(dead_temps, 1, s->nb_temps);
+
+ args = gen_opparam_ptr;
+ op_index = nb_ops - 1;
+ while (op_index >= 0) {
+ op = gen_opc_buf[op_index];
+ def = &tcg_op_defs[op];
+ switch(op) {
+ case INDEX_op_call:
+ {
+ int call_flags;
+
+ nb_args = args[-1];
+ args -= nb_args;
+ nb_iargs = args[0] & 0xffff;
+ nb_oargs = args[0] >> 16;
+ args++;
+ call_flags = args[nb_oargs + nb_iargs];
+
+ /* pure functions can be removed if their result is not
+ used */
+ if (call_flags & TCG_CALL_PURE) {
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (!dead_temps[arg])
+ goto do_not_remove_call;
+ }
+ tcg_set_nop(s, gen_opc_buf + op_index,
+ args - 1, nb_args);
+ } else {
+ do_not_remove_call:
+
+ /* output args are dead */
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ dead_temps[arg] = 1;
+ }
+
+ if (!(call_flags & TCG_CALL_CONST)) {
+ /* globals are live (they may be used by the call) */
+ memset(dead_temps, 0, s->nb_globals);
+ }
+
+ /* input args are live */
+ dead_iargs = 0;
+ for(i = 0; i < nb_iargs; i++) {
+ arg = args[i + nb_oargs];
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ if (dead_temps[arg]) {
+ dead_iargs |= (1 << i);
+ }
+ dead_temps[arg] = 0;
+ }
+ }
+ s->op_dead_iargs[op_index] = dead_iargs;
+ }
+ args--;
+ }
+ break;
+ case INDEX_op_set_label:
+ args--;
+ /* mark end of basic block */
+ tcg_la_bb_end(s, dead_temps);
+ break;
+ case INDEX_op_debug_insn_start:
+ args -= def->nb_args;
+ break;
+ case INDEX_op_nopn:
+ nb_args = args[-1];
+ args -= nb_args;
+ break;
+ case INDEX_op_discard:
+ args--;
+ /* mark the temporary as dead */
+ dead_temps[args[0]] = 1;
+ break;
+ case INDEX_op_end:
+ break;
+ /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+ default:
+ args -= def->nb_args;
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+ /* Test if the operation can be removed because all
+ its outputs are dead. We assume that nb_oargs == 0
+ implies side effects */
+ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (!dead_temps[arg])
+ goto do_not_remove;
+ }
+ tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+#ifdef CONFIG_PROFILER
+ s->del_op_count++;
+#endif
+ } else {
+ do_not_remove:
+
+ /* output args are dead */
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ dead_temps[arg] = 1;
+ }
+
+ /* if end of basic block, update */
+ if (def->flags & TCG_OPF_BB_END) {
+ tcg_la_bb_end(s, dead_temps);
+ } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
+ /* globals are live */
+ memset(dead_temps, 0, s->nb_globals);
+ }
+
+ /* input args are live */
+ dead_iargs = 0;
+ for(i = 0; i < nb_iargs; i++) {
+ arg = args[i + nb_oargs];
+ if (dead_temps[arg]) {
+ dead_iargs |= (1 << i);
+ }
+ dead_temps[arg] = 0;
+ }
+ s->op_dead_iargs[op_index] = dead_iargs;
+ }
+ break;
+ }
+ op_index--;
+ }
+
+ if (args != gen_opparam_buf)
+ tcg_abort();
+}
+#else
+/* dummy liveness analysis */
+static void tcg_liveness_analysis(TCGContext *s)
+{
+ int nb_ops;
+ nb_ops = gen_opc_ptr - gen_opc_buf;
+
+ s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
+ memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t));
+}
+#endif
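+
+/* For reference, a minimal sketch of how the op_dead_iargs masks built
+ above are consumed by the register allocator below (it uses the
+ equivalent IS_DEAD_IARG macro): bit n of an op's mask is set when
+ input argument n is dead after that op:
+
+ unsigned int dead_iargs = s->op_dead_iargs[op_index];
+ if ((dead_iargs >> n) & 1) {
+ // input n is not read again; its register may be reused
+ }
+*/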
+
+#ifndef NDEBUG
+static void dump_regs(TCGContext *s)
+{
+ TCGTemp *ts;
+ int i;
+ char buf[64];
+
+ for(i = 0; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
+ switch(ts->val_type) {
+ case TEMP_VAL_REG:
+ printf("%s", tcg_target_reg_names[ts->reg]);
+ break;
+ case TEMP_VAL_MEM:
+ printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
+ break;
+ case TEMP_VAL_CONST:
+ printf("$0x%" TCG_PRIlx, ts->val);
+ break;
+ case TEMP_VAL_DEAD:
+ printf("D");
+ break;
+ default:
+ printf("???");
+ break;
+ }
+ printf("\n");
+ }
+
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (s->reg_to_temp[i] >= 0) {
+ printf("%s: %s\n",
+ tcg_target_reg_names[i],
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
+ }
+ }
+}
+
+static void check_regs(TCGContext *s)
+{
+ int reg, k;
+ TCGTemp *ts;
+ char buf[64];
+
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ k = s->reg_to_temp[reg];
+ if (k >= 0) {
+ ts = &s->temps[k];
+ if (ts->val_type != TEMP_VAL_REG ||
+ ts->reg != reg) {
+ printf("Inconsistency for register %s:\n",
+ tcg_target_reg_names[reg]);
+ goto fail;
+ }
+ }
+ }
+ for(k = 0; k < s->nb_temps; k++) {
+ ts = &s->temps[k];
+ if (ts->val_type == TEMP_VAL_REG &&
+ !ts->fixed_reg &&
+ s->reg_to_temp[ts->reg] != k) {
+ printf("Inconsistency for temp %s:\n",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
+ fail:
+ printf("reg state:\n");
+ dump_regs(s);
+ tcg_abort();
+ }
+ }
+}
+#endif
+
+static void temp_allocate_frame(TCGContext *s, int temp)
+{
+ TCGTemp *ts;
+ ts = &s->temps[temp];
+ s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1);
+#ifndef VBOX
+ if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end)
+#else
+ if ((tcg_target_long)s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end)
+#endif
+ tcg_abort();
+ ts->mem_offset = s->current_frame_offset;
+ ts->mem_reg = s->frame_reg;
+ ts->mem_allocated = 1;
+ s->current_frame_offset += sizeof(tcg_target_long);
+}
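+
+/* The rounding above is the usual power-of-two align-up idiom: with
+ A = sizeof(tcg_target_long), (off + A - 1) & ~(A - 1) rounds off up to
+ the next multiple of A. A minimal worked example, assuming A == 8:
+ 5 -> 8, 8 -> 8, 13 -> 16. */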
+
+/* free register 'reg' by spilling the corresponding temporary if necessary */
+static void tcg_reg_free(TCGContext *s, int reg)
+{
+ TCGTemp *ts;
+ int temp;
+
+ temp = s->reg_to_temp[reg];
+ if (temp != -1) {
+ ts = &s->temps[temp];
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (!ts->mem_coherent) {
+ if (!ts->mem_allocated)
+ temp_allocate_frame(s, temp);
+ tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ }
+ ts->val_type = TEMP_VAL_MEM;
+ s->reg_to_temp[reg] = -1;
+ }
+}
+
+/* Allocate a register belonging to reg1 & ~reg2 */
+static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
+{
+ int i, reg;
+ TCGRegSet reg_ct;
+
+ tcg_regset_andnot(reg_ct, reg1, reg2);
+
+ /* first try free registers */
+ for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
+ reg = tcg_target_reg_alloc_order[i];
+ if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
+ return reg;
+ }
+
+ /* XXX: do better spill choice */
+ for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
+ reg = tcg_target_reg_alloc_order[i];
+ if (tcg_regset_test_reg(reg_ct, reg)) {
+ tcg_reg_free(s, reg);
+ return reg;
+ }
+ }
+
+ tcg_abort();
+}
+
+/* save a temporary to memory. 'allocated_regs' is used in case a
+ temporary register needs to be allocated to store a constant. */
+static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+ TCGTemp *ts;
+ int reg;
+
+ ts = &s->temps[temp];
+ if (!ts->fixed_reg) {
+ switch(ts->val_type) {
+ case TEMP_VAL_REG:
+ tcg_reg_free(s, ts->reg);
+ break;
+ case TEMP_VAL_DEAD:
+ ts->val_type = TEMP_VAL_MEM;
+ break;
+ case TEMP_VAL_CONST:
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ allocated_regs);
+ if (!ts->mem_allocated)
+ temp_allocate_frame(s, temp);
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ ts->val_type = TEMP_VAL_MEM;
+ break;
+ case TEMP_VAL_MEM:
+ break;
+ default:
+ tcg_abort();
+ }
+ }
+}
+
+/* save globals to their canonical location and assume they can be
+ modified by the following code. 'allocated_regs' is used in case a
+ temporary register needs to be allocated to store a constant. */
+static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for(i = 0; i < s->nb_globals; i++) {
+ temp_save(s, i, allocated_regs);
+ }
+}
+
+/* at the end of a basic block, we assume all temporaries are dead and
+ all globals are stored at their canonical location. */
+static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
+{
+ TCGTemp *ts;
+ int i;
+
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ if (ts->temp_local) {
+ temp_save(s, i, allocated_regs);
+ } else {
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = -1;
+ }
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+
+ save_globals(s, allocated_regs);
+}
+
+#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1)
+
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
+{
+ TCGTemp *ots;
+ tcg_target_ulong val;
+
+ ots = &s->temps[args[0]];
+ val = args[1];
+
+ if (ots->fixed_reg) {
+ /* for fixed registers, we do not do any constant
+ propagation */
+ tcg_out_movi(s, ots->type, ots->reg, val);
+ } else {
+ /* The movi is not explicitly generated here */
+ if (ots->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ots->reg] = -1;
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = val;
+ }
+}
+
+static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
+ const TCGArg *args,
+ unsigned int dead_iargs)
+{
+ TCGTemp *ts, *ots;
+ int reg;
+ const TCGArgConstraint *arg_ct;
+
+ ots = &s->temps[args[0]];
+ ts = &s->temps[args[1]];
+ arg_ct = &def->args_ct[0];
+
+ /* XXX: always mark arg dead if IS_DEAD_IARG(0) */
+ if (ts->val_type == TEMP_VAL_REG) {
+ if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) {
+ /* the mov can be suppressed */
+ if (ots->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ots->reg] = -1;
+ reg = ts->reg;
+ s->reg_to_temp[reg] = -1;
+ ts->val_type = TEMP_VAL_DEAD;
+ } else {
+ if (ots->val_type == TEMP_VAL_REG) {
+ reg = ots->reg;
+ } else {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
+ }
+ if (ts->reg != reg) {
+ tcg_out_mov(s, ots->type, reg, ts->reg);
+ }
+ }
+ } else if (ts->val_type == TEMP_VAL_MEM) {
+ if (ots->val_type == TEMP_VAL_REG) {
+ reg = ots->reg;
+ } else {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
+ }
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ if (ots->fixed_reg) {
+ reg = ots->reg;
+ tcg_out_movi(s, ots->type, reg, ts->val);
+ } else {
+ /* propagate constant */
+ if (ots->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ots->reg] = -1;
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = ts->val;
+ return;
+ }
+ } else {
+ tcg_abort();
+ }
+ s->reg_to_temp[reg] = args[0];
+ ots->reg = reg;
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+}
+
+static void tcg_reg_alloc_op(TCGContext *s,
+ const TCGOpDef *def, TCGOpcode opc,
+ const TCGArg *args,
+ unsigned int dead_iargs)
+{
+ TCGRegSet allocated_regs;
+ int i, k, nb_iargs, nb_oargs, reg;
+ TCGArg arg;
+ const TCGArgConstraint *arg_ct;
+ TCGTemp *ts;
+ TCGArg new_args[TCG_MAX_OP_ARGS];
+ int const_args[TCG_MAX_OP_ARGS];
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+
+ /* copy constants */
+ memcpy(new_args + nb_oargs + nb_iargs,
+ args + nb_oargs + nb_iargs,
+ sizeof(TCGArg) * def->nb_cargs);
+
+ /* satisfy input constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(k = 0; k < nb_iargs; k++) {
+ i = def->sorted_args[nb_oargs + k];
+ arg = args[i];
+ arg_ct = &def->args_ct[i];
+ ts = &s->temps[arg];
+ if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 1;
+ s->reg_to_temp[reg] = arg;
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ if (tcg_target_const_match(ts->val, arg_ct)) {
+ /* constant is OK for instruction */
+ const_args[i] = 1;
+ new_args[i] = ts->val;
+ goto iarg_end;
+ } else {
+ /* need to move to a register */
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+ }
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (arg_ct->ct & TCG_CT_IALIAS) {
+ if (ts->fixed_reg) {
+ /* if fixed register, we must allocate a new register
+ if the alias is not the same register */
+ if (arg != args[arg_ct->alias_index])
+ goto allocate_in_reg;
+ } else {
+ /* if the input is aliased to an output and if it is
+ not dead after the instruction, we must allocate
+ a new register and move it */
+ if (!IS_DEAD_IARG(i - nb_oargs))
+ goto allocate_in_reg;
+ }
+ }
+ reg = ts->reg;
+ if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+ /* nothing to do : the constraint is satisfied */
+ } else {
+ allocate_in_reg:
+ /* allocate a new register matching the constraint
+ and move the temporary register into it */
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_mov(s, ts->type, reg, ts->reg);
+ }
+ new_args[i] = reg;
+ const_args[i] = 0;
+ tcg_regset_set_reg(allocated_regs, reg);
+ iarg_end: ;
+ }
+
+ if (def->flags & TCG_OPF_BB_END) {
+ tcg_reg_alloc_bb_end(s, allocated_regs);
+ } else {
+ /* mark dead temporaries and free the associated registers */
+ for(i = 0; i < nb_iargs; i++) {
+ arg = args[nb_oargs + i];
+ if (IS_DEAD_IARG(i)) {
+ ts = &s->temps[arg];
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+ }
+
+ if (def->flags & TCG_OPF_CALL_CLOBBER) {
+ /* XXX: permit generic clobber register list ? */
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+ tcg_reg_free(s, reg);
+ }
+ }
+ /* XXX: for load/store we could do that only for the slow path
+ (i.e. when a memory callback is called) */
+
+ /* store globals and free associated registers (we assume the insn
+ can modify any global). */
+ save_globals(s, allocated_regs);
+ }
+
+ /* satisfy the output constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(k = 0; k < nb_oargs; k++) {
+ i = def->sorted_args[k];
+ arg = args[i];
+ arg_ct = &def->args_ct[i];
+ ts = &s->temps[arg];
+ if (arg_ct->ct & TCG_CT_ALIAS) {
+ reg = new_args[arg_ct->alias_index];
+ } else {
+ /* if fixed register, we try to use it */
+ reg = ts->reg;
+ if (ts->fixed_reg &&
+ tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+ goto oarg_end;
+ }
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ }
+ tcg_regset_set_reg(allocated_regs, reg);
+ /* if a fixed register is used, then a move will be done afterwards */
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ /* temp value is modified, so the value kept in memory is
+ potentially not the same */
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+ oarg_end:
+ new_args[i] = reg;
+ }
+ }
+
+ /* emit instruction */
+ tcg_out_op(s, opc, new_args, const_args);
+
+ /* move the outputs in the correct register if needed */
+ for(i = 0; i < nb_oargs; i++) {
+ ts = &s->temps[args[i]];
+ reg = new_args[i];
+ if (ts->fixed_reg && ts->reg != reg) {
+ tcg_out_mov(s, ts->type, ts->reg, reg);
+ }
+ }
+}
+
+#ifdef TCG_TARGET_STACK_GROWSUP
+#define STACK_DIR(x) (-(x))
+#else
+#define STACK_DIR(x) (x)
+#endif
+
+static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
+ TCGOpcode opc, const TCGArg *args,
+ unsigned int dead_iargs)
+{
+ int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
+ TCGArg arg, func_arg;
+ TCGTemp *ts;
+ tcg_target_long stack_offset, call_stack_size, func_addr;
+ int const_func_arg, allocate_args;
+ TCGRegSet allocated_regs;
+ const TCGArgConstraint *arg_ct;
+
+ arg = *args++;
+
+ nb_oargs = arg >> 16;
+ nb_iargs = arg & 0xffff;
+ nb_params = nb_iargs - 1;
+
+ flags = args[nb_oargs + nb_iargs];
+
+ nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+ if (nb_regs > nb_params)
+ nb_regs = nb_params;
+
+ /* assign stack slots first */
+ /* XXX: preallocate call stack */
+ call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
+ call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
+ if (allocate_args) {
+ tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
+ }
+
+ stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
+ for(i = nb_regs; i < nb_params; i++) {
+ arg = args[nb_oargs + i];
+#ifdef TCG_TARGET_STACK_GROWSUP
+ stack_offset -= sizeof(tcg_target_long);
+#endif
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = &s->temps[arg];
+ if (ts->val_type == TEMP_VAL_REG) {
+ tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
+ } else if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ s->reserved_regs);
+ /* XXX: not correct if reading values from the stack */
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ s->reserved_regs);
+ /* XXX: sign extend may be needed on some targets */
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
+ } else {
+ tcg_abort();
+ }
+ }
+#ifndef TCG_TARGET_STACK_GROWSUP
+ stack_offset += sizeof(tcg_target_long);
+#endif
+ }
+
+ /* assign input registers */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(i = 0; i < nb_regs; i++) {
+ arg = args[nb_oargs + i];
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = &s->temps[arg];
+ reg = tcg_target_call_iarg_regs[i];
+ tcg_reg_free(s, reg);
+ if (ts->val_type == TEMP_VAL_REG) {
+ if (ts->reg != reg) {
+ tcg_out_mov(s, ts->type, reg, ts->reg);
+ }
+ } else if (ts->val_type == TEMP_VAL_MEM) {
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ /* XXX: sign extend ? */
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ } else {
+ tcg_abort();
+ }
+ tcg_regset_set_reg(allocated_regs, reg);
+ }
+ }
+
+ /* assign function address */
+ func_arg = args[nb_oargs + nb_iargs - 1];
+ arg_ct = &def->args_ct[0];
+ ts = &s->temps[func_arg];
+ func_addr = ts->val;
+ const_func_arg = 0;
+ if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ func_arg = reg;
+ tcg_regset_set_reg(allocated_regs, reg);
+ } else if (ts->val_type == TEMP_VAL_REG) {
+ reg = ts->reg;
+ if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_mov(s, ts->type, reg, ts->reg);
+ }
+ func_arg = reg;
+ tcg_regset_set_reg(allocated_regs, reg);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ if (tcg_target_const_match(func_addr, arg_ct)) {
+ const_func_arg = 1;
+ func_arg = func_addr;
+ } else {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_movi(s, ts->type, reg, func_addr);
+ func_arg = reg;
+ tcg_regset_set_reg(allocated_regs, reg);
+ }
+ } else {
+ tcg_abort();
+ }
+
+
+ /* mark dead temporaries and free the associated registers */
+ for(i = 0; i < nb_iargs; i++) {
+ arg = args[nb_oargs + i];
+ if (IS_DEAD_IARG(i)) {
+ ts = &s->temps[arg];
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+ }
+
+ /* clobber call registers */
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+ tcg_reg_free(s, reg);
+ }
+ }
+
+ /* store globals and free associated registers (we assume the call
+ can modify any global). */
+ if (!(flags & TCG_CALL_CONST)) {
+ save_globals(s, allocated_regs);
+ }
+
+ tcg_out_op(s, opc, &func_arg, &const_func_arg);
+
+ if (allocate_args) {
+ tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size));
+ }
+
+ /* assign output registers and emit moves if needed */
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ ts = &s->temps[arg];
+ reg = tcg_target_call_oarg_regs[i];
+ assert(s->reg_to_temp[reg] == -1);
+ if (ts->fixed_reg) {
+ if (ts->reg != reg) {
+ tcg_out_mov(s, ts->type, ts->reg, reg);
+ }
+ } else {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+ }
+
+ return nb_iargs + nb_oargs + def->nb_cargs + 1;
+}
+
+#ifdef CONFIG_PROFILER
+
+static int64_t tcg_table_op_count[NB_OPS];
+
+static void dump_op_count(void)
+{
+ int i;
+ FILE *f;
+ f = fopen("/tmp/op.log", "w");
+ for(i = INDEX_op_end; i < NB_OPS; i++) {
+ fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]);
+ }
+ fclose(f);
+}
+#endif
+
+
+static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
+ intptr_t search_pc)
+{
+ TCGOpcode opc;
+ int op_index;
+ const TCGOpDef *def;
+ unsigned int dead_iargs;
+ const TCGArg *args;
+
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+ qemu_log("OP:\n");
+ tcg_dump_ops(s, logfile);
+ qemu_log("\n");
+ }
+#endif
+
+#ifdef CONFIG_PROFILER
+ s->la_time -= profile_getclock();
+#endif
+ tcg_liveness_analysis(s);
+#ifdef CONFIG_PROFILER
+ s->la_time += profile_getclock();
+#endif
+
+#ifdef DEBUG_DISAS
+# ifdef USE_LIVENESS_ANALYSIS /* vbox */
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
+ qemu_log("OP after liveness analysis:\n");
+ tcg_dump_ops(s, logfile);
+ qemu_log("\n");
+ }
+# endif /* USE_LIVENESS_ANALYSIS - vbox */
+#endif
+
+ tcg_reg_alloc_start(s);
+
+ s->code_buf = gen_code_buf;
+ s->code_ptr = gen_code_buf;
+
+ args = gen_opparam_buf;
+ op_index = 0;
+
+ for(;;) {
+ opc = gen_opc_buf[op_index];
+#ifdef CONFIG_PROFILER
+ tcg_table_op_count[opc]++;
+#endif
+ def = &tcg_op_defs[opc];
+#if 0
+ printf("%s: %d %d %d\n", def->name,
+ def->nb_oargs, def->nb_iargs, def->nb_cargs);
+ // dump_regs(s);
+#endif
+ switch(opc) {
+ case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_mov_i64:
+#endif
+ dead_iargs = s->op_dead_iargs[op_index];
+ tcg_reg_alloc_mov(s, def, args, dead_iargs);
+ break;
+ case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_movi_i64:
+#endif
+ tcg_reg_alloc_movi(s, args);
+ break;
+ case INDEX_op_debug_insn_start:
+ /* debug instruction */
+ break;
+ case INDEX_op_nop:
+ case INDEX_op_nop1:
+ case INDEX_op_nop2:
+ case INDEX_op_nop3:
+ break;
+ case INDEX_op_nopn:
+ args += args[0];
+ goto next;
+ case INDEX_op_discard:
+ {
+ TCGTemp *ts;
+ ts = &s->temps[args[0]];
+ /* mark the temporary as dead */
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+ break;
+ case INDEX_op_set_label:
+ tcg_reg_alloc_bb_end(s, s->reserved_regs);
+ tcg_out_label(s, args[0], (intptr_t)s->code_ptr);
+ break;
+ case INDEX_op_call:
+ dead_iargs = s->op_dead_iargs[op_index];
+ args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs);
+ goto next;
+ case INDEX_op_end:
+ goto the_end;
+ default:
+ /* Note: it would be much faster to have specialized
+ register allocator functions for some common
+ argument patterns */
+ dead_iargs = s->op_dead_iargs[op_index];
+ tcg_reg_alloc_op(s, def, opc, args, dead_iargs);
+ break;
+ }
+ args += def->nb_args;
+ next:
+ if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) {
+ return op_index;
+ }
+ op_index++;
+#ifndef NDEBUG
+ check_regs(s);
+#endif
+ }
+ the_end:
+ return -1;
+}
+
+int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
+{
+#ifdef CONFIG_PROFILER
+ {
+ int n;
+ n = (gen_opc_ptr - gen_opc_buf);
+ s->op_count += n;
+ if (n > s->op_count_max)
+ s->op_count_max = n;
+
+ s->temp_count += s->nb_temps;
+ if (s->nb_temps > s->temp_count_max)
+ s->temp_count_max = s->nb_temps;
+ }
+#endif
+
+ tcg_gen_code_common(s, gen_code_buf, -1);
+
+ /* flush instruction cache */
+ flush_icache_range((uintptr_t)gen_code_buf,
+ (uintptr_t)s->code_ptr);
+ return s->code_ptr - gen_code_buf;
+}
+
+/* Return the index of the first micro operation whose generated code
+ extends past 'offset' bytes from the start of the TB. The contents of
+ gen_code_buf must not be changed, though writing the same values is ok.
+ Return -1 if not found. */
+int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset)
+{
+ return tcg_gen_code_common(s, gen_code_buf, offset);
+}
+
+#ifdef CONFIG_PROFILER
+void tcg_dump_info(FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+ TCGContext *s = &tcg_ctx;
+ int64_t tot;
+
+ tot = s->interm_time + s->code_time;
+ cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
+ tot, tot / 2.4e9);
+ cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
+ s->tb_count,
+ s->tb_count1 - s->tb_count,
+ s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
+ cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
+ s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
+ cpu_fprintf(f, "deleted ops/TB %0.2f\n",
+ s->tb_count ?
+ (double)s->del_op_count / s->tb_count : 0);
+ cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
+ s->tb_count ?
+ (double)s->temp_count / s->tb_count : 0,
+ s->temp_count_max);
+
+ cpu_fprintf(f, "cycles/op %0.1f\n",
+ s->op_count ? (double)tot / s->op_count : 0);
+ cpu_fprintf(f, "cycles/in byte %0.1f\n",
+ s->code_in_len ? (double)tot / s->code_in_len : 0);
+ cpu_fprintf(f, "cycles/out byte %0.1f\n",
+ s->code_out_len ? (double)tot / s->code_out_len : 0);
+ if (tot == 0)
+ tot = 1;
+ cpu_fprintf(f, " gen_interm time %0.1f%%\n",
+ (double)s->interm_time / tot * 100.0);
+ cpu_fprintf(f, " gen_code time %0.1f%%\n",
+ (double)s->code_time / tot * 100.0);
+ cpu_fprintf(f, "liveness/code time %0.1f%%\n",
+ (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
+ cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
+ s->restore_count);
+ cpu_fprintf(f, " avg cycles %0.1f\n",
+ s->restore_count ? (double)s->restore_time / s->restore_count : 0);
+
+ dump_op_count();
+}
+#else
+void tcg_dump_info(FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+ cpu_fprintf(f, "[TCG profiler not compiled]\n");
+}
+#endif
diff --git a/src/recompiler/tcg/tcg.h b/src/recompiler/tcg/tcg.h
new file mode 100644
index 00000000..819fa6c7
--- /dev/null
+++ b/src/recompiler/tcg/tcg.h
@@ -0,0 +1,512 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "tcg-target.h"
+#include "tcg-runtime.h"
+
+#if TCG_TARGET_REG_BITS == 32
+typedef int32_t tcg_target_long;
+typedef uint32_t tcg_target_ulong;
+#define TCG_PRIlx PRIx32
+#define TCG_PRIld PRId32
+#elif TCG_TARGET_REG_BITS == 64
+typedef int64_t tcg_target_long;
+typedef uint64_t tcg_target_ulong;
+#define TCG_PRIlx PRIx64
+#define TCG_PRIld PRId64
+#else
+#error unsupported
+#endif
+
+#if TCG_TARGET_NB_REGS <= 32
+typedef uint32_t TCGRegSet;
+#elif TCG_TARGET_NB_REGS <= 64
+typedef uint64_t TCGRegSet;
+#else
+#error unsupported
+#endif
+
+typedef enum TCGOpcode {
+#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
+#include "tcg-opc.h"
+#undef DEF
+ NB_OPS,
+} TCGOpcode;
+
+#define tcg_regset_clear(d) (d) = 0
+#define tcg_regset_set(d, s) (d) = (s)
+#define tcg_regset_set32(d, reg, val32) (d) |= (val32) << (reg)
+#define tcg_regset_set_reg(d, r) (d) |= 1L << (r)
+#define tcg_regset_reset_reg(d, r) (d) &= ~(1L << (r))
+#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1)
+#define tcg_regset_or(d, a, b) (d) = (a) | (b)
+#define tcg_regset_and(d, a, b) (d) = (a) & (b)
+#define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b)
+#define tcg_regset_not(d, a) (d) = ~(a)
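+
+/* A minimal usage sketch for the register-set macros above (hypothetical
+ register numbers; any values below TCG_TARGET_NB_REGS work):
+
+ TCGRegSet set;
+ tcg_regset_clear(set);
+ tcg_regset_set_reg(set, 3);        // set = { reg 3 }
+ tcg_regset_set_reg(set, 5);        // set = { reg 3, reg 5 }
+ if (tcg_regset_test_reg(set, 3))   // true
+ tcg_regset_reset_reg(set, 3);      // set = { reg 5 }
+*/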
+
+typedef struct TCGRelocation {
+ struct TCGRelocation *next;
+ int type;
+ uint8_t *ptr;
+ tcg_target_long addend;
+} TCGRelocation;
+
+typedef struct TCGLabel {
+ int has_value;
+ union {
+ tcg_target_ulong value;
+ TCGRelocation *first_reloc;
+ } u;
+} TCGLabel;
+
+typedef struct TCGPool {
+ struct TCGPool *next;
+ int size;
+ uint8_t data[0] __attribute__ ((aligned));
+} TCGPool;
+
+#define TCG_POOL_CHUNK_SIZE 32768
+
+#define TCG_MAX_LABELS 512
+
+#define TCG_MAX_TEMPS 512
+
+/* when the size of the arguments of a called function is smaller than
+ this value, they are statically allocated in the TB stack frame */
+#define TCG_STATIC_CALL_ARGS_SIZE 128
+
+typedef enum TCGType {
+ TCG_TYPE_I32,
+ TCG_TYPE_I64,
+ TCG_TYPE_COUNT, /* number of different types */
+
+ /* An alias for the size of the host register. */
+#if TCG_TARGET_REG_BITS == 32
+ TCG_TYPE_REG = TCG_TYPE_I32,
+#else
+ TCG_TYPE_REG = TCG_TYPE_I64,
+#endif
+
+ /* An alias for the size of the native pointer. We don't currently
+ support any hosts with 64-bit registers and 32-bit pointers. */
+ TCG_TYPE_PTR = TCG_TYPE_REG,
+
+ /* An alias for the size of the target "long", aka register. */
+#if TARGET_LONG_BITS == 64
+ TCG_TYPE_TL = TCG_TYPE_I64,
+#else
+ TCG_TYPE_TL = TCG_TYPE_I32,
+#endif
+} TCGType;
+
+typedef tcg_target_ulong TCGArg;
+
+/* Define a type and accessor macros for variables. Using a struct is
+ nice because it gives some level of type safety. Ideally the compiler
+ would be able to see through all this, but in practice it cannot,
+ especially on targets with braindamaged ABIs (e.g. i386).
+ We use plain int by default to avoid this runtime overhead.
+ Users of tcg_gen_* don't need to know about any of this, and should
+ treat TCGv as an opaque type.
+ In addition we do typechecking for different types of variables. TCGv_i32
+ and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr
+ are aliases for target_ulong and host pointer sized values respectively.
+ */
+
+#ifdef CONFIG_DEBUG_TCG
+#define DEBUG_TCGV 1
+#endif
+
+#ifdef DEBUG_TCGV
+
+typedef struct
+{
+ int i32;
+} TCGv_i32;
+
+typedef struct
+{
+ int i64;
+} TCGv_i64;
+
+#define MAKE_TCGV_I32(i) __extension__ \
+ ({ TCGv_i32 make_tcgv_tmp = {i}; make_tcgv_tmp;})
+#define MAKE_TCGV_I64(i) __extension__ \
+ ({ TCGv_i64 make_tcgv_tmp = {i}; make_tcgv_tmp;})
+#define GET_TCGV_I32(t) ((t).i32)
+#define GET_TCGV_I64(t) ((t).i64)
+#if TCG_TARGET_REG_BITS == 32
+#define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t))
+#define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1)
+#endif
+
+#else /* !DEBUG_TCGV */
+
+typedef int TCGv_i32;
+typedef int TCGv_i64;
+#define MAKE_TCGV_I32(x) (x)
+#define MAKE_TCGV_I64(x) (x)
+#define GET_TCGV_I32(t) (t)
+#define GET_TCGV_I64(t) (t)
+
+#if TCG_TARGET_REG_BITS == 32
+#define TCGV_LOW(t) (t)
+#define TCGV_HIGH(t) ((t) + 1)
+#endif
+
+#endif /* DEBUG_TCGV */
+
+#define TCGV_EQUAL_I32(a, b) (GET_TCGV_I32(a) == GET_TCGV_I32(b))
+#define TCGV_EQUAL_I64(a, b) (GET_TCGV_I64(a) == GET_TCGV_I64(b))
+
+/* Dummy definition to avoid compiler warnings. */
+#define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1)
+#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
+
+/* call flags */
+#define TCG_CALL_TYPE_MASK 0x000f
+#define TCG_CALL_TYPE_STD 0x0000 /* standard C call */
+#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */
+#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */
+#define TCG_CALL_TYPE_REGPARM 0x0003 /* i386 style regparm call (3 regs) */
+/* A pure function only reads its arguments and TCG global variables
+ and cannot raise exceptions. Hence a call to a pure function can be
+ safely suppressed if the return value is not used. */
+#define TCG_CALL_PURE 0x0010
+/* A const function only reads its arguments and does not use TCG
+ global variables. Hence a call to such a function does not
+ save TCG global variables back to their canonical location. */
+#define TCG_CALL_CONST 0x0020
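+
+/* A minimal sketch of how these flags combine and are tested (the
+ liveness pass in tcg.c checks exactly these bits):
+
+ int call_flags = TCG_CALL_TYPE_STD | TCG_CALL_PURE | TCG_CALL_CONST;
+ if (call_flags & TCG_CALL_PURE) {
+ // the call may be dropped when all of its outputs are dead
+ }
+ if (!(call_flags & TCG_CALL_CONST)) {
+ // globals must be saved to their canonical location around the call
+ }
+*/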
+
+/* used to align parameters */
+#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
+#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
+
+typedef enum {
+ TCG_COND_EQ,
+ TCG_COND_NE,
+ TCG_COND_LT,
+ TCG_COND_GE,
+ TCG_COND_LE,
+ TCG_COND_GT,
+ /* unsigned */
+ TCG_COND_LTU,
+ TCG_COND_GEU,
+ TCG_COND_LEU,
+ TCG_COND_GTU,
+} TCGCond;
+
+/* Invert the sense of the comparison. */
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+ return (TCGCond)(c ^ 1);
+}
+
+/* Swap the operands in a comparison. */
+static inline TCGCond tcg_swap_cond(TCGCond c)
+{
+ int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
+ return (TCGCond)(c ^ mask);
+}
+
+static inline TCGCond tcg_unsigned_cond(TCGCond c)
+{
+ return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+}
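+
+/* Worked examples of the condition-code arithmetic above, following the
+ enum order given:
+ tcg_invert_cond(TCG_COND_LT)   == TCG_COND_GE   (c ^ 1)
+ tcg_invert_cond(TCG_COND_EQ)   == TCG_COND_NE
+ tcg_swap_cond(TCG_COND_LT)     == TCG_COND_GT   (a < b  <=>  b > a)
+ tcg_swap_cond(TCG_COND_LEU)    == TCG_COND_GEU
+ tcg_unsigned_cond(TCG_COND_LT) == TCG_COND_LTU  (c + 4)
+*/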
+
+#define TEMP_VAL_DEAD 0
+#define TEMP_VAL_REG 1
+#define TEMP_VAL_MEM 2
+#define TEMP_VAL_CONST 3
+
+/* XXX: optimize memory layout */
+typedef struct TCGTemp {
+ TCGType base_type;
+ TCGType type;
+ int val_type;
+ int reg;
+ tcg_target_long val;
+ int mem_reg;
+ tcg_target_long mem_offset;
+ unsigned int fixed_reg:1;
+ unsigned int mem_coherent:1;
+ unsigned int mem_allocated:1;
+ unsigned int temp_local:1; /* If true, the temp is saved across
+ basic blocks. Otherwise, it is not
+ preserved across basic blocks. */
+ unsigned int temp_allocated:1; /* never used for code gen */
+ /* index of next free temp of same base type, -1 if end */
+ int next_free_temp;
+ const char *name;
+} TCGTemp;
+
+typedef struct TCGHelperInfo {
+ tcg_target_ulong func;
+ const char *name;
+} TCGHelperInfo;
+
+typedef struct TCGContext TCGContext;
+
+struct TCGContext {
+ uint8_t *pool_cur, *pool_end;
+ TCGPool *pool_first, *pool_current;
+ TCGLabel *labels;
+ int nb_labels;
+ TCGTemp *temps; /* globals first, temps after */
+ int nb_globals;
+ int nb_temps;
+ /* index of free temps, -1 if none */
+ int first_free_temp[TCG_TYPE_COUNT * 2];
+
+ /* goto_tb support */
+ uint8_t *code_buf;
+ uintptr_t *tb_next;
+ uint16_t *tb_next_offset;
+ uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
+
+ /* liveness analysis */
+ uint16_t *op_dead_iargs; /* for each operation, each bit tells if the
+ corresponding input argument is dead */
+
+ /* tells in which temporary a given register is. It does not take
+ into account fixed registers */
+ int reg_to_temp[TCG_TARGET_NB_REGS];
+ TCGRegSet reserved_regs;
+ tcg_target_long current_frame_offset;
+ tcg_target_long frame_start;
+ tcg_target_long frame_end;
+ int frame_reg;
+
+ uint8_t *code_ptr;
+ TCGTemp static_temps[TCG_MAX_TEMPS];
+
+ TCGHelperInfo *helpers;
+ int nb_helpers;
+ int allocated_helpers;
+ int helpers_sorted;
+
+#ifdef CONFIG_PROFILER
+ /* profiling info */
+ int64_t tb_count1;
+ int64_t tb_count;
+ int64_t op_count; /* total insn count */
+ int op_count_max; /* max insn per TB */
+ int64_t temp_count;
+ int temp_count_max;
+ int64_t del_op_count;
+ int64_t code_in_len;
+ int64_t code_out_len;
+ int64_t interm_time;
+ int64_t code_time;
+ int64_t la_time;
+ int64_t restore_count;
+ int64_t restore_time;
+#endif
+};
+
+extern TCGContext tcg_ctx;
+extern uint16_t *gen_opc_ptr;
+extern TCGArg *gen_opparam_ptr;
+extern uint16_t gen_opc_buf[];
+extern TCGArg gen_opparam_buf[];
+
+/* pool based memory allocation */
+
+void *tcg_malloc_internal(TCGContext *s, int size);
+void tcg_pool_reset(TCGContext *s);
+void tcg_pool_delete(TCGContext *s);
+
+static inline void *tcg_malloc(int size)
+{
+ TCGContext *s = &tcg_ctx;
+ uint8_t *ptr, *ptr_end;
+ size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
+ ptr = s->pool_cur;
+ ptr_end = ptr + size;
+ if (unlikely(ptr_end > s->pool_end)) {
+ return tcg_malloc_internal(&tcg_ctx, size);
+ } else {
+ s->pool_cur = ptr_end;
+ return ptr;
+ }
+}
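+
+/* A minimal usage sketch: pool allocations are cheap bump-pointer
+ allocations that stay valid until the next tcg_pool_reset(); e.g. the
+ liveness pass allocates its per-op masks this way:
+
+ uint16_t *masks = tcg_malloc(nb_ops * sizeof(uint16_t));
+*/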
+
+void tcg_context_init(TCGContext *s);
+void tcg_prologue_init(TCGContext *s);
+void tcg_func_start(TCGContext *s);
+
+int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf);
+int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset);
+
+void tcg_set_frame(TCGContext *s, int reg,
+ tcg_target_long start, tcg_target_long size);
+
+TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name);
+TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset,
+ const char *name);
+TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
+static inline TCGv_i32 tcg_temp_new_i32(void)
+{
+ return tcg_temp_new_internal_i32(0);
+}
+static inline TCGv_i32 tcg_temp_local_new_i32(void)
+{
+ return tcg_temp_new_internal_i32(1);
+}
+void tcg_temp_free_i32(TCGv_i32 arg);
+char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg);
+
+TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name);
+TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset,
+ const char *name);
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+static inline TCGv_i64 tcg_temp_new_i64(void)
+{
+ return tcg_temp_new_internal_i64(0);
+}
+static inline TCGv_i64 tcg_temp_local_new_i64(void)
+{
+ return tcg_temp_new_internal_i64(1);
+}
+void tcg_temp_free_i64(TCGv_i64 arg);
+char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
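+
+/* A minimal usage sketch for the temporary API above (a pattern typically
+ followed by the target front ends in translate.c):
+
+ TCGv_i32 t0 = tcg_temp_new_i32();       // dead at basic block end
+ TCGv_i32 t1 = tcg_temp_local_new_i32(); // preserved across basic blocks
+ // ... emit ops using t0 and t1 ...
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+*/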
+
+void tcg_dump_info(FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+
+#define TCG_CT_ALIAS 0x80
+#define TCG_CT_IALIAS 0x40
+#define TCG_CT_REG 0x01
+#define TCG_CT_CONST 0x02 /* any constant of register size */
+
+typedef struct TCGArgConstraint {
+ uint16_t ct;
+ uint8_t alias_index;
+ union {
+ TCGRegSet regs;
+ } u;
+} TCGArgConstraint;
+
+#define TCG_MAX_OP_ARGS 16
+
+#define TCG_OPF_BB_END 0x01 /* instruction defines the end of a basic
+ block */
+#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers
+ and potentially updates globals. */
+#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects: it
+ cannot be removed even if its outputs
+ are not used */
+
+typedef struct TCGOpDef {
+ const char *name;
+ uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
+ uint8_t flags;
+ TCGArgConstraint *args_ct;
+ int *sorted_args;
+#if defined(CONFIG_DEBUG_TCG)
+ int used;
+#endif
+} TCGOpDef;
+
+typedef struct TCGTargetOpDef {
+ TCGOpcode op;
+ const char *args_ct_str[TCG_MAX_OP_ARGS];
+} TCGTargetOpDef;
+
+#ifndef VBOX
+#define tcg_abort() \
+do {\
+ fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\
+ abort();\
+} while (0)
+#else /* VBOX */
+# define tcg_abort() \
+ do {\
+ remAbort(-1, "TCG fatal error: "__FILE__":" RT_XSTR(__LINE__)); \
+ } while (0)
+extern void qemu_qsort(void* base, size_t nmemb, size_t size,
+ int(*compar)(const void*, const void*));
+#define tcg_exit(status) \
+ do {\
+ remAbort(-1, "TCG exit: "__FILE__":" RT_XSTR(__LINE__));\
+ } while (0)
+#endif /* VBOX */
+
+void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
+
+#if TCG_TARGET_REG_BITS == 32
+#define tcg_const_ptr tcg_const_i32
+#define tcg_add_ptr tcg_add_i32
+#define tcg_sub_ptr tcg_sub_i32
+#define TCGv_ptr TCGv_i32
+#define GET_TCGV_PTR GET_TCGV_I32
+#define tcg_global_reg_new_ptr tcg_global_reg_new_i32
+#define tcg_global_mem_new_ptr tcg_global_mem_new_i32
+#define tcg_temp_new_ptr tcg_temp_new_i32
+#define tcg_temp_free_ptr tcg_temp_free_i32
+#else
+#define tcg_const_ptr tcg_const_i64
+#define tcg_add_ptr tcg_add_i64
+#define tcg_sub_ptr tcg_sub_i64
+#define TCGv_ptr TCGv_i64
+#define GET_TCGV_PTR GET_TCGV_I64
+#define tcg_global_reg_new_ptr tcg_global_reg_new_i64
+#define tcg_global_mem_new_ptr tcg_global_mem_new_i64
+#define tcg_temp_new_ptr tcg_temp_new_i64
+#define tcg_temp_free_ptr tcg_temp_free_i64
+#endif
+
+void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
+ int sizemask, TCGArg ret, int nargs, TCGArg *args);
+
+void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
+ int c, int right, int arith);
+
+/* only used for debugging purposes */
+void tcg_register_helper(void *func, const char *name);
+const char *tcg_helper_get_name(TCGContext *s, void *func);
+void tcg_dump_ops(TCGContext *s, FILE *outfile);
+
+void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf);
+TCGv_i32 tcg_const_i32(int32_t val);
+TCGv_i64 tcg_const_i64(int64_t val);
+TCGv_i32 tcg_const_local_i32(int32_t val);
+TCGv_i64 tcg_const_local_i64(int64_t val);
+
+#ifndef VBOX
+extern uint8_t code_gen_prologue[];
+#else
+extern uint8_t *code_gen_prologue;
+#endif
+#if defined(_ARCH_PPC) && !defined(_ARCH_PPC64)
+#define tcg_qemu_tb_exec(tb_ptr) \
+ ((intptr_t REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr)
+#else
+# if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) && !defined(__MINGW64__)
+# define tcg_qemu_tb_exec(tb_ptr, ret) \
+ __asm__ __volatile__("call *%%ecx" : "=a"(ret) : "a"(tb_ptr), "c" (&code_gen_prologue[0]) : "memory", "%edx", "cc")
+# else
+#define tcg_qemu_tb_exec(tb_ptr) ((intptr_t REGPARM (*)(void *))code_gen_prologue)(tb_ptr)
+# endif
+#endif
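+
+/* A minimal sketch of the intended call site (cpu-exec.c runs a translated
+ block roughly like this; the surrounding bookkeeping is omitted):
+
+ uintptr_t next_tb = tcg_qemu_tb_exec(tb->tc_ptr);
+*/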
diff --git a/src/recompiler/tests/Makefile b/src/recompiler/tests/Makefile
new file mode 100644
index 00000000..ff7f787a
--- /dev/null
+++ b/src/recompiler/tests/Makefile
@@ -0,0 +1,109 @@
+-include ../config-host.mak
+
+$(call set-vpath, $(SRC_PATH)/tests)
+
+CFLAGS=-Wall -O2 -g -fno-strict-aliasing
+#CFLAGS+=-msse2
+LDFLAGS=
+
+ifeq ($(ARCH),i386)
+TESTS=linux-test testthread sha1-i386 test-i386
+endif
+ifeq ($(ARCH),x86_64)
+TESTS=test-x86_64
+endif
+TESTS+=sha1# test_path
+#TESTS+=test_path
+#TESTS+=runcom
+
+QEMU=../i386-linux-user/qemu-i386
+
+all: $(TESTS)
+
+hello-i386: hello-i386.c
+ $(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+ strip $@
+
+testthread: testthread.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lpthread
+
+test_path: test_path.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+ ./$@ || { rm $@; exit 1; }
+
+# i386/x86_64 emulation test (test various opcodes)
+test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \
+ test-i386.h test-i386-shift.h test-i386-muldiv.h
+ $(CC) -m32 $(CFLAGS) $(LDFLAGS) -static -o $@ \
+ $(<D)/test-i386.c $(<D)/test-i386-code16.S $(<D)/test-i386-vm86.S -lm
+
+test-x86_64: test-i386.c \
+ test-i386.h test-i386-shift.h test-i386-muldiv.h
+ $(CC) -m64 $(CFLAGS) $(LDFLAGS) -static -o $@ $(<D)/test-i386.c -lm
+
+ifeq ($(ARCH),i386)
+test: test-i386
+ ./test-i386 > test-i386.ref
+else
+test:
+endif
+ $(QEMU) test-i386 > test-i386.out
+ @if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi
+
+.PHONY: test-mmap
+test-mmap: test-mmap.c
+ $(CC) $(CFLAGS) -Wall -static -O2 $(LDFLAGS) -o $@ $<
+ -./test-mmap
+ -$(QEMU) ./test-mmap
+ -$(QEMU) -p 8192 ./test-mmap 8192
+ -$(QEMU) -p 16384 ./test-mmap 16384
+ -$(QEMU) -p 32768 ./test-mmap 32768
+
+# generic Linux and CPU test
+linux-test: linux-test.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lm
+
+# speed test
+sha1-i386: sha1.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+sha1: sha1.c
+ $(HOST_CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+speed: sha1 sha1-i386
+ time ./sha1
+ time $(QEMU) ./sha1-i386
+
+# vm86 test
+runcom: runcom.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+# NOTE: -fomit-frame-pointer is currently needed: this is a bug in libqemu
+qruncom: qruncom.c ../ioport-user.c ../i386-user/libqemu.a
+ $(CC) $(CFLAGS) -fomit-frame-pointer $(LDFLAGS) -I../target-i386 -I.. -I../i386-user -I../fpu \
+ -o $@ $(filter %.c, $^) -L../i386-user -lqemu -lm
+
+# arm test
+hello-arm: hello-arm.o
+ arm-linux-ld -o $@ $<
+
+hello-arm.o: hello-arm.c
+ arm-linux-gcc -Wall -g -O2 -c -o $@ $<
+
+test-arm-iwmmxt: test-arm-iwmmxt.s
+ cpp < $< | arm-linux-gnu-gcc -Wall -static -march=iwmmxt -mabi=aapcs -x assembler - -o $@
+
+# MIPS test
+hello-mips: hello-mips.c
+ mips-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $<
+
+hello-mipsel: hello-mips.c
+ mipsel-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $<
+
+# testsuite for the CRIS port.
+test-cris:
+ $(MAKE) -C cris check
+
+clean:
+ rm -f *~ *.o test-i386.out test-i386.ref \
+ test-x86_64.log test-x86_64.ref qruncom $(TESTS)
diff --git a/src/recompiler/tests/hello-arm.c b/src/recompiler/tests/hello-arm.c
new file mode 100644
index 00000000..e0daa7ad
--- /dev/null
+++ b/src/recompiler/tests/hello-arm.c
@@ -0,0 +1,113 @@
+#define __NR_SYSCALL_BASE 0x900000
+#define __NR_exit1 (__NR_SYSCALL_BASE+ 1)
+#define __NR_write (__NR_SYSCALL_BASE+ 4)
+
+#define __sys2(x) #x
+#define __sys1(x) __sys2(x)
+
+#ifndef __syscall
+#define __syscall(name) "swi\t" __sys1(__NR_##name) "\n\t"
+#endif
+
+#define __syscall_return(type, res) \
+do { \
+ return (type) (res); \
+} while (0)
+
+#define _syscall0(type,name) \
+type name(void) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ __syscall(name) \
+ "mov %0,r0" \
+ :"=r" (__res) : : "r0","lr"); \
+ __syscall_return(type,__res); \
+}
+
+#define _syscall1(type,name,type1,arg1) \
+type name(type1 arg1) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ "mov\tr0,%1\n\t" \
+ __syscall(name) \
+ "mov %0,r0" \
+ : "=r" (__res) \
+ : "r" ((long)(arg1)) \
+ : "r0","lr"); \
+ __syscall_return(type,__res); \
+}
+
+#define _syscall2(type,name,type1,arg1,type2,arg2) \
+type name(type1 arg1,type2 arg2) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ "mov\tr0,%1\n\t" \
+ "mov\tr1,%2\n\t" \
+ __syscall(name) \
+ "mov\t%0,r0" \
+ : "=r" (__res) \
+ : "r" ((long)(arg1)),"r" ((long)(arg2)) \
+ : "r0","r1","lr"); \
+ __syscall_return(type,__res); \
+}
+
+
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+type name(type1 arg1,type2 arg2,type3 arg3) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ "mov\tr0,%1\n\t" \
+ "mov\tr1,%2\n\t" \
+ "mov\tr2,%3\n\t" \
+ __syscall(name) \
+ "mov\t%0,r0" \
+ : "=r" (__res) \
+ : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)) \
+ : "r0","r1","r2","lr"); \
+ __syscall_return(type,__res); \
+}
+
+
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ "mov\tr0,%1\n\t" \
+ "mov\tr1,%2\n\t" \
+ "mov\tr2,%3\n\t" \
+ "mov\tr3,%4\n\t" \
+ __syscall(name) \
+ "mov\t%0,r0" \
+ : "=r" (__res) \
+ : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4)) \
+ : "r0","r1","r2","r3","lr"); \
+ __syscall_return(type,__res); \
+}
+
+
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \
+ long __res; \
+ __asm__ __volatile__ ( \
+ "mov\tr0,%1\n\t" \
+ "mov\tr1,%2\n\t" \
+ "mov\tr2,%3\n\t" \
+ "mov\tr3,%4\n\t" \
+ "mov\tr4,%5\n\t" \
+ __syscall(name) \
+ "mov\t%0,r0" \
+ : "=r" (__res) \
+ : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4)), \
+ "r" ((long)(arg5)) \
+ : "r0","r1","r2","r3","r4","lr"); \
+ __syscall_return(type,__res); \
+}
+
+_syscall1(int,exit1,int,status);
+_syscall3(int,write,int,fd,const char *,buf, int, len);
+
+void _start(void)
+{
+ write(1, "Hello World\n", 12);
+ exit1(0);
+}
diff --git a/src/recompiler/tests/hello-i386.c b/src/recompiler/tests/hello-i386.c
new file mode 100644
index 00000000..e00245d3
--- /dev/null
+++ b/src/recompiler/tests/hello-i386.c
@@ -0,0 +1,26 @@
+#include <asm/unistd.h>
+
+extern inline volatile void exit(int status)
+{
+ int __res;
+ __asm__ volatile ("movl %%ecx,%%ebx\n"\
+ "int $0x80" \
+ : "=a" (__res) : "0" (__NR_exit),"c" ((long)(status)));
+}
+
+extern inline int write(int fd, const char * buf, int len)
+{
+ int status;
+ __asm__ volatile ("pushl %%ebx\n"\
+ "movl %%esi,%%ebx\n"\
+ "int $0x80\n" \
+ "popl %%ebx\n"\
+ : "=a" (status) \
+ : "0" (__NR_write),"S" ((long)(fd)),"c" ((long)(buf)),"d" ((long)(len)));
+ return status;
+}
+
+void _start(void)
+{
+ write(1, "Hello World\n", 12);
+ exit(0);
+}
diff --git a/src/recompiler/tests/linux-test.c b/src/recompiler/tests/linux-test.c
new file mode 100644
index 00000000..cce80373
--- /dev/null
+++ b/src/recompiler/tests/linux-test.c
@@ -0,0 +1,545 @@
+/*
+ * linux and CPU test
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <utime.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sched.h>
+#include <dirent.h>
+#include <setjmp.h>
+#include <sys/shm.h>
+
+#define TESTPATH "/tmp/linux-test.tmp"
+#define TESTPORT 7654
+#define STACK_SIZE 16384
+
+void error1(const char *filename, int line, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", filename, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+int __chk_error(const char *filename, int line, int ret)
+{
+ if (ret < 0) {
+ error1(filename, line, "%m (ret=%d, errno=%d)",
+ ret, errno);
+ }
+ return ret;
+}
+
+#define error(fmt, ...) error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__)
+
+#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret))
+
+/*******************************************************/
+
+#define FILE_BUF_SIZE 300
+
+void test_file(void)
+{
+ int fd, i, len, ret;
+ uint8_t buf[FILE_BUF_SIZE];
+ uint8_t buf2[FILE_BUF_SIZE];
+ uint8_t buf3[FILE_BUF_SIZE];
+ char cur_dir[1024];
+ struct stat st;
+ struct utimbuf tbuf;
+ struct iovec vecs[2];
+ DIR *dir;
+ struct dirent *de;
+
+ /* clean up, just in case */
+ unlink(TESTPATH "/file1");
+ unlink(TESTPATH "/file2");
+ unlink(TESTPATH "/file3");
+ rmdir(TESTPATH);
+
+ if (getcwd(cur_dir, sizeof(cur_dir)) == NULL)
+ error("getcwd");
+
+ chk_error(mkdir(TESTPATH, 0755));
+
+ chk_error(chdir(TESTPATH));
+
+ /* open/read/write/close/readv/writev/lseek */
+
+ fd = chk_error(open("file1", O_WRONLY | O_TRUNC | O_CREAT, 0644));
+ for(i=0;i < FILE_BUF_SIZE; i++)
+ buf[i] = i;
+ len = chk_error(write(fd, buf, FILE_BUF_SIZE / 2));
+ if (len != (FILE_BUF_SIZE / 2))
+ error("write");
+ vecs[0].iov_base = buf + (FILE_BUF_SIZE / 2);
+ vecs[0].iov_len = 16;
+ vecs[1].iov_base = buf + (FILE_BUF_SIZE / 2) + 16;
+ vecs[1].iov_len = (FILE_BUF_SIZE / 2) - 16;
+ len = chk_error(writev(fd, vecs, 2));
+ if (len != (FILE_BUF_SIZE / 2))
+ error("writev");
+ chk_error(close(fd));
+
+ chk_error(rename("file1", "file2"));
+
+ fd = chk_error(open("file2", O_RDONLY));
+
+ len = chk_error(read(fd, buf2, FILE_BUF_SIZE));
+ if (len != FILE_BUF_SIZE)
+ error("read");
+ if (memcmp(buf, buf2, FILE_BUF_SIZE) != 0)
+ error("memcmp");
+
+#define FOFFSET 16
+ ret = chk_error(lseek(fd, FOFFSET, SEEK_SET));
+ if (ret != 16)
+ error("lseek");
+ vecs[0].iov_base = buf3;
+ vecs[0].iov_len = 32;
+ vecs[1].iov_base = buf3 + 32;
+ vecs[1].iov_len = FILE_BUF_SIZE - FOFFSET - 32;
+ len = chk_error(readv(fd, vecs, 2));
+ if (len != FILE_BUF_SIZE - FOFFSET)
+ error("readv");
+ if (memcmp(buf + FOFFSET, buf3, FILE_BUF_SIZE - FOFFSET) != 0)
+ error("memcmp");
+
+ chk_error(close(fd));
+
+ /* access */
+ chk_error(access("file2", R_OK));
+
+ /* stat/chmod/utime/truncate */
+
+ chk_error(chmod("file2", 0600));
+ tbuf.actime = 1001;
+ tbuf.modtime = 1000;
+ chk_error(truncate("file2", 100));
+ chk_error(utime("file2", &tbuf));
+ chk_error(stat("file2", &st));
+ if (st.st_size != 100)
+ error("stat size");
+ if (!S_ISREG(st.st_mode))
+ error("stat mode");
+ if ((st.st_mode & 0777) != 0600)
+ error("stat mode2");
+ if (st.st_atime != 1001 ||
+ st.st_mtime != 1000)
+ error("stat time");
+
+ chk_error(stat(TESTPATH, &st));
+ if (!S_ISDIR(st.st_mode))
+ error("stat mode");
+
+ /* fstat */
+ fd = chk_error(open("file2", O_RDWR));
+ chk_error(ftruncate(fd, 50));
+ chk_error(fstat(fd, &st));
+ chk_error(close(fd));
+
+ if (st.st_size != 50)
+ error("stat size");
+ if (!S_ISREG(st.st_mode))
+ error("stat mode");
+
+ /* symlink/lstat */
+ chk_error(symlink("file2", "file3"));
+ chk_error(lstat("file3", &st));
+ if (!S_ISLNK(st.st_mode))
+ error("stat mode");
+
+ /* getdents */
+ dir = opendir(TESTPATH);
+ if (!dir)
+ error("opendir");
+ len = 0;
+ for(;;) {
+ de = readdir(dir);
+ if (!de)
+ break;
+ if (strcmp(de->d_name, ".") != 0 &&
+ strcmp(de->d_name, "..") != 0 &&
+ strcmp(de->d_name, "file2") != 0 &&
+ strcmp(de->d_name, "file3") != 0)
+ error("readdir");
+ len++;
+ }
+ closedir(dir);
+ if (len != 4)
+ error("readdir");
+
+ chk_error(unlink("file3"));
+ chk_error(unlink("file2"));
+ chk_error(chdir(cur_dir));
+ chk_error(rmdir(TESTPATH));
+}
+
+void test_fork(void)
+{
+ int pid, status;
+
+ pid = chk_error(fork());
+ if (pid == 0) {
+ /* child */
+ exit(2);
+ }
+ chk_error(waitpid(pid, &status, 0));
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 2)
+ error("waitpid status=0x%x", status);
+}
+
+void test_time(void)
+{
+ struct timeval tv, tv2;
+ struct timespec ts, rem;
+ struct rusage rusg1, rusg2;
+ int ti, i;
+
+ chk_error(gettimeofday(&tv, NULL));
+ rem.tv_sec = 1;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 20 * 1000000;
+ chk_error(nanosleep(&ts, &rem));
+ if (rem.tv_sec != 1)
+ error("nanosleep");
+ chk_error(gettimeofday(&tv2, NULL));
+ ti = tv2.tv_sec - tv.tv_sec;
+ if (ti >= 2)
+ error("gettimeofday");
+
+ chk_error(getrusage(RUSAGE_SELF, &rusg1));
+ for(i = 0;i < 10000; i++);
+ chk_error(getrusage(RUSAGE_SELF, &rusg2));
+ if ((rusg2.ru_utime.tv_sec - rusg1.ru_utime.tv_sec) < 0 ||
+ (rusg2.ru_stime.tv_sec - rusg1.ru_stime.tv_sec) < 0)
+ error("getrusage");
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+int server_socket(void)
+{
+ int val, fd;
+ struct sockaddr_in sockaddr;
+
+ /* server socket */
+ fd = chk_error(socket(PF_INET, SOCK_STREAM, 0));
+
+ val = 1;
+ chk_error(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)));
+
+ sockaddr.sin_family = AF_INET;
+ sockaddr.sin_port = htons(TESTPORT);
+ sockaddr.sin_addr.s_addr = 0;
+ chk_error(bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)));
+ chk_error(listen(fd, 0));
+ return fd;
+
+}
+
+int client_socket(void)
+{
+ int fd;
+ struct sockaddr_in sockaddr;
+
+ /* client socket */
+ fd = chk_error(socket(PF_INET, SOCK_STREAM, 0));
+ sockaddr.sin_family = AF_INET;
+ sockaddr.sin_port = htons(TESTPORT);
+ inet_aton("127.0.0.1", &sockaddr.sin_addr);
+ chk_error(connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)));
+ return fd;
+}
+
+const char socket_msg[] = "hello socket\n";
+
+void test_socket(void)
+{
+ int server_fd, client_fd, fd, pid, ret, val;
+ struct sockaddr_in sockaddr;
+ socklen_t len;
+ char buf[512];
+
+ server_fd = server_socket();
+
+ /* test a few socket options */
+ len = sizeof(val);
+ chk_error(getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &val, &len));
+ if (val != SOCK_STREAM)
+ error("getsockopt");
+
+ pid = chk_error(fork());
+ if (pid == 0) {
+ client_fd = client_socket();
+ send(client_fd, socket_msg, sizeof(socket_msg), 0);
+ close(client_fd);
+ exit(0);
+ }
+ len = sizeof(sockaddr);
+ fd = chk_error(accept(server_fd, (struct sockaddr *)&sockaddr, &len));
+
+ ret = chk_error(recv(fd, buf, sizeof(buf), 0));
+ if (ret != sizeof(socket_msg))
+ error("recv");
+ if (memcmp(buf, socket_msg, sizeof(socket_msg)) != 0)
+ error("socket_msg");
+ chk_error(close(fd));
+ chk_error(close(server_fd));
+}
+
+#define WCOUNT_MAX 512
+
+void test_pipe(void)
+{
+ fd_set rfds, wfds;
+ int fds[2], fd_max, ret;
+ uint8_t ch;
+ int wcount, rcount;
+
+ chk_error(pipe(fds));
+ chk_error(fcntl(fds[0], F_SETFL, O_NONBLOCK));
+ chk_error(fcntl(fds[1], F_SETFL, O_NONBLOCK));
+ wcount = 0;
+ rcount = 0;
+ for(;;) {
+ FD_ZERO(&rfds);
+ fd_max = fds[0];
+ FD_SET(fds[0], &rfds);
+
+ FD_ZERO(&wfds);
+ FD_SET(fds[1], &wfds);
+ if (fds[1] > fd_max)
+ fd_max = fds[1];
+
+ ret = chk_error(select(fd_max + 1, &rfds, &wfds, NULL, NULL));
+ if (ret > 0) {
+ if (FD_ISSET(fds[0], &rfds)) {
+ chk_error(read(fds[0], &ch, 1));
+ rcount++;
+ if (rcount >= WCOUNT_MAX)
+ break;
+ }
+ if (FD_ISSET(fds[1], &wfds)) {
+ ch = 'a';
+ chk_error(write(fds[1], &ch, 1));
+ wcount++;
+ }
+ }
+ }
+ chk_error(close(fds[0]));
+ chk_error(close(fds[1]));
+}
+
+int thread1_res;
+int thread2_res;
+
+int thread1_func(void *arg)
+{
+ int i;
+ for(i=0;i<5;i++) {
+ thread1_res++;
+ usleep(10 * 1000);
+ }
+ return 0;
+}
+
+int thread2_func(void *arg)
+{
+ int i;
+ for(i=0;i<6;i++) {
+ thread2_res++;
+ usleep(10 * 1000);
+ }
+ return 0;
+}
+
+void test_clone(void)
+{
+ uint8_t *stack1, *stack2;
+ int pid1, pid2, status1, status2;
+
+ stack1 = malloc(STACK_SIZE);
+ pid1 = chk_error(clone(thread1_func, stack1 + STACK_SIZE,
+ CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello1"));
+
+ stack2 = malloc(STACK_SIZE);
+ pid2 = chk_error(clone(thread2_func, stack2 + STACK_SIZE,
+ CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello2"));
+
+ while (waitpid(pid1, &status1, 0) != pid1);
+ while (waitpid(pid2, &status2, 0) != pid2);
+ if (thread1_res != 5 ||
+ thread2_res != 6)
+ error("clone");
+}
+
+/***********************************/
+
+volatile int alarm_count;
+jmp_buf jmp_env;
+
+void sig_alarm(int sig)
+{
+ if (sig != SIGALRM)
+ error("signal");
+ alarm_count++;
+}
+
+void sig_segv(int sig, siginfo_t *info, void *puc)
+{
+ if (sig != SIGSEGV)
+ error("signal");
+ longjmp(jmp_env, 1);
+}
+
+void test_signal(void)
+{
+ struct sigaction act;
+ struct itimerval it, oit;
+
+ /* timer test */
+
+ alarm_count = 0;
+
+ act.sa_handler = sig_alarm;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ chk_error(sigaction(SIGALRM, &act, NULL));
+
+ it.it_interval.tv_sec = 0;
+ it.it_interval.tv_usec = 10 * 1000;
+ it.it_value.tv_sec = 0;
+ it.it_value.tv_usec = 10 * 1000;
+ chk_error(setitimer(ITIMER_REAL, &it, NULL));
+ chk_error(getitimer(ITIMER_REAL, &oit));
+ if (oit.it_value.tv_sec != it.it_value.tv_sec ||
+ oit.it_value.tv_usec != it.it_value.tv_usec)
+ error("itimer");
+
+ while (alarm_count < 5) {
+ usleep(10 * 1000);
+ }
+
+ it.it_interval.tv_sec = 0;
+ it.it_interval.tv_usec = 0;
+ it.it_value.tv_sec = 0;
+ it.it_value.tv_usec = 0;
+ memset(&oit, 0xff, sizeof(oit));
+ chk_error(setitimer(ITIMER_REAL, &it, &oit));
+ if (oit.it_value.tv_sec != 0 ||
+ oit.it_value.tv_usec != 10 * 1000)
+ error("setitimer");
+
+ /* SIGSEGV test */
+ act.sa_sigaction = sig_segv;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ chk_error(sigaction(SIGSEGV, &act, NULL));
+ if (setjmp(jmp_env) == 0) {
+ *(uint8_t *)0 = 0;
+ }
+
+ act.sa_handler = SIG_DFL;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ chk_error(sigaction(SIGSEGV, &act, NULL));
+}
+
+#define SHM_SIZE 32768
+
+void test_shm(void)
+{
+ void *ptr;
+ int shmid;
+
+ shmid = chk_error(shmget(IPC_PRIVATE, SHM_SIZE, IPC_CREAT | 0777));
+ ptr = shmat(shmid, NULL, 0);
+    if (ptr == (void *)-1)
+ error("shmat");
+
+ memset(ptr, 0, SHM_SIZE);
+
+ chk_error(shmctl(shmid, IPC_RMID, 0));
+ chk_error(shmdt(ptr));
+}
+
+int main(int argc, char **argv)
+{
+ test_file();
+ test_fork();
+ test_time();
+ test_socket();
+ // test_clone();
+ test_signal();
+ test_shm();
+ return 0;
+}
diff --git a/src/recompiler/tests/pi_10.com b/src/recompiler/tests/pi_10.com
new file mode 100644
index 00000000..8993ba1a
--- /dev/null
+++ b/src/recompiler/tests/pi_10.com
Binary files differ
diff --git a/src/recompiler/tests/qruncom.c b/src/recompiler/tests/qruncom.c
new file mode 100644
index 00000000..079f7a29
--- /dev/null
+++ b/src/recompiler/tests/qruncom.c
@@ -0,0 +1,284 @@
+/*
+ * Example of use of user mode libqemu: launch a basic .com DOS
+ * executable
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <malloc.h>
+#include <stdarg.h>
+
+#include "cpu.h"
+
+//#define SIGTEST
+
+int cpu_get_pic_interrupt(CPUState *env)
+{
+ return -1;
+}
+
+uint64_t cpu_get_tsc(CPUState *env)
+{
+ return 0;
+}
+
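+/* build an IDT gate descriptor (two 32-bit words): offset, selector, type, DPL and the present bit */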
+static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
+ unsigned long addr, unsigned int sel)
+{
+ unsigned int e1, e2;
+ e1 = (addr & 0xffff) | (sel << 16);
+ e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
+ stl((uint8_t *)ptr, e1);
+ stl((uint8_t *)ptr + 4, e2);
+}
+
+uint64_t idt_table[256];
+
+/* only the DPL matters, since we only do user-space emulation */
+static void set_idt(int n, unsigned int dpl)
+{
+ set_gate(idt_table + n, 0, dpl, 0, 0);
+}
+
+void qemu_free(void *ptr)
+{
+ free(ptr);
+}
+
+void *qemu_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+void *qemu_mallocz(size_t size)
+{
+ void *ptr;
+ ptr = qemu_malloc(size);
+ if (!ptr)
+ return NULL;
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+void *qemu_vmalloc(size_t size)
+{
+ return memalign(4096, size);
+}
+
+void qemu_vfree(void *ptr)
+{
+ free(ptr);
+}
+
+void qemu_printf(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+}
+
+/* XXX: this is a bug in helper2.c */
+int errno;
+
+/**********************************************/
+
+#define COM_BASE_ADDR 0x10100
+
+static void usage(void)
+{
+ printf("qruncom version 0.1 (c) 2003 Fabrice Bellard\n"
+ "usage: qruncom file.com\n"
+ "user mode libqemu demo: run simple .com DOS executables\n");
+ exit(1);
+}
+
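+/* translate a real-mode seg:offset pair into a linear (here, host) address */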
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+ return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
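+/* emulate a 16-bit push: decrement SP within its low 16 bits and store the word at SS:SP */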
+static inline void pushw(CPUState *env, int val)
+{
+ env->regs[R_ESP] = (env->regs[R_ESP] & ~0xffff) | ((env->regs[R_ESP] - 2) & 0xffff);
+ *(uint16_t *)seg_to_linear(env->segs[R_SS].selector, env->regs[R_ESP]) = val;
+}
+
+static void host_segv_handler(int host_signum, siginfo_t *info,
+ void *puc)
+{
+ if (cpu_signal_handler(host_signum, info, puc)) {
+ return;
+ }
+ abort();
+}
+
+int main(int argc, char **argv)
+{
+ uint8_t *vm86_mem;
+ const char *filename;
+ int fd, ret, seg;
+ CPUState *env;
+
+ if (argc != 2)
+ usage();
+ filename = argv[1];
+
+ vm86_mem = mmap((void *)0x00000000, 0x110000,
+ PROT_WRITE | PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (vm86_mem == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* load the MSDOS .com executable */
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ perror(filename);
+ exit(1);
+ }
+ ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256);
+ if (ret < 0) {
+ perror("read");
+ exit(1);
+ }
+ close(fd);
+
+ /* install exception handler for CPU emulator */
+ {
+ struct sigaction act;
+
+ sigfillset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ // act.sa_flags |= SA_ONSTACK;
+
+ act.sa_sigaction = host_segv_handler;
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ }
+
+ // cpu_set_log(CPU_LOG_TB_IN_ASM | CPU_LOG_TB_OUT_ASM | CPU_LOG_EXEC);
+
+ env = cpu_init("qemu32");
+
+ cpu_x86_set_cpl(env, 3);
+
+ env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
+ /* NOTE: hflags duplicates some of the virtual CPU state */
+ env->hflags |= HF_PE_MASK | VM_MASK;
+
+    /* flags setup: we activate IRQs by default, as in user
+       mode. We also set the VM86 flag so the DOS code runs in vm86 mode */
+ env->eflags |= IF_MASK | VM_MASK;
+
+ /* init basic registers */
+ env->eip = 0x100;
+ env->regs[R_ESP] = 0xfffe;
+ seg = (COM_BASE_ADDR - 0x100) >> 4;
+
+ cpu_x86_load_seg_cache(env, R_CS, seg,
+ (seg << 4), 0xffff, 0);
+ cpu_x86_load_seg_cache(env, R_SS, seg,
+ (seg << 4), 0xffff, 0);
+ cpu_x86_load_seg_cache(env, R_DS, seg,
+ (seg << 4), 0xffff, 0);
+ cpu_x86_load_seg_cache(env, R_ES, seg,
+ (seg << 4), 0xffff, 0);
+ cpu_x86_load_seg_cache(env, R_FS, seg,
+ (seg << 4), 0xffff, 0);
+ cpu_x86_load_seg_cache(env, R_GS, seg,
+ (seg << 4), 0xffff, 0);
+
+ /* exception support */
+ env->idt.base = (unsigned long)idt_table;
+ env->idt.limit = sizeof(idt_table) - 1;
+ set_idt(0, 0);
+ set_idt(1, 0);
+ set_idt(2, 0);
+ set_idt(3, 3);
+ set_idt(4, 3);
+ set_idt(5, 3);
+ set_idt(6, 0);
+ set_idt(7, 0);
+ set_idt(8, 0);
+ set_idt(9, 0);
+ set_idt(10, 0);
+ set_idt(11, 0);
+ set_idt(12, 0);
+ set_idt(13, 0);
+ set_idt(14, 0);
+ set_idt(15, 0);
+ set_idt(16, 0);
+ set_idt(17, 0);
+ set_idt(18, 0);
+ set_idt(19, 0);
+
+ /* put return code */
+ *seg_to_linear(env->segs[R_CS].selector, 0) = 0xb4; /* mov ah, $0 */
+ *seg_to_linear(env->segs[R_CS].selector, 1) = 0x00;
+ *seg_to_linear(env->segs[R_CS].selector, 2) = 0xcd; /* int $0x21 */
+ *seg_to_linear(env->segs[R_CS].selector, 3) = 0x21;
+ pushw(env, 0x0000);
+
+    /* the values of these registers seem to be assumed by pi_10.com */
+ env->regs[R_ESI] = 0x100;
+ env->regs[R_ECX] = 0xff;
+ env->regs[R_EBP] = 0x0900;
+ env->regs[R_EDI] = 0xfffe;
+
+ /* inform the emulator of the mmaped memory */
+ page_set_flags(0x00000000, 0x110000,
+ PAGE_WRITE | PAGE_READ | PAGE_EXEC | PAGE_VALID);
+
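+    /* main emulation loop: in this setup "int $0x21" raises a GP fault, which is caught below to emulate the few int 0x21 services pi_10.com needs */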
+ for(;;) {
+ ret = cpu_x86_exec(env);
+ switch(ret) {
+ case EXCP0D_GPF:
+ {
+ int int_num, ah;
+ int_num = *(uint8_t *)(env->segs[R_CS].base + env->eip + 1);
+ if (int_num != 0x21)
+ goto unknown_int;
+ ah = (env->regs[R_EAX] >> 8) & 0xff;
+ switch(ah) {
+ case 0x00: /* exit */
+ exit(0);
+ case 0x02: /* write char */
+ {
+ uint8_t c = env->regs[R_EDX];
+ write(1, &c, 1);
+ }
+ break;
+ case 0x09: /* write string */
+ {
+ uint8_t c;
+ for(;;) {
+ c = *seg_to_linear(env->segs[R_DS].selector, env->regs[R_EAX]);
+ if (c == '$')
+ break;
+ write(1, &c, 1);
+ }
+ env->regs[R_EAX] = (env->regs[R_EAX] & ~0xff) | '$';
+ }
+ break;
+ default:
+ unknown_int:
+ fprintf(stderr, "unsupported int 0x%02x\n", int_num);
+ cpu_dump_state(env, stderr, fprintf, 0);
+ // exit(1);
+ }
+ env->eip += 2;
+ }
+ break;
+ default:
+ fprintf(stderr, "unhandled cpu_exec return code (0x%x)\n", ret);
+ cpu_dump_state(env, stderr, fprintf, 0);
+ exit(1);
+ }
+ }
+}
diff --git a/src/recompiler/tests/runcom.c b/src/recompiler/tests/runcom.c
new file mode 100644
index 00000000..63805666
--- /dev/null
+++ b/src/recompiler/tests/runcom.c
@@ -0,0 +1,195 @@
+/*
+ * Simple example of use of vm86: launch a basic .com DOS executable
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#include <linux/unistd.h>
+#include <asm/vm86.h>
+
+//#define SIGTEST
+
+#undef __syscall_return
+#define __syscall_return(type, res) \
+do { \
+ return (type) (res); \
+} while (0)
+
+_syscall2(int, vm86, int, func, struct vm86plus_struct *, v86)
+
+#define COM_BASE_ADDR 0x10100
+
+static void usage(void)
+{
+ printf("runcom version 0.1 (c) 2003 Fabrice Bellard\n"
+ "usage: runcom file.com\n"
+           "run simple .com DOS executables (Linux vm86 test mode)\n");
+ exit(1);
+}
+
+static inline void set_bit(uint8_t *a, unsigned int bit)
+{
+ a[bit / 8] |= (1 << (bit % 8));
+}
+
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+ return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
+static inline void pushw(struct vm86_regs *r, int val)
+{
+ r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
+ *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
+}
+
+void dump_regs(struct vm86_regs *r)
+{
+ fprintf(stderr,
+ "EAX=%08lx EBX=%08lx ECX=%08lx EDX=%08lx\n"
+ "ESI=%08lx EDI=%08lx EBP=%08lx ESP=%08lx\n"
+ "EIP=%08lx EFL=%08lx\n"
+ "CS=%04x DS=%04x ES=%04x SS=%04x FS=%04x GS=%04x\n",
+ r->eax, r->ebx, r->ecx, r->edx, r->esi, r->edi, r->ebp, r->esp,
+ r->eip, r->eflags,
+ r->cs, r->ds, r->es, r->ss, r->fs, r->gs);
+}
+
+#ifdef SIGTEST
+void alarm_handler(int sig)
+{
+ fprintf(stderr, "alarm signal=%d\n", sig);
+ alarm(1);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ uint8_t *vm86_mem;
+ const char *filename;
+ int fd, ret, seg;
+ struct vm86plus_struct ctx;
+ struct vm86_regs *r;
+
+ if (argc != 2)
+ usage();
+ filename = argv[1];
+
+ vm86_mem = mmap((void *)0x00000000, 0x110000,
+ PROT_WRITE | PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (vm86_mem == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+#ifdef SIGTEST
+ {
+ struct sigaction act;
+
+ act.sa_handler = alarm_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGALRM, &act, NULL);
+ alarm(1);
+ }
+#endif
+
+ /* load the MSDOS .com executable */
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ perror(filename);
+ exit(1);
+ }
+ ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256);
+ if (ret < 0) {
+ perror("read");
+ exit(1);
+ }
+ close(fd);
+
+ memset(&ctx, 0, sizeof(ctx));
+ /* init basic registers */
+ r = &ctx.regs;
+ r->eip = 0x100;
+ r->esp = 0xfffe;
+ seg = (COM_BASE_ADDR - 0x100) >> 4;
+ r->cs = seg;
+ r->ss = seg;
+ r->ds = seg;
+ r->es = seg;
+ r->fs = seg;
+ r->gs = seg;
+ r->eflags = VIF_MASK;
+
+ /* put return code */
+ set_bit((uint8_t *)&ctx.int_revectored, 0x21);
+ *seg_to_linear(r->cs, 0) = 0xb4; /* mov ah, $0 */
+ *seg_to_linear(r->cs, 1) = 0x00;
+ *seg_to_linear(r->cs, 2) = 0xcd; /* int $0x21 */
+ *seg_to_linear(r->cs, 3) = 0x21;
+ pushw(&ctx.regs, 0x0000);
+
+    /* the values of these registers seem to be assumed by pi_10.com */
+ r->esi = 0x100;
+ r->ecx = 0xff;
+ r->ebp = 0x0900;
+ r->edi = 0xfffe;
+
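+    /* run the guest via the vm86() syscall; int 0x21 returns as VM86_INTx because that vector was marked revectored above */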
+ for(;;) {
+ ret = vm86(VM86_ENTER, &ctx);
+ switch(VM86_TYPE(ret)) {
+ case VM86_INTx:
+ {
+ int int_num, ah;
+
+ int_num = VM86_ARG(ret);
+ if (int_num != 0x21)
+ goto unknown_int;
+ ah = (r->eax >> 8) & 0xff;
+ switch(ah) {
+ case 0x00: /* exit */
+ exit(0);
+ case 0x02: /* write char */
+ {
+ uint8_t c = r->edx;
+ write(1, &c, 1);
+ }
+ break;
+ case 0x09: /* write string */
+ {
+ uint8_t c;
+ for(;;) {
+ c = *seg_to_linear(r->ds, r->edx);
+ if (c == '$')
+ break;
+ write(1, &c, 1);
+ }
+ r->eax = (r->eax & ~0xff) | '$';
+ }
+ break;
+ default:
+ unknown_int:
+ fprintf(stderr, "unsupported int 0x%02x\n", int_num);
+ dump_regs(&ctx.regs);
+ // exit(1);
+ }
+ }
+ break;
+ case VM86_SIGNAL:
+ /* a signal came, we just ignore that */
+ break;
+ case VM86_STI:
+ break;
+ default:
+ fprintf(stderr, "unhandled vm86 return code (0x%x)\n", ret);
+ dump_regs(&ctx.regs);
+ exit(1);
+ }
+ }
+}
diff --git a/src/recompiler/tests/sha1.c b/src/recompiler/tests/sha1.c
new file mode 100644
index 00000000..93b7c8e8
--- /dev/null
+++ b/src/recompiler/tests/sha1.c
@@ -0,0 +1,240 @@
+
+/* from valgrind tests */
+
+/* ================ sha1.c ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+ A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+ 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
+/* #define SHA1HANDSOFF * Copies data before messing with it. */
+
+#define SHA1HANDSOFF
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+/* ================ sha1.h ================ */
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+*/
+
+typedef struct {
+ uint32_t state[5];
+ uint32_t count[2];
+ unsigned char buffer[64];
+} SHA1_CTX;
+
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
+void SHA1Init(SHA1_CTX* context);
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+/* ================ end of sha1.h ================ */
+#include <endian.h>
+
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
+ |(rol(block->l[i],8)&0x00FF00FF))
+#elif BYTE_ORDER == BIG_ENDIAN
+#define blk0(i) block->l[i]
+#else
+#error "Endianness not defined!"
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
+ ^block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
+{
+uint32_t a, b, c, d, e;
+typedef union {
+ unsigned char c[64];
+ uint32_t l[16];
+} CHAR64LONG16;
+#ifdef SHA1HANDSOFF
+CHAR64LONG16 block[1]; /* use array to appear as a pointer */
+ memcpy(block, buffer, 64);
+#else
+ /* The following had better never be used because it causes the
+ * pointer-to-const buffer to be cast into a pointer to non-const.
+ * And the result is written through. I threw a "const" in, hoping
+ * this will cause a diagnostic.
+ */
+CHAR64LONG16* block = (const CHAR64LONG16*)buffer;
+#endif
+ /* Copy context->state[] to working vars */
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ /* 4 rounds of 20 operations each. Loop unrolled. */
+ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+ R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+ R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+ R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+ R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+ R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+ R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+ R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+ R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+ R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+ R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+ R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+ R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+ R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+ R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+ R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+ R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+ R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+ R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+ R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+ /* Add the working vars back into context.state[] */
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ /* Wipe variables */
+ a = b = c = d = e = 0;
+#ifdef SHA1HANDSOFF
+ memset(block, '\0', sizeof(block));
+#endif
+}
+
+
+/* SHA1Init - Initialize new context */
+
+void SHA1Init(SHA1_CTX* context)
+{
+ /* SHA1 initialization constants */
+ context->state[0] = 0x67452301;
+ context->state[1] = 0xEFCDAB89;
+ context->state[2] = 0x98BADCFE;
+ context->state[3] = 0x10325476;
+ context->state[4] = 0xC3D2E1F0;
+ context->count[0] = context->count[1] = 0;
+}
+
+
+/* Run your data through this. */
+
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
+{
+uint32_t i;
+uint32_t j;
+
+ j = context->count[0];
+ if ((context->count[0] += len << 3) < j)
+ context->count[1]++;
+ context->count[1] += (len>>29);
+ j = (j >> 3) & 63;
+ if ((j + len) > 63) {
+ memcpy(&context->buffer[j], data, (i = 64-j));
+ SHA1Transform(context->state, context->buffer);
+ for ( ; i + 63 < len; i += 64) {
+ SHA1Transform(context->state, &data[i]);
+ }
+ j = 0;
+ }
+ else i = 0;
+ memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+
+void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
+{
+unsigned i;
+unsigned char finalcount[8];
+unsigned char c;
+
+#if 0 /* untested "improvement" by DHR */
+ /* Convert context->count to a sequence of bytes
+ * in finalcount. Second element first, but
+ * big-endian order within element.
+ * But we do it all backwards.
+ */
+ unsigned char *fcp = &finalcount[8];
+
+ for (i = 0; i < 2; i++)
+ {
+ uint32_t t = context->count[i];
+ int j;
+
+ for (j = 0; j < 4; t >>= 8, j++)
+ *--fcp = (unsigned char) t;
+ }
+#else
+ for (i = 0; i < 8; i++) {
+ finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
+ >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
+ }
+#endif
+ c = 0200;
+ SHA1Update(context, &c, 1);
+ while ((context->count[0] & 504) != 448) {
+ c = 0000;
+ SHA1Update(context, &c, 1);
+ }
+ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+ for (i = 0; i < 20; i++) {
+ digest[i] = (unsigned char)
+ ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+ }
+ /* Wipe variables */
+ memset(context, '\0', sizeof(*context));
+ memset(&finalcount, '\0', sizeof(finalcount));
+}
+/* ================ end of sha1.c ================ */
+
+#define BUFSIZE 4096
+
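+/* hash 1000 copies of a 4 KiB pattern buffer and print the resulting digest */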
+int
+main(int argc, char **argv)
+{
+ SHA1_CTX ctx;
+ unsigned char hash[20], buf[BUFSIZE];
+ int i;
+
+ for(i=0;i<BUFSIZE;i++)
+ buf[i] = i;
+
+ SHA1Init(&ctx);
+ for(i=0;i<1000;i++)
+ SHA1Update(&ctx, buf, BUFSIZE);
+ SHA1Final(hash, &ctx);
+
+ printf("SHA1=");
+ for(i=0;i<20;i++)
+ printf("%02x", hash[i]);
+ printf("\n");
+ return 0;
+}
diff --git a/src/recompiler/tests/test-i386-code16.S b/src/recompiler/tests/test-i386-code16.S
new file mode 100644
index 00000000..816c24b9
--- /dev/null
+++ b/src/recompiler/tests/test-i386-code16.S
@@ -0,0 +1,79 @@
+ .code16
+ .globl code16_start
+ .globl code16_end
+
+CS_SEG = 0xf
+
+code16_start:
+
+ .globl code16_func1
+
+ /* basic test */
+code16_func1 = . - code16_start
+ mov $1, %eax
+ data32 lret
+
+/* test push/pop in 16 bit mode */
+ .globl code16_func2
+code16_func2 = . - code16_start
+ xor %eax, %eax
+ mov $0x12345678, %ebx
+ movl %esp, %ecx
+ push %bx
+ subl %esp, %ecx
+ pop %ax
+ data32 lret
+
+/* test various jmp opcodes */
+ .globl code16_func3
+code16_func3 = . - code16_start
+ jmp 1f
+ nop
+1:
+ mov $4, %eax
+ mov $0x12345678, %ebx
+ xor %bx, %bx
+ jz 2f
+ add $2, %ax
+2:
+
+ call myfunc
+
+ lcall $CS_SEG, $(myfunc2 - code16_start)
+
+ ljmp $CS_SEG, $(myjmp1 - code16_start)
+myjmp1_next:
+
+ cs lcall *myfunc2_addr - code16_start
+
+ cs ljmp *myjmp2_addr - code16_start
+myjmp2_next:
+
+ data32 lret
+
+myfunc2_addr:
+ .short myfunc2 - code16_start
+ .short CS_SEG
+
+myjmp2_addr:
+ .short myjmp2 - code16_start
+ .short CS_SEG
+
+myjmp1:
+ add $8, %ax
+ jmp myjmp1_next
+
+myjmp2:
+ add $16, %ax
+ jmp myjmp2_next
+
+myfunc:
+ add $1, %ax
+ ret
+
+myfunc2:
+ add $4, %ax
+ lret
+
+
+code16_end:
diff --git a/src/recompiler/tests/test-i386-muldiv.h b/src/recompiler/tests/test-i386-muldiv.h
new file mode 100644
index 00000000..015f59e1
--- /dev/null
+++ b/src/recompiler/tests/test-i386-muldiv.h
@@ -0,0 +1,76 @@
+
+void glue(glue(test_, OP), b)(long op0, long op1)
+{
+ long res, s1, s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm ("push %4\n\t"
+ "popf\n\t"
+ stringify(OP)"b %b2\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=a" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+ stringify(OP) "b", s0, s1, res, flags & CC_MASK);
+}
+
+void glue(glue(test_, OP), w)(long op0h, long op0, long op1)
+{
+ long res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h;
+ res = op0;
+ flags = 0;
+ asm ("push %5\n\t"
+ "popf\n\t"
+ stringify(OP) "w %w3\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+ printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+ stringify(OP) "w", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+
+void glue(glue(test_, OP), l)(long op0h, long op0, long op1)
+{
+ long res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h;
+ res = op0;
+ flags = 0;
+ asm ("push %5\n\t"
+ "popf\n\t"
+ stringify(OP) "l %k3\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+ printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+ stringify(OP) "l", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+
+#if defined(__x86_64__)
+void glue(glue(test_, OP), q)(long op0h, long op0, long op1)
+{
+ long res, s1, flags, resh;
+ s1 = op1;
+ resh = op0h;
+ res = op0;
+ flags = 0;
+ asm ("push %5\n\t"
+ "popf\n\t"
+ stringify(OP) "q %3\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=a" (res), "=g" (flags), "=d" (resh)
+ : "q" (s1), "0" (res), "1" (flags), "2" (resh));
+ printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n",
+ stringify(OP) "q", op0h, op0, s1, resh, res, flags & CC_MASK);
+}
+#endif
+
+#undef OP
diff --git a/src/recompiler/tests/test-i386-shift.h b/src/recompiler/tests/test-i386-shift.h
new file mode 100644
index 00000000..3d8f84bf
--- /dev/null
+++ b/src/recompiler/tests/test-i386-shift.h
@@ -0,0 +1,185 @@
+
+#define exec_op glue(exec_, OP)
+#define exec_opq glue(glue(exec_, OP), q)
+#define exec_opl glue(glue(exec_, OP), l)
+#define exec_opw glue(glue(exec_, OP), w)
+#define exec_opb glue(glue(exec_, OP), b)
+
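+/* EXECSHIFT emits one shift/rotate/bit-test instruction for the given operand size: input flags are loaded with push+popf and the resulting flags captured with pushf+pop */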
+#ifndef OP_SHIFTD
+
+#ifdef OP_NOBYTE
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+ asm ("push %4\n\t"\
+ "popf\n\t"\
+ stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=g" (res), "=g" (flags)\
+ : "r" (s1), "0" (res), "1" (flags));
+#else
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+ asm ("push %4\n\t"\
+ "popf\n\t"\
+ stringify(OP) size " %%cl, %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=q" (res), "=g" (flags)\
+ : "c" (s1), "0" (res), "1" (flags));
+#endif
+
+#if defined(__x86_64__)
+void exec_opq(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("q", "", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "q", s0, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_opl(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("l", "k", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "l", s0, s1, res, iflags, flags & CC_MASK);
+}
+
+void exec_opw(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("w", "w", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "w", s0, s1, res, iflags, flags & CC_MASK);
+}
+
+#else
+#define EXECSHIFT(size, rsize, res, s1, s2, flags) \
+ asm ("push %4\n\t"\
+ "popf\n\t"\
+ stringify(OP) size " %%cl, %" rsize "5, %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=g" (res), "=g" (flags)\
+ : "c" (s1), "0" (res), "1" (flags), "r" (s2));
+
+#if defined(__x86_64__)
+void exec_opq(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("q", "", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "q", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_opl(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("l", "k", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "l", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+
+void exec_opw(long s2, long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("w", "w", res, s1, s2, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "w", s0, s2, s1, res, iflags, flags & CC_MASK);
+}
+
+#endif
+
+#ifndef OP_NOBYTE
+void exec_opb(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECSHIFT("b", "b", res, s1, 0, flags);
+ /* overflow is undefined if count != 1 */
+ if (s1 != 1)
+ flags &= ~CC_O;
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n",
+ stringify(OP) "b", s0, s1, res, iflags, flags & CC_MASK);
+}
+#endif
+
+void exec_op(long s2, long s0, long s1)
+{
+ s2 = i2l(s2);
+ s0 = i2l(s0);
+#if defined(__x86_64__)
+ exec_opq(s2, s0, s1, 0);
+#endif
+ exec_opl(s2, s0, s1, 0);
+    exec_opw(s2, s0, s1, 0);
+#ifndef OP_NOBYTE
+ exec_opb(s0, s1, 0);
+#endif
+#ifdef OP_CC
+#if defined(__x86_64__)
+ exec_opq(s2, s0, s1, CC_C);
+#endif
+ exec_opl(s2, s0, s1, CC_C);
+ exec_opw(s2, s0, s1, CC_C);
+ exec_opb(s0, s1, CC_C);
+#endif
+}
+
+void glue(test_, OP)(void)
+{
+ int i, n;
+#if defined(__x86_64__)
+ n = 64;
+#else
+ n = 32;
+#endif
+ for(i = 0; i < n; i++)
+ exec_op(0x21ad3d34, 0x12345678, i);
+ for(i = 0; i < n; i++)
+ exec_op(0x813f3421, 0x82345679, i);
+}
+
+void *glue(_test_, OP) __init_call = glue(test_, OP);
+
+#undef OP
+#undef OP_CC
+#undef OP_SHIFTD
+#undef OP_NOBYTE
+#undef EXECSHIFT
diff --git a/src/recompiler/tests/test-i386-ssse3.c b/src/recompiler/tests/test-i386-ssse3.c
new file mode 100644
index 00000000..0a42bd03
--- /dev/null
+++ b/src/recompiler/tests/test-i386-ssse3.c
@@ -0,0 +1,57 @@
+/* See if various SSSE3 instructions (MMX and SSE forms) give expected results */
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+int main(int argc, char *argv[]) {
+ char hello[16];
+ const char ehlo[8] = "EHLO ";
+ uint64_t mask = 0x8080800302020001;
+
+ uint64_t a = 0x0000000000090007;
+ uint64_t b = 0x0000000000000000;
+ uint32_t c;
+ uint16_t d;
+
+ const char e[16] = "LLOaaaaaaaaaaaaa";
+ const char f[16] = "aaaaaaaaaaaaaaHE";
+
+ /* pshufb mm1/xmm1, mm2/xmm2 */
+ asm volatile ("movq (%0), %%mm0" : : "r" (ehlo) : "mm0", "mm1");
+ asm volatile ("movq %0, %%mm1" : : "m" (mask));
+ asm volatile ("pshufb %mm1, %mm0");
+ asm volatile ("movq %%mm0, %0" : "=m" (hello));
+ printf("%s\n", hello);
+
+ /* pshufb mm1/xmm1, m64/m128 */
+ asm volatile ("movq (%0), %%mm0" : : "r" (ehlo) : "mm0");
+ asm volatile ("pshufb %0, %%mm0" : : "m" (mask));
+ asm volatile ("movq %%mm0, %0" : "=m" (hello));
+ printf("%s\n", hello);
+
+    /* phsubsw mm1/xmm1, m64/m128 */
+ asm volatile ("movq %0, %%mm0" : : "r" (a) : "mm0");
+ asm volatile ("phsubsw %0, %%mm0" : : "m" (b));
+ asm volatile ("movq %%mm0, %0" : "=m" (a));
+ printf("%i - %i = %i\n", 9, 7, -(int16_t) a);
+
+ /* palignr mm1/xmm1, m64/m128, imm8 */
+ asm volatile ("movdqa (%0), %%xmm0" : : "r" (e) : "xmm0");
+ asm volatile ("palignr $14, (%0), %%xmm0" : : "r" (f));
+ asm volatile ("movdqa %%xmm0, (%0)" : : "r" (hello));
+ printf("%5.5s\n", hello);
+
+#if 1 /* SSE4 */
+ /* popcnt r64, r/m64 */
+ asm volatile ("movq $0x8421000010009c63, %%rax" : : : "rax");
+ asm volatile ("popcnt %%ax, %%dx" : : : "dx");
+ asm volatile ("popcnt %%eax, %%ecx" : : : "ecx");
+ asm volatile ("popcnt %rax, %rax");
+ asm volatile ("movq %%rax, %0" : "=m" (a));
+ asm volatile ("movl %%ecx, %0" : "=m" (c));
+ asm volatile ("movw %%dx, %0" : "=m" (d));
+ printf("%i = %i\n%i = %i = %i\n", 13, (int) a, 9, c, d + 1);
+#endif
+
+ return 0;
+}
diff --git a/src/recompiler/tests/test-i386-vm86.S b/src/recompiler/tests/test-i386-vm86.S
new file mode 100644
index 00000000..3bb96c99
--- /dev/null
+++ b/src/recompiler/tests/test-i386-vm86.S
@@ -0,0 +1,103 @@
+ .code16
+ .globl vm86_code_start
+ .globl vm86_code_end
+
+#define GET_OFFSET(x) ((x) - vm86_code_start + 0x100)
+
+vm86_code_start:
+ movw $GET_OFFSET(hello_world), %dx
+ movb $0x09, %ah
+ int $0x21
+
+ /* prepare int 0x90 vector */
+ xorw %ax, %ax
+ movw %ax, %es
+ es movw $GET_OFFSET(int90_test), 0x90 * 4
+ es movw %cs, 0x90 * 4 + 2
+
+ /* launch int 0x90 */
+
+ int $0x90
+
+ /* test IF support */
+ movw $GET_OFFSET(IF_msg), %dx
+ movb $0x09, %ah
+ int $0x21
+
+ pushf
+ popw %dx
+ movb $0xff, %ah
+ int $0x21
+
+ cli
+ pushf
+ popw %dx
+ movb $0xff, %ah
+ int $0x21
+
+ sti
+ pushfl
+ popl %edx
+ movb $0xff, %ah
+ int $0x21
+
+#if 0
+ movw $GET_OFFSET(IF_msg1), %dx
+ movb $0x09, %ah
+ int $0x21
+
+ pushf
+ movw %sp, %bx
+ andw $~0x200, (%bx)
+ popf
+#else
+ cli
+#endif
+
+ pushf
+ popw %dx
+ movb $0xff, %ah
+ int $0x21
+
+ pushfl
+ movw %sp, %bx
+ orw $0x200, (%bx)
+ popfl
+
+ pushfl
+ popl %edx
+ movb $0xff, %ah
+ int $0x21
+
+ movb $0x00, %ah
+ int $0x21
+
+int90_test:
+ pushf
+ pop %dx
+ movb $0xff, %ah
+ int $0x21
+
+ movw %sp, %bx
+ movw 4(%bx), %dx
+ movb $0xff, %ah
+ int $0x21
+
+ movw $GET_OFFSET(int90_msg), %dx
+ movb $0x09, %ah
+ int $0x21
+ iret
+
+int90_msg:
+ .string "INT90 started\n$"
+
+hello_world:
+ .string "Hello VM86 world\n$"
+
+IF_msg:
+ .string "VM86 IF test\n$"
+
+IF_msg1:
+ .string "If you see a diff here, your Linux kernel is buggy, please update to 2.4.20 kernel\n$"
+
+vm86_code_end:
diff --git a/src/recompiler/tests/test-i386.c b/src/recompiler/tests/test-i386.c
new file mode 100644
index 00000000..c8f21a95
--- /dev/null
+++ b/src/recompiler/tests/test-i386.c
@@ -0,0 +1,2769 @@
+/*
+ * x86 CPU test
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <errno.h>
+#include <sys/ucontext.h>
+#include <sys/mman.h>
+
+#if !defined(__x86_64__)
+//#define TEST_VM86
+#define TEST_SEGS
+#endif
+//#define LINUX_VM86_IOPL_FIX
+//#define TEST_P4_FLAGS
+#ifdef __SSE__
+#define TEST_SSE
+#define TEST_CMOV 1
+#define TEST_FCOMI 1
+#else
+#undef TEST_SSE
+#define TEST_CMOV 1
+#define TEST_FCOMI 1
+#endif
+
+#if defined(__x86_64__)
+#define FMT64X "%016lx"
+#define FMTLX "%016lx"
+#define X86_64_ONLY(x) x
+#else
+#define FMT64X "%016" PRIx64
+#define FMTLX "%08lx"
+#define X86_64_ONLY(x)
+#endif
+
+#ifdef TEST_VM86
+#include <asm/vm86.h>
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+
+#define CC_C 0x0001
+#define CC_P 0x0004
+#define CC_A 0x0010
+#define CC_Z 0x0040
+#define CC_S 0x0080
+#define CC_O 0x0800
+
+#define __init_call __attribute__ ((unused,__section__ ("initcall")))
+
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
+
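+/* i2l() widens a 32-bit test value to a long; on x86_64 it also fills the upper 32 bits with a value derived from the low half so 64-bit handling gets exercised */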
+#if defined(__x86_64__)
+static inline long i2l(long v)
+{
+ return v | ((v ^ 0xabcd) << 32);
+}
+#else
+static inline long i2l(long v)
+{
+ return v;
+}
+#endif
+
+#define OP add
+#include "test-i386.h"
+
+#define OP sub
+#include "test-i386.h"
+
+#define OP xor
+#include "test-i386.h"
+
+#define OP and
+#include "test-i386.h"
+
+#define OP or
+#include "test-i386.h"
+
+#define OP cmp
+#include "test-i386.h"
+
+#define OP adc
+#define OP_CC
+#include "test-i386.h"
+
+#define OP sbb
+#define OP_CC
+#include "test-i386.h"
+
+#define OP inc
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP dec
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP neg
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#define OP not
+#define OP_CC
+#define OP1
+#include "test-i386.h"
+
+#undef CC_MASK
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
+
+#define OP shl
+#include "test-i386-shift.h"
+
+#define OP shr
+#include "test-i386-shift.h"
+
+#define OP sar
+#include "test-i386-shift.h"
+
+#define OP rol
+#include "test-i386-shift.h"
+
+#define OP ror
+#include "test-i386-shift.h"
+
+#define OP rcr
+#define OP_CC
+#include "test-i386-shift.h"
+
+#define OP rcl
+#define OP_CC
+#include "test-i386-shift.h"
+
+#define OP shld
+#define OP_SHIFTD
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP shrd
+#define OP_SHIFTD
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+/* XXX: should be more precise? */
+#undef CC_MASK
+#define CC_MASK (CC_C)
+
+#define OP bt
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP bts
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP btr
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+#define OP btc
+#define OP_NOBYTE
+#include "test-i386-shift.h"
+
+/* lea test (modrm support) */
+#define TEST_LEAQ(STR)\
+{\
+ asm("lea " STR ", %0"\
+ : "=r" (res)\
+ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+ printf("lea %s = " FMTLX "\n", STR, res);\
+}
+
+#define TEST_LEA(STR)\
+{\
+ asm("lea " STR ", %0"\
+ : "=r" (res)\
+ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+ printf("lea %s = " FMTLX "\n", STR, res);\
+}
+
+#define TEST_LEA16(STR)\
+{\
+ asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
+ : "=wq" (res)\
+ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
+ printf("lea %s = %08lx\n", STR, res);\
+}
+
+
+void test_lea(void)
+{
+ long eax, ebx, ecx, edx, esi, edi, res;
+ eax = i2l(0x0001);
+ ebx = i2l(0x0002);
+ ecx = i2l(0x0004);
+ edx = i2l(0x0008);
+ esi = i2l(0x0010);
+ edi = i2l(0x0020);
+
+ TEST_LEA("0x4000");
+
+ TEST_LEA("(%%eax)");
+ TEST_LEA("(%%ebx)");
+ TEST_LEA("(%%ecx)");
+ TEST_LEA("(%%edx)");
+ TEST_LEA("(%%esi)");
+ TEST_LEA("(%%edi)");
+
+ TEST_LEA("0x40(%%eax)");
+ TEST_LEA("0x40(%%ebx)");
+ TEST_LEA("0x40(%%ecx)");
+ TEST_LEA("0x40(%%edx)");
+ TEST_LEA("0x40(%%esi)");
+ TEST_LEA("0x40(%%edi)");
+
+ TEST_LEA("0x4000(%%eax)");
+ TEST_LEA("0x4000(%%ebx)");
+ TEST_LEA("0x4000(%%ecx)");
+ TEST_LEA("0x4000(%%edx)");
+ TEST_LEA("0x4000(%%esi)");
+ TEST_LEA("0x4000(%%edi)");
+
+ TEST_LEA("(%%eax, %%ecx)");
+ TEST_LEA("(%%ebx, %%edx)");
+ TEST_LEA("(%%ecx, %%ecx)");
+ TEST_LEA("(%%edx, %%ecx)");
+ TEST_LEA("(%%esi, %%ecx)");
+ TEST_LEA("(%%edi, %%ecx)");
+
+ TEST_LEA("0x40(%%eax, %%ecx)");
+ TEST_LEA("0x4000(%%ebx, %%edx)");
+
+ TEST_LEA("(%%ecx, %%ecx, 2)");
+ TEST_LEA("(%%edx, %%ecx, 4)");
+ TEST_LEA("(%%esi, %%ecx, 8)");
+
+ TEST_LEA("(,%%eax, 2)");
+ TEST_LEA("(,%%ebx, 4)");
+ TEST_LEA("(,%%ecx, 8)");
+
+ TEST_LEA("0x40(,%%eax, 2)");
+ TEST_LEA("0x40(,%%ebx, 4)");
+ TEST_LEA("0x40(,%%ecx, 8)");
+
+
+ TEST_LEA("-10(%%ecx, %%ecx, 2)");
+ TEST_LEA("-10(%%edx, %%ecx, 4)");
+ TEST_LEA("-10(%%esi, %%ecx, 8)");
+
+ TEST_LEA("0x4000(%%ecx, %%ecx, 2)");
+ TEST_LEA("0x4000(%%edx, %%ecx, 4)");
+ TEST_LEA("0x4000(%%esi, %%ecx, 8)");
+
+#if defined(__x86_64__)
+ TEST_LEAQ("0x4000");
+ TEST_LEAQ("0x4000(%%rip)");
+
+ TEST_LEAQ("(%%rax)");
+ TEST_LEAQ("(%%rbx)");
+ TEST_LEAQ("(%%rcx)");
+ TEST_LEAQ("(%%rdx)");
+ TEST_LEAQ("(%%rsi)");
+ TEST_LEAQ("(%%rdi)");
+
+ TEST_LEAQ("0x40(%%rax)");
+ TEST_LEAQ("0x40(%%rbx)");
+ TEST_LEAQ("0x40(%%rcx)");
+ TEST_LEAQ("0x40(%%rdx)");
+ TEST_LEAQ("0x40(%%rsi)");
+ TEST_LEAQ("0x40(%%rdi)");
+
+ TEST_LEAQ("0x4000(%%rax)");
+ TEST_LEAQ("0x4000(%%rbx)");
+ TEST_LEAQ("0x4000(%%rcx)");
+ TEST_LEAQ("0x4000(%%rdx)");
+ TEST_LEAQ("0x4000(%%rsi)");
+ TEST_LEAQ("0x4000(%%rdi)");
+
+ TEST_LEAQ("(%%rax, %%rcx)");
+ TEST_LEAQ("(%%rbx, %%rdx)");
+ TEST_LEAQ("(%%rcx, %%rcx)");
+ TEST_LEAQ("(%%rdx, %%rcx)");
+ TEST_LEAQ("(%%rsi, %%rcx)");
+ TEST_LEAQ("(%%rdi, %%rcx)");
+
+ TEST_LEAQ("0x40(%%rax, %%rcx)");
+ TEST_LEAQ("0x4000(%%rbx, %%rdx)");
+
+ TEST_LEAQ("(%%rcx, %%rcx, 2)");
+ TEST_LEAQ("(%%rdx, %%rcx, 4)");
+ TEST_LEAQ("(%%rsi, %%rcx, 8)");
+
+ TEST_LEAQ("(,%%rax, 2)");
+ TEST_LEAQ("(,%%rbx, 4)");
+ TEST_LEAQ("(,%%rcx, 8)");
+
+ TEST_LEAQ("0x40(,%%rax, 2)");
+ TEST_LEAQ("0x40(,%%rbx, 4)");
+ TEST_LEAQ("0x40(,%%rcx, 8)");
+
+
+ TEST_LEAQ("-10(%%rcx, %%rcx, 2)");
+ TEST_LEAQ("-10(%%rdx, %%rcx, 4)");
+ TEST_LEAQ("-10(%%rsi, %%rcx, 8)");
+
+ TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)");
+ TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)");
+ TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)");
+#else
+ /* limited 16 bit addressing test */
+ TEST_LEA16("0x4000");
+ TEST_LEA16("(%%bx)");
+ TEST_LEA16("(%%si)");
+ TEST_LEA16("(%%di)");
+ TEST_LEA16("0x40(%%bx)");
+ TEST_LEA16("0x40(%%si)");
+ TEST_LEA16("0x40(%%di)");
+ TEST_LEA16("0x4000(%%bx)");
+ TEST_LEA16("0x4000(%%si)");
+ TEST_LEA16("(%%bx,%%si)");
+ TEST_LEA16("(%%bx,%%di)");
+ TEST_LEA16("0x40(%%bx,%%si)");
+ TEST_LEA16("0x40(%%bx,%%di)");
+ TEST_LEA16("0x4000(%%bx,%%si)");
+ TEST_LEA16("0x4000(%%bx,%%di)");
+#endif
+}
+
+#define TEST_JCC(JCC, v1, v2)\
+{\
+ int res;\
+ asm("movl $1, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "j" JCC " 1f\n\t"\
+ "movl $0, %0\n\t"\
+ "1:\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2));\
+ printf("%-10s %d\n", "j" JCC, res);\
+\
+ asm("movl $0, %0\n\t"\
+ "cmpl %2, %1\n\t"\
+ "set" JCC " %b0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2));\
+ printf("%-10s %d\n", "set" JCC, res);\
+ if (TEST_CMOV) {\
+ long val = i2l(1);\
+ long res = i2l(0x12345678);\
+X86_64_ONLY(\
+ asm("cmpl %2, %1\n\t"\
+ "cmov" JCC "q %3, %0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2), "m" (val), "0" (res));\
+ printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\
+ asm("cmpl %2, %1\n\t"\
+ "cmov" JCC "l %k3, %k0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2), "m" (val), "0" (res));\
+ printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\
+ asm("cmpl %2, %1\n\t"\
+ "cmov" JCC "w %w3, %w0\n\t"\
+ : "=r" (res)\
+ : "r" (v1), "r" (v2), "r" (1), "0" (res));\
+ printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\
+ } \
+}
+
+/* various jump tests */
+void test_jcc(void)
+{
+ TEST_JCC("ne", 1, 1);
+ TEST_JCC("ne", 1, 0);
+
+ TEST_JCC("e", 1, 1);
+ TEST_JCC("e", 1, 0);
+
+ TEST_JCC("l", 1, 1);
+ TEST_JCC("l", 1, 0);
+ TEST_JCC("l", 1, -1);
+
+ TEST_JCC("le", 1, 1);
+ TEST_JCC("le", 1, 0);
+ TEST_JCC("le", 1, -1);
+
+ TEST_JCC("ge", 1, 1);
+ TEST_JCC("ge", 1, 0);
+ TEST_JCC("ge", -1, 1);
+
+ TEST_JCC("g", 1, 1);
+ TEST_JCC("g", 1, 0);
+ TEST_JCC("g", 1, -1);
+
+ TEST_JCC("b", 1, 1);
+ TEST_JCC("b", 1, 0);
+ TEST_JCC("b", 1, -1);
+
+ TEST_JCC("be", 1, 1);
+ TEST_JCC("be", 1, 0);
+ TEST_JCC("be", 1, -1);
+
+ TEST_JCC("ae", 1, 1);
+ TEST_JCC("ae", 1, 0);
+ TEST_JCC("ae", 1, -1);
+
+ TEST_JCC("a", 1, 1);
+ TEST_JCC("a", 1, 0);
+ TEST_JCC("a", 1, -1);
+
+
+ TEST_JCC("p", 1, 1);
+ TEST_JCC("p", 1, 0);
+
+ TEST_JCC("np", 1, 1);
+ TEST_JCC("np", 1, 0);
+
+ TEST_JCC("o", 0x7fffffff, 0);
+ TEST_JCC("o", 0x7fffffff, -1);
+
+ TEST_JCC("no", 0x7fffffff, 0);
+ TEST_JCC("no", 0x7fffffff, -1);
+
+ TEST_JCC("s", 0, 1);
+ TEST_JCC("s", 0, -1);
+ TEST_JCC("s", 0, 0);
+
+ TEST_JCC("ns", 0, 1);
+ TEST_JCC("ns", 0, -1);
+ TEST_JCC("ns", 0, 0);
+}
+
+#define TEST_LOOP(insn) \
+{\
+ for(i = 0; i < sizeof(ecx_vals) / sizeof(long); i++) {\
+ ecx = ecx_vals[i];\
+ for(zf = 0; zf < 2; zf++) {\
+ asm("test %2, %2\n\t"\
+ "movl $1, %0\n\t"\
+ insn " 1f\n\t" \
+ "movl $0, %0\n\t"\
+ "1:\n\t"\
+ : "=a" (res)\
+ : "c" (ecx), "b" (!zf)); \
+ printf("%-10s ECX=" FMTLX " ZF=%ld r=%d\n", insn, ecx, zf, res); \
+ }\
+ }\
+}
+
+void test_loop(void)
+{
+ long ecx, zf;
+ const long ecx_vals[] = {
+ 0,
+ 1,
+ 0x10000,
+ 0x10001,
+#if defined(__x86_64__)
+ 0x100000000L,
+ 0x100000001L,
+#endif
+ };
+ int i, res;
+
+#if !defined(__x86_64__)
+ TEST_LOOP("jcxz");
+ TEST_LOOP("loopw");
+ TEST_LOOP("loopzw");
+ TEST_LOOP("loopnzw");
+#endif
+
+ TEST_LOOP("jecxz");
+ TEST_LOOP("loopl");
+ TEST_LOOP("loopzl");
+ TEST_LOOP("loopnzl");
+}
+
+#undef CC_MASK
+#ifdef TEST_P4_FLAGS
+#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
+#else
+#define CC_MASK (CC_O | CC_C)
+#endif
+
+#define OP mul
+#include "test-i386-muldiv.h"
+
+#define OP imul
+#include "test-i386-muldiv.h"
+
+void test_imulw2(long op0, long op1)
+{
+ long res, s1, s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm volatile ("push %4\n\t"
+ "popf\n\t"
+ "imulw %w2, %w0\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=q" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+ "imulw", s0, s1, res, flags & CC_MASK);
+}
+
+void test_imull2(long op0, long op1)
+{
+ long res, s1, s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm volatile ("push %4\n\t"
+ "popf\n\t"
+ "imull %k2, %k0\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=q" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+ "imull", s0, s1, res, flags & CC_MASK);
+}
+
+#if defined(__x86_64__)
+void test_imulq2(long op0, long op1)
+{
+ long res, s1, s0, flags;
+ s0 = op0;
+ s1 = op1;
+ res = s0;
+ flags = 0;
+ asm volatile ("push %4\n\t"
+ "popf\n\t"
+ "imulq %2, %0\n\t"
+ "pushf\n\t"
+ "pop %1\n\t"
+ : "=q" (res), "=g" (flags)
+ : "q" (s1), "0" (res), "1" (flags));
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
+ "imulq", s0, s1, res, flags & CC_MASK);
+}
+#endif
+
+#define TEST_IMUL_IM(size, rsize, op0, op1)\
+{\
+ long res, flags, s1;\
+ flags = 0;\
+ res = 0;\
+ s1 = op1;\
+ asm volatile ("push %3\n\t"\
+ "popf\n\t"\
+ "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=r" (res), "=g" (flags)\
+ : "r" (s1), "1" (flags), "0" (res));\
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\
+ "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\
+}
+
+
+#undef CC_MASK
+#define CC_MASK (0)
+
+#define OP div
+#include "test-i386-muldiv.h"
+
+#define OP idiv
+#include "test-i386-muldiv.h"
+
+void test_mul(void)
+{
+ test_imulb(0x1234561d, 4);
+ test_imulb(3, -4);
+ test_imulb(0x80, 0x80);
+ test_imulb(0x10, 0x10);
+
+ test_imulw(0, 0x1234001d, 45);
+ test_imulw(0, 23, -45);
+ test_imulw(0, 0x8000, 0x8000);
+ test_imulw(0, 0x100, 0x100);
+
+ test_imull(0, 0x1234001d, 45);
+ test_imull(0, 23, -45);
+ test_imull(0, 0x80000000, 0x80000000);
+ test_imull(0, 0x10000, 0x10000);
+
+ test_mulb(0x1234561d, 4);
+ test_mulb(3, -4);
+ test_mulb(0x80, 0x80);
+ test_mulb(0x10, 0x10);
+
+ test_mulw(0, 0x1234001d, 45);
+ test_mulw(0, 23, -45);
+ test_mulw(0, 0x8000, 0x8000);
+ test_mulw(0, 0x100, 0x100);
+
+ test_mull(0, 0x1234001d, 45);
+ test_mull(0, 23, -45);
+ test_mull(0, 0x80000000, 0x80000000);
+ test_mull(0, 0x10000, 0x10000);
+
+ test_imulw2(0x1234001d, 45);
+ test_imulw2(23, -45);
+ test_imulw2(0x8000, 0x8000);
+ test_imulw2(0x100, 0x100);
+
+ test_imull2(0x1234001d, 45);
+ test_imull2(23, -45);
+ test_imull2(0x80000000, 0x80000000);
+ test_imull2(0x10000, 0x10000);
+
+ TEST_IMUL_IM("w", "w", 45, 0x1234);
+ TEST_IMUL_IM("w", "w", -45, 23);
+ TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
+ TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
+
+ TEST_IMUL_IM("l", "k", 45, 0x1234);
+ TEST_IMUL_IM("l", "k", -45, 23);
+ TEST_IMUL_IM("l", "k", 0x8000, 0x80000000);
+ TEST_IMUL_IM("l", "k", 0x7fff, 0x1000);
+
+ test_idivb(0x12341678, 0x127e);
+ test_idivb(0x43210123, -5);
+ test_idivb(0x12340004, -1);
+
+ test_idivw(0, 0x12345678, 12347);
+ test_idivw(0, -23223, -45);
+ test_idivw(0, 0x12348000, -1);
+ test_idivw(0x12343, 0x12345678, 0x81238567);
+
+ test_idivl(0, 0x12345678, 12347);
+ test_idivl(0, -233223, -45);
+ test_idivl(0, 0x80000000, -1);
+ test_idivl(0x12343, 0x12345678, 0x81234567);
+
+ test_divb(0x12341678, 0x127e);
+ test_divb(0x43210123, -5);
+ test_divb(0x12340004, -1);
+
+ test_divw(0, 0x12345678, 12347);
+ test_divw(0, -23223, -45);
+ test_divw(0, 0x12348000, -1);
+ test_divw(0x12343, 0x12345678, 0x81238567);
+
+ test_divl(0, 0x12345678, 12347);
+ test_divl(0, -233223, -45);
+ test_divl(0, 0x80000000, -1);
+ test_divl(0x12343, 0x12345678, 0x81234567);
+
+#if defined(__x86_64__)
+ test_imulq(0, 0x1234001d1234001d, 45);
+ test_imulq(0, 23, -45);
+ test_imulq(0, 0x8000000000000000, 0x8000000000000000);
+ test_imulq(0, 0x100000000, 0x100000000);
+
+ test_mulq(0, 0x1234001d1234001d, 45);
+ test_mulq(0, 23, -45);
+ test_mulq(0, 0x8000000000000000, 0x8000000000000000);
+ test_mulq(0, 0x100000000, 0x100000000);
+
+ test_imulq2(0x1234001d1234001d, 45);
+ test_imulq2(23, -45);
+ test_imulq2(0x8000000000000000, 0x8000000000000000);
+ test_imulq2(0x100000000, 0x100000000);
+
+ TEST_IMUL_IM("q", "", 45, 0x12341234);
+ TEST_IMUL_IM("q", "", -45, 23);
+ TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000);
+ TEST_IMUL_IM("q", "", 0x7fff, 0x10000000);
+
+ test_idivq(0, 0x12345678abcdef, 12347);
+ test_idivq(0, -233223, -45);
+ test_idivq(0, 0x8000000000000000, -1);
+ test_idivq(0x12343, 0x12345678, 0x81234567);
+
+ test_divq(0, 0x12345678abcdef, 12347);
+ test_divq(0, -233223, -45);
+ test_divq(0, 0x8000000000000000, -1);
+ test_divq(0x12343, 0x12345678, 0x81234567);
+#endif
+}
+
+#define TEST_BSX(op, size, op0)\
+{\
+ long res, val, resz;\
+ val = op0;\
+ asm("xor %1, %1\n"\
+ "mov $0x12345678, %0\n"\
+ #op " %" size "2, %" size "0 ; setz %b1" \
+ : "=&r" (res), "=&q" (resz)\
+ : "r" (val));\
+ printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\
+}
+
+void test_bsx(void)
+{
+ TEST_BSX(bsrw, "w", 0);
+ TEST_BSX(bsrw, "w", 0x12340128);
+ TEST_BSX(bsfw, "w", 0);
+ TEST_BSX(bsfw, "w", 0x12340128);
+ TEST_BSX(bsrl, "k", 0);
+ TEST_BSX(bsrl, "k", 0x00340128);
+ TEST_BSX(bsfl, "k", 0);
+ TEST_BSX(bsfl, "k", 0x00340128);
+#if defined(__x86_64__)
+ TEST_BSX(bsrq, "", 0);
+ TEST_BSX(bsrq, "", 0x003401281234);
+ TEST_BSX(bsfq, "", 0);
+ TEST_BSX(bsfq, "", 0x003401281234);
+#endif
+}
+
+/**********************************************/
+
+union float64u {
+ double d;
+ uint64_t l;
+};
+
+union float64u q_nan = { .l = 0xFFF8000000000000LL };
+union float64u s_nan = { .l = 0xFFF0000000000000LL };
+
+void test_fops(double a, double b)
+{
+ printf("a=%f b=%f a+b=%f\n", a, b, a + b);
+ printf("a=%f b=%f a-b=%f\n", a, b, a - b);
+ printf("a=%f b=%f a*b=%f\n", a, b, a * b);
+ printf("a=%f b=%f a/b=%f\n", a, b, a / b);
+ printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
+ printf("a=%f sqrt(a)=%f\n", a, sqrt(a));
+ printf("a=%f sin(a)=%f\n", a, sin(a));
+ printf("a=%f cos(a)=%f\n", a, cos(a));
+ printf("a=%f tan(a)=%f\n", a, tan(a));
+ printf("a=%f log(a)=%f\n", a, log(a));
+ printf("a=%f exp(a)=%f\n", a, exp(a));
+ printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
+ /* just to test some op combining */
+ printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
+ printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
+ printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
+
+}
+
+void fpu_clear_exceptions(void)
+{
+ struct __attribute__((packed)) {
+ uint16_t fpuc;
+ uint16_t dummy1;
+ uint16_t fpus;
+ uint16_t dummy2;
+ uint16_t fptag;
+ uint16_t dummy3;
+ uint32_t ignored[4];
+ long double fpregs[8];
+ } float_env32;
+
+ asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+ float_env32.fpus &= ~0x7f;
+ asm volatile ("fldenv %0\n" : : "m" (float_env32));
+}
+
+/* XXX: display exception bits when supported */
+#define FPUS_EMASK 0x0000
+//#define FPUS_EMASK 0x007f
+
+void test_fcmp(double a, double b)
+{
+ long eflags, fpus;
+
+ fpu_clear_exceptions();
+ asm("fcom %2\n"
+ "fstsw %%ax\n"
+ : "=a" (fpus)
+ : "t" (a), "u" (b));
+ printf("fcom(%f %f)=%04lx \n",
+ a, b, fpus & (0x4500 | FPUS_EMASK));
+ fpu_clear_exceptions();
+ asm("fucom %2\n"
+ "fstsw %%ax\n"
+ : "=a" (fpus)
+ : "t" (a), "u" (b));
+ printf("fucom(%f %f)=%04lx\n",
+ a, b, fpus & (0x4500 | FPUS_EMASK));
+ if (TEST_FCOMI) {
+ /* test f(u)comi instruction */
+ fpu_clear_exceptions();
+ asm("fcomi %3, %2\n"
+ "fstsw %%ax\n"
+ "pushf\n"
+ "pop %0\n"
+ : "=r" (eflags), "=a" (fpus)
+ : "t" (a), "u" (b));
+ printf("fcomi(%f %f)=%04lx %02lx\n",
+ a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
+ fpu_clear_exceptions();
+ asm("fucomi %3, %2\n"
+ "fstsw %%ax\n"
+ "pushf\n"
+ "pop %0\n"
+ : "=r" (eflags), "=a" (fpus)
+ : "t" (a), "u" (b));
+ printf("fucomi(%f %f)=%04lx %02lx\n",
+ a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
+ }
+ fpu_clear_exceptions();
+ asm volatile("fxam\n"
+ "fstsw %%ax\n"
+ : "=a" (fpus)
+ : "t" (a));
+ printf("fxam(%f)=%04lx\n", a, fpus & 0x4700);
+ fpu_clear_exceptions();
+}
+
+void test_fcvt(double a)
+{
+ float fa;
+ long double la;
+ int16_t fpuc;
+ int i;
+ int64_t lla;
+ int ia;
+ int16_t wa;
+ double ra;
+
+ fa = a;
+ la = a;
+ printf("(float)%f = %f\n", a, fa);
+ printf("(long double)%f = %Lf\n", a, la);
+ printf("a=" FMT64X "\n", *(uint64_t *)&a);
+ printf("la=" FMT64X " %04x\n", *(uint64_t *)&la,
+ *(unsigned short *)((char *)(&la) + 8));
+
+ /* test all roundings */
+ asm volatile ("fstcw %0" : "=m" (fpuc));
+ for(i=0;i<4;i++) {
+ uint16_t val16;
+ val16 = (fpuc & ~0x0c00) | (i << 10);
+ asm volatile ("fldcw %0" : : "m" (val16));
+ asm volatile ("fist %0" : "=m" (wa) : "t" (a));
+ asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
+ asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
+ asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
+ asm volatile ("fldcw %0" : : "m" (fpuc));
+ printf("(short)a = %d\n", wa);
+ printf("(int)a = %d\n", ia);
+ printf("(int64_t)a = " FMT64X "\n", lla);
+ printf("rint(a) = %f\n", ra);
+ }
+}
+
+#define TEST(N) \
+ asm("fld" #N : "=t" (a)); \
+ printf("fld" #N "= %f\n", a);
+
+void test_fconst(void)
+{
+ double a;
+ TEST(1);
+ TEST(l2t);
+ TEST(l2e);
+ TEST(pi);
+ TEST(lg2);
+ TEST(ln2);
+ TEST(z);
+}
+
+void test_fbcd(double a)
+{
+ unsigned short bcd[5];
+ double b;
+
+ asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
+ asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
+ printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
+ a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
+}
+
+#define TEST_ENV(env, save, restore)\
+{\
+ memset((env), 0xaa, sizeof(*(env)));\
+ for(i=0;i<5;i++)\
+ asm volatile ("fldl %0" : : "m" (dtab[i]));\
+ asm volatile (save " %0\n" : : "m" (*(env)));\
+ asm volatile (restore " %0\n": : "m" (*(env)));\
+ for(i=0;i<5;i++)\
+ asm volatile ("fstpl %0" : "=m" (rtab[i]));\
+ for(i=0;i<5;i++)\
+ printf("res[%d]=%f\n", i, rtab[i]);\
+ printf("fpuc=%04x fpus=%04x fptag=%04x\n",\
+ (env)->fpuc,\
+ (env)->fpus & 0xff00,\
+ (env)->fptag);\
+}
+
+void test_fenv(void)
+{
+ struct __attribute__((packed)) {
+ uint16_t fpuc;
+ uint16_t dummy1;
+ uint16_t fpus;
+ uint16_t dummy2;
+ uint16_t fptag;
+ uint16_t dummy3;
+ uint32_t ignored[4];
+ long double fpregs[8];
+ } float_env32;
+ struct __attribute__((packed)) {
+ uint16_t fpuc;
+ uint16_t fpus;
+ uint16_t fptag;
+ uint16_t ignored[4];
+ long double fpregs[8];
+ } float_env16;
+ double dtab[8];
+ double rtab[8];
+ int i;
+
+ for(i=0;i<8;i++)
+ dtab[i] = i + 1;
+
+ TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv");
+ TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor");
+ TEST_ENV(&float_env32, "fnstenv", "fldenv");
+ TEST_ENV(&float_env32, "fnsave", "frstor");
+
+ /* test for ffree */
+ for(i=0;i<5;i++)
+ asm volatile ("fldl %0" : : "m" (dtab[i]));
+ asm volatile("ffree %st(2)");
+ asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+ asm volatile ("fninit");
+ printf("fptag=%04x\n", float_env32.fptag);
+}
+
+
+#define TEST_FCMOV(a, b, eflags, CC)\
+{\
+ double res;\
+ asm("push %3\n"\
+ "popf\n"\
+ "fcmov" CC " %2, %0\n"\
+ : "=t" (res)\
+ : "0" (a), "u" (b), "g" (eflags));\
+ printf("fcmov%s eflags=0x%04lx-> %f\n", \
+ CC, (long)eflags, res);\
+}
+
+void test_fcmov(void)
+{
+ double a, b;
+ long eflags, i;
+
+ a = 1.0;
+ b = 2.0;
+ for(i = 0; i < 4; i++) {
+ eflags = 0;
+ if (i & 1)
+ eflags |= CC_C;
+ if (i & 2)
+ eflags |= CC_Z;
+ TEST_FCMOV(a, b, eflags, "b");
+ TEST_FCMOV(a, b, eflags, "e");
+ TEST_FCMOV(a, b, eflags, "be");
+ TEST_FCMOV(a, b, eflags, "nb");
+ TEST_FCMOV(a, b, eflags, "ne");
+ TEST_FCMOV(a, b, eflags, "nbe");
+ }
+ TEST_FCMOV(a, b, 0, "u");
+ TEST_FCMOV(a, b, CC_P, "u");
+ TEST_FCMOV(a, b, 0, "nu");
+ TEST_FCMOV(a, b, CC_P, "nu");
+}
+
+void test_floats(void)
+{
+ test_fops(2, 3);
+ test_fops(1.4, -5);
+ test_fcmp(2, -1);
+ test_fcmp(2, 2);
+ test_fcmp(2, 3);
+ test_fcmp(2, q_nan.d);
+ test_fcmp(q_nan.d, -1);
+ test_fcmp(-1.0/0.0, -1);
+ test_fcmp(1.0/0.0, -1);
+ test_fcvt(0.5);
+ test_fcvt(-0.5);
+ test_fcvt(1.0/7.0);
+ test_fcvt(-1.0/9.0);
+ test_fcvt(32768);
+ test_fcvt(-1e20);
+ test_fcvt(-1.0/0.0);
+ test_fcvt(1.0/0.0);
+ test_fcvt(q_nan.d);
+ test_fconst();
+ test_fbcd(1234567890123456.0);
+ test_fbcd(-123451234567890.0);
+ test_fenv();
+ if (TEST_CMOV) {
+ test_fcmov();
+ }
+}
+
+/**********************************************/
+#if !defined(__x86_64__)
+
+#define TEST_BCD(op, op0, cc_in, cc_mask)\
+{\
+ int res, flags;\
+ res = op0;\
+ flags = cc_in;\
+ asm ("push %3\n\t"\
+ "popf\n\t"\
+ #op "\n\t"\
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=a" (res), "=g" (flags)\
+ : "0" (res), "1" (flags));\
+ printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\
+ #op, op0, res, cc_in, flags & cc_mask);\
+}
+
+void test_bcd(void)
+{
+ TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+
+ TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+ TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
+
+ TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A));
+ TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A));
+
+ TEST_BCD(aas, 0x12340205, CC_A, (CC_C | CC_A));
+ TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A));
+ TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A));
+ TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A));
+ TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A));
+ TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A));
+ TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A));
+ TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A));
+
+ TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
+ TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
+}
+#endif
+
+#define TEST_XCHG(op, size, opconst)\
+{\
+ long op0, op1;\
+ op0 = i2l(0x12345678);\
+ op1 = i2l(0xfbca7654);\
+ asm(#op " %" size "0, %" size "1" \
+ : "=q" (op0), opconst (op1) \
+ : "0" (op0));\
+ printf("%-10s A=" FMTLX " B=" FMTLX "\n",\
+ #op, op0, op1);\
+}
+
+#define TEST_CMPXCHG(op, size, opconst, eax)\
+{\
+ long op0, op1, op2;\
+ op0 = i2l(0x12345678);\
+ op1 = i2l(0xfbca7654);\
+ op2 = i2l(eax);\
+ asm(#op " %" size "0, %" size "1" \
+ : "=q" (op0), opconst (op1) \
+ : "0" (op0), "a" (op2));\
+ printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\
+ #op, op2, op0, op1);\
+}
+
+void test_xchg(void)
+{
+#if defined(__x86_64__)
+ TEST_XCHG(xchgq, "", "+q");
+#endif
+ TEST_XCHG(xchgl, "k", "+q");
+ TEST_XCHG(xchgw, "w", "+q");
+ TEST_XCHG(xchgb, "b", "+q");
+
+#if defined(__x86_64__)
+ TEST_XCHG(xchgq, "", "=m");
+#endif
+ TEST_XCHG(xchgl, "k", "+m");
+ TEST_XCHG(xchgw, "w", "+m");
+ TEST_XCHG(xchgb, "b", "+m");
+
+#if defined(__x86_64__)
+ TEST_XCHG(xaddq, "", "+q");
+#endif
+ TEST_XCHG(xaddl, "k", "+q");
+ TEST_XCHG(xaddw, "w", "+q");
+ TEST_XCHG(xaddb, "b", "+q");
+
+ {
+ int res;
+ res = 0x12345678;
+ asm("xaddl %1, %0" : "=r" (res) : "0" (res));
+ printf("xaddl same res=%08x\n", res);
+ }
+
+#if defined(__x86_64__)
+ TEST_XCHG(xaddq, "", "+m");
+#endif
+ TEST_XCHG(xaddl, "k", "+m");
+ TEST_XCHG(xaddw, "w", "+m");
+ TEST_XCHG(xaddb, "b", "+m");
+
+#if defined(__x86_64__)
+ TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfbca7654);
+#endif
+ TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfbca7654);
+ TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfbca7654);
+ TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfbca7654);
+
+#if defined(__x86_64__)
+ TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfffefdfc);
+#endif
+ TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfffefdfc);
+ TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfffefdfc);
+ TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfffefdfc);
+
+#if defined(__x86_64__)
+ TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfbca7654);
+#endif
+ TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfbca7654);
+ TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfbca7654);
+ TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfbca7654);
+
+#if defined(__x86_64__)
+ TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfffefdfc);
+#endif
+ TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfffefdfc);
+ TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfffefdfc);
+ TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfffefdfc);
+
+ {
+ uint64_t op0, op1, op2;
+ long eax, edx;
+ long i, eflags;
+
+ for(i = 0; i < 2; i++) {
+ op0 = 0x123456789abcdLL;
+ eax = i2l(op0 & 0xffffffff);
+ edx = i2l(op0 >> 32);
+ if (i == 0)
+ op1 = 0xfbca765423456LL;
+ else
+ op1 = op0;
+ op2 = 0x6532432432434LL;
+ asm("cmpxchg8b %2\n"
+ "pushf\n"
+ "pop %3\n"
+ : "=a" (eax), "=d" (edx), "=m" (op1), "=g" (eflags)
+ : "0" (eax), "1" (edx), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32)));
+ printf("cmpxchg8b: eax=" FMTLX " edx=" FMTLX " op1=" FMT64X " CC=%02lx\n",
+ eax, edx, op1, eflags & CC_Z);
+ }
+ }
+}
+
+#ifdef TEST_SEGS
+/**********************************************/
+/* segmentation tests */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/ldt.h>
+#include <linux/version.h>
+
+static inline int modify_ldt(int func, void * ptr, unsigned long bytecount)
+{
+ return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
+#define modify_ldt_ldt_s user_desc
+#endif
+
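+/* Build an LDT selector: descriptor index n, table-indicator bit set (LDT),
+ requested privilege level 3. */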
+#define MK_SEL(n) (((n) << 3) | 7)
+
+uint8_t seg_data1[4096];
+uint8_t seg_data2[4096];
+
+#define TEST_LR(op, size, seg, mask)\
+{\
+ int res, res2;\
+ uint16_t mseg = seg;\
+ res = 0x12345678;\
+ asm (op " %" size "2, %" size "0\n" \
+ "movl $0, %1\n"\
+ "jnz 1f\n"\
+ "movl $1, %1\n"\
+ "1:\n"\
+ : "=r" (res), "=r" (res2) : "m" (mseg), "0" (res));\
+ printf(op ": Z=%d %08x\n", res2, res & ~(mask));\
+}
+
+#define TEST_ARPL(op, size, op1, op2)\
+{\
+ long a, b, c; \
+ a = (op1); \
+ b = (op2); \
+ asm volatile(op " %" size "3, %" size "0\n"\
+ "movl $0,%1\n"\
+ "jnz 1f\n"\
+ "movl $1,%1\n"\
+ "1:\n"\
+ : "=r" (a), "=r" (c) : "0" (a), "r" (b)); \
+ printf(op size " A=" FMTLX " B=" FMTLX " R=" FMTLX " z=%ld\n",\
+ (long)(op1), (long)(op2), a, c);\
+}
+
+/* NOTE: we use Linux modify_ldt syscall */
+void test_segs(void)
+{
+ struct modify_ldt_ldt_s ldt;
+ long long ldt_table[3];
+ int res, res2;
+ char tmp;
+ struct {
+ uint32_t offset;
+ uint16_t seg;
+ } __attribute__((packed)) segoff;
+
+ ldt.entry_number = 1;
+ ldt.base_addr = (unsigned long)&seg_data1;
+ ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
+ ldt.seg_32bit = 1;
+ ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+ ldt.read_exec_only = 0;
+ ldt.limit_in_pages = 1;
+ ldt.seg_not_present = 0;
+ ldt.useable = 1;
+ modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+ ldt.entry_number = 2;
+ ldt.base_addr = (unsigned long)&seg_data2;
+ ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12;
+ ldt.seg_32bit = 1;
+ ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+ ldt.read_exec_only = 0;
+ ldt.limit_in_pages = 1;
+ ldt.seg_not_present = 0;
+ ldt.useable = 1;
+ modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+ modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */
+#if 0
+ {
+ int i;
+ for(i=0;i<3;i++)
+ printf("%d: %016Lx\n", i, ldt_table[i]);
+ }
+#endif
+ /* do some tests with fs or gs */
+ asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
+
+ seg_data1[1] = 0xaa;
+ seg_data2[1] = 0x55;
+
+ asm volatile ("fs movzbl 0x1, %0" : "=r" (res));
+ printf("FS[1] = %02x\n", res);
+
+ asm volatile ("pushl %%gs\n"
+ "movl %1, %%gs\n"
+ "gs movzbl 0x1, %0\n"
+ "popl %%gs\n"
+ : "=r" (res)
+ : "r" (MK_SEL(2)));
+ printf("GS[1] = %02x\n", res);
+
+ /* tests with ds/ss (implicit segment case) */
+ tmp = 0xa5;
+ asm volatile ("pushl %%ebp\n\t"
+ "pushl %%ds\n\t"
+ "movl %2, %%ds\n\t"
+ "movl %3, %%ebp\n\t"
+ "movzbl 0x1, %0\n\t"
+ "movzbl (%%ebp), %1\n\t"
+ "popl %%ds\n\t"
+ "popl %%ebp\n\t"
+ : "=r" (res), "=r" (res2)
+ : "r" (MK_SEL(1)), "r" (&tmp));
+ printf("DS[1] = %02x\n", res);
+ printf("SS[tmp] = %02x\n", res2);
+
+ segoff.seg = MK_SEL(2);
+ segoff.offset = 0xabcdef12;
+ asm volatile("lfs %2, %0\n\t"
+ "movl %%fs, %1\n\t"
+ : "=r" (res), "=g" (res2)
+ : "m" (segoff));
+ printf("FS:reg = %04x:%08x\n", res2, res);
+
+ TEST_LR("larw", "w", MK_SEL(2), 0x0100);
+ TEST_LR("larl", "", MK_SEL(2), 0x0100);
+ TEST_LR("lslw", "w", MK_SEL(2), 0);
+ TEST_LR("lsll", "", MK_SEL(2), 0);
+
+ TEST_LR("larw", "w", 0xfff8, 0);
+ TEST_LR("larl", "", 0xfff8, 0);
+ TEST_LR("lslw", "w", 0xfff8, 0);
+ TEST_LR("lsll", "", 0xfff8, 0);
+
+ TEST_ARPL("arpl", "w", 0x12345678 | 3, 0x762123c | 1);
+ TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 3);
+ TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 1);
+}
+
+/* 16 bit code test */
+extern char code16_start, code16_end;
+extern char code16_func1;
+extern char code16_func2;
+extern char code16_func3;
+
+void test_code16(void)
+{
+ struct modify_ldt_ldt_s ldt;
+ int res, res2;
+
+ /* build a code segment */
+ ldt.entry_number = 1;
+ ldt.base_addr = (unsigned long)&code16_start;
+ ldt.limit = &code16_end - &code16_start;
+ ldt.seg_32bit = 0;
+ ldt.contents = MODIFY_LDT_CONTENTS_CODE;
+ ldt.read_exec_only = 0;
+ ldt.limit_in_pages = 0;
+ ldt.seg_not_present = 0;
+ ldt.useable = 1;
+ modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+ /* call the first function */
+ asm volatile ("lcall %1, %2"
+ : "=a" (res)
+ : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc");
+ printf("func1() = 0x%08x\n", res);
+ asm volatile ("lcall %2, %3"
+ : "=a" (res), "=c" (res2)
+ : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc");
+ printf("func2() = 0x%08x spdec=%d\n", res, res2);
+ asm volatile ("lcall %1, %2"
+ : "=a" (res)
+ : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc");
+ printf("func3() = 0x%08x\n", res);
+}
+#endif
+
+#if defined(__x86_64__)
+asm(".globl func_lret\n"
+ "func_lret:\n"
+ "movl $0x87654641, %eax\n"
+ "lretq\n");
+#else
+asm(".globl func_lret\n"
+ "func_lret:\n"
+ "movl $0x87654321, %eax\n"
+ "lret\n"
+
+ ".globl func_iret\n"
+ "func_iret:\n"
+ "movl $0xabcd4321, %eax\n"
+ "iret\n");
+#endif
+
+extern char func_lret;
+extern char func_iret;
+
+void test_misc(void)
+{
+ char table[256];
+ long res, i;
+
+ for(i=0;i<256;i++) table[i] = 256 - i;
+ res = 0x12345678;
+ asm ("xlat" : "=a" (res) : "b" (table), "0" (res));
+ printf("xlat: EAX=" FMTLX "\n", res);
+
+#if defined(__x86_64__)
+#if 0
+ {
+ /* XXX: see if the Intel Core2 and AMD64 behaviors really
+ differ. Here we implement the Intel way, which is not yet
+ compatible with QEMU. */
+ static struct __attribute__((packed)) {
+ uint64_t offset;
+ uint16_t seg;
+ } desc;
+ long cs_sel;
+
+ asm volatile ("mov %%cs, %0" : "=r" (cs_sel));
+
+ asm volatile ("push %1\n"
+ "call func_lret\n"
+ : "=a" (res)
+ : "r" (cs_sel) : "memory", "cc");
+ printf("func_lret=" FMTLX "\n", res);
+
+ desc.offset = (long)&func_lret;
+ desc.seg = cs_sel;
+
+ asm volatile ("xor %%rax, %%rax\n"
+ "rex64 lcall *(%%rcx)\n"
+ : "=a" (res)
+ : "c" (&desc)
+ : "memory", "cc");
+ printf("func_lret2=" FMTLX "\n", res);
+
+ asm volatile ("push %2\n"
+ "mov $ 1f, %%rax\n"
+ "push %%rax\n"
+ "rex64 ljmp *(%%rcx)\n"
+ "1:\n"
+ : "=a" (res)
+ : "c" (&desc), "b" (cs_sel)
+ : "memory", "cc");
+ printf("func_lret3=" FMTLX "\n", res);
+ }
+#endif
+#else
+ asm volatile ("push %%cs ; call %1"
+ : "=a" (res)
+ : "m" (func_lret): "memory", "cc");
+ printf("func_lret=" FMTLX "\n", res);
+
+ asm volatile ("pushf ; push %%cs ; call %1"
+ : "=a" (res)
+ : "m" (func_iret): "memory", "cc");
+ printf("func_iret=" FMTLX "\n", res);
+#endif
+
+#if defined(__x86_64__)
+ /* specific popl test */
+ asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0"
+ : "=g" (res));
+ printf("popl esp=" FMTLX "\n", res);
+#else
+ /* specific popl test */
+ asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0"
+ : "=g" (res));
+ printf("popl esp=" FMTLX "\n", res);
+
+ /* specific popw test */
+ asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0"
+ : "=g" (res));
+ printf("popw esp=" FMTLX "\n", res);
+#endif
+}
+
+uint8_t str_buffer[4096];
+
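+/* Run one string instruction (optionally with a REP prefix and the direction
+ flag set) on the middle of str_buffer, then print the resulting registers
+ and arithmetic flags. */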
+#define TEST_STRING1(OP, size, DF, REP)\
+{\
+ long esi, edi, eax, ecx, eflags;\
+\
+ esi = (long)(str_buffer + sizeof(str_buffer) / 2);\
+ edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
+ eax = i2l(0x12345678);\
+ ecx = 17;\
+\
+ asm volatile ("push $0\n\t"\
+ "popf\n\t"\
+ DF "\n\t"\
+ REP #OP size "\n\t"\
+ "cld\n\t"\
+ "pushf\n\t"\
+ "pop %4\n\t"\
+ : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\
+ : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\
+ printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\
+ REP #OP size, esi, edi, eax, ecx,\
+ (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\
+}
+
+#define TEST_STRING(OP, REP)\
+ TEST_STRING1(OP, "b", "", REP);\
+ TEST_STRING1(OP, "w", "", REP);\
+ TEST_STRING1(OP, "l", "", REP);\
+ X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\
+ TEST_STRING1(OP, "b", "std", REP);\
+ TEST_STRING1(OP, "w", "std", REP);\
+ TEST_STRING1(OP, "l", "std", REP);\
+ X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP))
+
+void test_string(void)
+{
+ int i;
+ for(i = 0;i < sizeof(str_buffer); i++)
+ str_buffer[i] = i + 0x56;
+ TEST_STRING(stos, "");
+ TEST_STRING(stos, "rep ");
+ TEST_STRING(lods, ""); /* to verify stos */
+ TEST_STRING(lods, "rep ");
+ TEST_STRING(movs, "");
+ TEST_STRING(movs, "rep ");
+ TEST_STRING(lods, ""); /* to verify stos */
+
+ /* XXX: better tests */
+ TEST_STRING(scas, "");
+ TEST_STRING(scas, "repz ");
+ TEST_STRING(scas, "repnz ");
+ TEST_STRING(cmps, "");
+ TEST_STRING(cmps, "repz ");
+ TEST_STRING(cmps, "repnz ");
+}
+
+#ifdef TEST_VM86
+/* VM86 test */
+
+static inline void set_bit(uint8_t *a, unsigned int bit)
+{
+ a[bit / 8] |= (1 << (bit % 8));
+}
+
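+/* Real-mode address translation: linear = (segment << 4) + 16-bit offset. */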
+static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
+{
+ return (uint8_t *)((seg << 4) + (reg & 0xffff));
+}
+
+static inline void pushw(struct vm86_regs *r, int val)
+{
+ r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
+ *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
+}
+
+static inline int vm86(int func, struct vm86plus_struct *v86)
+{
+ return syscall(__NR_vm86, func, v86);
+}
+
+extern char vm86_code_start;
+extern char vm86_code_end;
+
+#define VM86_CODE_CS 0x100
+#define VM86_CODE_IP 0x100
+
+void test_vm86(void)
+{
+ struct vm86plus_struct ctx;
+ struct vm86_regs *r;
+ uint8_t *vm86_mem;
+ int seg, ret;
+
+ vm86_mem = mmap((void *)0x00000000, 0x110000,
+ PROT_WRITE | PROT_READ | PROT_EXEC,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (vm86_mem == MAP_FAILED) {
+ printf("ERROR: could not map vm86 memory");
+ return;
+ }
+ memset(&ctx, 0, sizeof(ctx));
+
+ /* init basic registers */
+ r = &ctx.regs;
+ r->eip = VM86_CODE_IP;
+ r->esp = 0xfffe;
+ seg = VM86_CODE_CS;
+ r->cs = seg;
+ r->ss = seg;
+ r->ds = seg;
+ r->es = seg;
+ r->fs = seg;
+ r->gs = seg;
+ r->eflags = VIF_MASK;
+
+ /* move the code to the proper address. We use the same layout as a
+ .com DOS program. */
+ memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP,
+ &vm86_code_start, &vm86_code_end - &vm86_code_start);
+
+ /* mark int 0x21 as being emulated */
+ set_bit((uint8_t *)&ctx.int_revectored, 0x21);
+
+ for(;;) {
+ ret = vm86(VM86_ENTER, &ctx);
+ switch(VM86_TYPE(ret)) {
+ case VM86_INTx:
+ {
+ int int_num, ah, v;
+
+ int_num = VM86_ARG(ret);
+ if (int_num != 0x21)
+ goto unknown_int;
+ ah = (r->eax >> 8) & 0xff;
+ switch(ah) {
+ case 0x00: /* exit */
+ goto the_end;
+ case 0x02: /* write char */
+ {
+ uint8_t c = r->edx;
+ putchar(c);
+ }
+ break;
+ case 0x09: /* write string */
+ {
+ uint8_t c, *ptr;
+ ptr = seg_to_linear(r->ds, r->edx);
+ for(;;) {
+ c = *ptr++;
+ if (c == '$')
+ break;
+ putchar(c);
+ }
+ r->eax = (r->eax & ~0xff) | '$';
+ }
+ break;
+ case 0xff: /* extension: write eflags number in edx */
+ v = (int)r->edx;
+#ifndef LINUX_VM86_IOPL_FIX
+ v &= ~0x3000;
+#endif
+ printf("%08x\n", v);
+ break;
+ default:
+ unknown_int:
+ printf("unsupported int 0x%02x\n", int_num);
+ goto the_end;
+ }
+ }
+ break;
+ case VM86_SIGNAL:
+ /* a signal came, we just ignore that */
+ break;
+ case VM86_STI:
+ break;
+ default:
+ printf("ERROR: unhandled vm86 return code (0x%x)\n", ret);
+ goto the_end;
+ }
+ }
+ the_end:
+ printf("VM86 end\n");
+ munmap(vm86_mem, 0x110000);
+}
+#endif
+
+/* exception tests */
+#if defined(__i386__) && !defined(REG_EAX)
+#define REG_EAX EAX
+#define REG_EBX EBX
+#define REG_ECX ECX
+#define REG_EDX EDX
+#define REG_ESI ESI
+#define REG_EDI EDI
+#define REG_EBP EBP
+#define REG_ESP ESP
+#define REG_EIP EIP
+#define REG_EFL EFL
+#define REG_TRAPNO TRAPNO
+#define REG_ERR ERR
+#endif
+
+#if defined(__x86_64__)
+#define REG_EIP REG_RIP
+#endif
+
+jmp_buf jmp_env;
+int v1;
+int tab[2];
+
+void sig_handler(int sig, siginfo_t *info, void *puc)
+{
+ struct ucontext *uc = puc;
+
+ printf("si_signo=%d si_errno=%d si_code=%d",
+ info->si_signo, info->si_errno, info->si_code);
+ printf(" si_addr=0x%08lx",
+ (unsigned long)info->si_addr);
+ printf("\n");
+
+ printf("trapno=" FMTLX " err=" FMTLX,
+ (long)uc->uc_mcontext.gregs[REG_TRAPNO],
+ (long)uc->uc_mcontext.gregs[REG_ERR]);
+ printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]);
+ printf("\n");
+ longjmp(jmp_env, 1);
+}
+
+void test_exceptions(void)
+{
+ struct sigaction act;
+ volatile int val;
+
+ act.sa_sigaction = sig_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO | SA_NODEFER;
+ sigaction(SIGFPE, &act, NULL);
+ sigaction(SIGILL, &act, NULL);
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ sigaction(SIGTRAP, &act, NULL);
+
+ /* test division by zero reporting */
+ printf("DIVZ exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* now divide by zero */
+ v1 = 0;
+ v1 = 2 / v1;
+ }
+
+#if !defined(__x86_64__)
+ printf("BOUND exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* bound exception */
+ tab[0] = 1;
+ tab[1] = 10;
+ asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0]));
+ }
+#endif
+
+#ifdef TEST_SEGS
+ printf("segment exceptions:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* load an invalid segment */
+ asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1));
+ }
+ if (setjmp(jmp_env) == 0) {
+ /* null data segment is valid */
+ asm volatile ("movl %0, %%fs" : : "r" (3));
+ /* null stack segment */
+ asm volatile ("movl %0, %%ss" : : "r" (3));
+ }
+
+ {
+ struct modify_ldt_ldt_s ldt;
+ ldt.entry_number = 1;
+ ldt.base_addr = (unsigned long)&seg_data1;
+ ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
+ ldt.seg_32bit = 1;
+ ldt.contents = MODIFY_LDT_CONTENTS_DATA;
+ ldt.read_exec_only = 0;
+ ldt.limit_in_pages = 1;
+ ldt.seg_not_present = 1;
+ ldt.useable = 1;
+ modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
+
+ if (setjmp(jmp_env) == 0) {
+ /* segment not present */
+ asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
+ }
+ }
+#endif
+
+ /* test SEGV reporting */
+ printf("PF exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ val = 1;
+ /* we add a nop to test a weird PC retrieval case */
+ asm volatile ("nop");
+ /* now store in an invalid address */
+ *(char *)0x1234 = 1;
+ }
+
+ /* test SEGV reporting */
+ printf("PF exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ val = 1;
+ /* read from an invalid address */
+ v1 = *(char *)0x1234;
+ }
+
+ /* test illegal instruction reporting */
+ printf("UD2 exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* now execute an invalid instruction */
+ asm volatile("ud2");
+ }
+ printf("lock nop exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* now execute an invalid instruction */
+ asm volatile("lock nop");
+ }
+
+ printf("INT exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("int $0xfd");
+ }
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("int $0x01");
+ }
+ if (setjmp(jmp_env) == 0) {
+ asm volatile (".byte 0xcd, 0x03");
+ }
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("int $0x04");
+ }
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("int $0x05");
+ }
+
+ printf("INT3 exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("int3");
+ }
+
+ printf("CLI exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("cli");
+ }
+
+ printf("STI exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("cli");
+ }
+
+#if !defined(__x86_64__)
+ printf("INTO exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ /* overflow exception */
+ asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff));
+ }
+#endif
+
+ printf("OUTB exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0));
+ }
+
+ printf("INB exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321));
+ }
+
+ printf("REP OUTSB exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1));
+ }
+
+ printf("REP INSB exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1));
+ }
+
+ printf("HLT exception:\n");
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("hlt");
+ }
+
+ printf("single step exception:\n");
+ val = 0;
+ if (setjmp(jmp_env) == 0) {
+ asm volatile ("pushf\n"
+ "orl $0x00100, (%%esp)\n"
+ "popf\n"
+ "movl $0xabcd, %0\n"
+ "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory");
+ }
+ printf("val=0x%x\n", val);
+}
+
+#if !defined(__x86_64__)
+/* specific precise single step test */
+void sig_trap_handler(int sig, siginfo_t *info, void *puc)
+{
+ struct ucontext *uc = puc;
+ printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]);
+}
+
+const uint8_t sstep_buf1[4] = { 1, 2, 3, 4};
+uint8_t sstep_buf2[4];
+
+void test_single_step(void)
+{
+ struct sigaction act;
+ volatile int val;
+ int i;
+
+ val = 0;
+ act.sa_sigaction = sig_trap_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ sigaction(SIGTRAP, &act, NULL);
+ asm volatile ("pushf\n"
+ "orl $0x00100, (%%esp)\n"
+ "popf\n"
+ "movl $0xabcd, %0\n"
+
+ /* jmp test */
+ "movl $3, %%ecx\n"
+ "1:\n"
+ "addl $1, %0\n"
+ "decl %%ecx\n"
+ "jnz 1b\n"
+
+ /* movsb: the single step should stop at each movsb iteration */
+ "movl $sstep_buf1, %%esi\n"
+ "movl $sstep_buf2, %%edi\n"
+ "movl $0, %%ecx\n"
+ "rep movsb\n"
+ "movl $3, %%ecx\n"
+ "rep movsb\n"
+ "movl $1, %%ecx\n"
+ "rep movsb\n"
+
+ /* cmpsb: the single step should stop at each cmpsb iteration */
+ "movl $sstep_buf1, %%esi\n"
+ "movl $sstep_buf2, %%edi\n"
+ "movl $0, %%ecx\n"
+ "rep cmpsb\n"
+ "movl $4, %%ecx\n"
+ "rep cmpsb\n"
+
+ /* getpid() syscall: single step should skip one
+ instruction */
+ "movl $20, %%eax\n"
+ "int $0x80\n"
+ "movl $0, %%eax\n"
+
+ /* when modifying SS, trace is not done on the next
+ instruction */
+ "movl %%ss, %%ecx\n"
+ "movl %%ecx, %%ss\n"
+ "addl $1, %0\n"
+ "movl $1, %%eax\n"
+ "movl %%ecx, %%ss\n"
+ "jmp 1f\n"
+ "addl $1, %0\n"
+ "1:\n"
+ "movl $1, %%eax\n"
+ "pushl %%ecx\n"
+ "popl %%ss\n"
+ "addl $1, %0\n"
+ "movl $1, %%eax\n"
+
+ "pushf\n"
+ "andl $~0x00100, (%%esp)\n"
+ "popf\n"
+ : "=m" (val)
+ :
+ : "cc", "memory", "eax", "ecx", "esi", "edi");
+ printf("val=%d\n", val);
+ for(i = 0; i < 4; i++)
+ printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]);
+}
+
+/* self modifying code test */
+uint8_t code[] = {
+ 0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */
+ 0xc3, /* ret */
+};
+
+asm(".section \".data\"\n"
+ "smc_code2:\n"
+ "movl 4(%esp), %eax\n"
+ "movl %eax, smc_patch_addr2 + 1\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "nop\n"
+ "smc_patch_addr2:\n"
+ "movl $1, %eax\n"
+ "ret\n"
+ ".previous\n"
+ );
+
+typedef int FuncType(void);
+extern int smc_code2(int);
+void test_self_modifying_code(void)
+{
+ int i;
+ printf("self modifying code:\n");
+ printf("func1 = 0x%x\n", ((FuncType *)code)());
+ for(i = 2; i <= 4; i++) {
+ code[1] = i;
+ printf("func%d = 0x%x\n", i, ((FuncType *)code)());
+ }
+
+ /* more difficult test: the modified code is just after the
+ modifying instruction. It is forbidden by the Intel specs, but it
+ is used by old DOS programs */
+ for(i = 2; i <= 4; i++) {
+ printf("smc_code2(%d) = %d\n", i, smc_code2(i));
+ }
+}
+#endif
+
+long enter_stack[4096];
+
+#if defined(__x86_64__)
+#define RSP "%%rsp"
+#define RBP "%%rbp"
+#else
+#define RSP "%%esp"
+#define RBP "%%ebp"
+#endif
+
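+/* ENTER size, level: push the frame pointer, copy level-1 outer frame
+ pointers from the old frame, then reserve the requested bytes of locals.
+ The macro runs it on a private stack and dumps the resulting frame. */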
+#define TEST_ENTER(size, stack_type, level)\
+{\
+ long esp_save, esp_val, ebp_val, ebp_save, i;\
+ stack_type *ptr, *stack_end, *stack_ptr;\
+ memset(enter_stack, 0, sizeof(enter_stack));\
+ stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\
+ ebp_val = (long)stack_ptr;\
+ for(i=1;i<=32;i++)\
+ *--stack_ptr = i;\
+ esp_val = (long)stack_ptr;\
+ asm("mov " RSP ", %[esp_save]\n"\
+ "mov " RBP ", %[ebp_save]\n"\
+ "mov %[esp_val], " RSP "\n"\
+ "mov %[ebp_val], " RBP "\n"\
+ "enter" size " $8, $" #level "\n"\
+ "mov " RSP ", %[esp_val]\n"\
+ "mov " RBP ", %[ebp_val]\n"\
+ "mov %[esp_save], " RSP "\n"\
+ "mov %[ebp_save], " RBP "\n"\
+ : [esp_save] "=r" (esp_save),\
+ [ebp_save] "=r" (ebp_save),\
+ [esp_val] "=r" (esp_val),\
+ [ebp_val] "=r" (ebp_val)\
+ : "[esp_val]" (esp_val),\
+ "[ebp_val]" (ebp_val));\
+ printf("level=%d:\n", level);\
+ printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\
+ printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\
+ for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\
+ printf(FMTLX "\n", (long)ptr[0]);\
+}
+
+static void test_enter(void)
+{
+#if defined(__x86_64__)
+ TEST_ENTER("q", uint64_t, 0);
+ TEST_ENTER("q", uint64_t, 1);
+ TEST_ENTER("q", uint64_t, 2);
+ TEST_ENTER("q", uint64_t, 31);
+#else
+ TEST_ENTER("l", uint32_t, 0);
+ TEST_ENTER("l", uint32_t, 1);
+ TEST_ENTER("l", uint32_t, 2);
+ TEST_ENTER("l", uint32_t, 31);
+#endif
+
+ TEST_ENTER("w", uint16_t, 0);
+ TEST_ENTER("w", uint16_t, 1);
+ TEST_ENTER("w", uint16_t, 2);
+ TEST_ENTER("w", uint16_t, 31);
+}
+
+#ifdef TEST_SSE
+
+typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
+typedef float __m128 __attribute__ ((__mode__(__V4SF__)));
+
+typedef union {
+ double d[2];
+ float s[4];
+ uint32_t l[4];
+ uint64_t q[2];
+ __m128 dq;
+} XMMReg;
+
+static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
+ { 0x456723c698694873, 0xdc515cff944a58ec },
+ { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 },
+ { 0x007c62c2085427f8, 0x231be9e8cde7438d },
+ { 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
+};
+
+#define SSE_OP(op)\
+{\
+ asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ b.q[1], b.q[0],\
+ r.q[1], r.q[0]);\
+}
+
+#define SSE_OP2(op)\
+{\
+ int i;\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ a.q[1] = test_values[2*i][1];\
+ b.q[0] = test_values[2*i+1][0];\
+ b.q[1] = test_values[2*i+1][1];\
+ SSE_OP(op);\
+ }\
+}
+
+#define MMX_OP2(op)\
+{\
+ int i;\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ b.q[0] = test_values[2*i+1][0];\
+ asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\
+ printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\
+ #op,\
+ a.q[0],\
+ b.q[0],\
+ r.q[0]);\
+ }\
+ SSE_OP2(op);\
+}
+
+#define SHUF_OP(op, ib)\
+{\
+ a.q[0] = test_values[0][0];\
+ a.q[1] = test_values[0][1];\
+ b.q[0] = test_values[1][0];\
+ b.q[1] = test_values[1][1];\
+ asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ b.q[1], b.q[0],\
+ ib,\
+ r.q[1], r.q[0]);\
+}
+
+#define PSHUF_OP(op, ib)\
+{\
+ int i;\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ a.q[1] = test_values[2*i][1];\
+ asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ ib,\
+ r.q[1], r.q[0]);\
+ }\
+}
+
+#define SHIFT_IM(op, ib)\
+{\
+ int i;\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ a.q[1] = test_values[2*i][1];\
+ asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ ib,\
+ r.q[1], r.q[0]);\
+ }\
+}
+
+#define SHIFT_OP(op, ib)\
+{\
+ int i;\
+ SHIFT_IM(op, ib);\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ a.q[1] = test_values[2*i][1];\
+ b.q[0] = ib;\
+ b.q[1] = 0;\
+ asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ b.q[1], b.q[0],\
+ r.q[1], r.q[0]);\
+ }\
+}
+
+#define MOVMSK(op)\
+{\
+ int i, reg;\
+ for(i=0;i<2;i++) {\
+ a.q[0] = test_values[2*i][0];\
+ a.q[1] = test_values[2*i][1];\
+ asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ reg);\
+ }\
+}
+
+#define SSE_OPS(a) \
+SSE_OP(a ## ps);\
+SSE_OP(a ## ss);
+
+#define SSE_OPD(a) \
+SSE_OP(a ## pd);\
+SSE_OP(a ## sd);
+
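+/* Compare two scalar values with the given (u)comis* instruction and print
+ the arithmetic flags it produced. */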
+#define SSE_COMI(op, field)\
+{\
+ unsigned int eflags;\
+ XMMReg a, b;\
+ a.field[0] = a1;\
+ b.field[0] = b1;\
+ asm volatile (#op " %2, %1\n"\
+ "pushf\n"\
+ "pop %0\n"\
+ : "=m" (eflags)\
+ : "x" (a.dq), "x" (b.dq));\
+ printf("%-9s: a=%f b=%f cc=%04x\n",\
+ #op, a1, b1,\
+ eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
+}
+
+void test_sse_comi(double a1, double b1)
+{
+ SSE_COMI(ucomiss, s);
+ SSE_COMI(ucomisd, d);
+ SSE_COMI(comiss, s);
+ SSE_COMI(comisd, d);
+}
+
+#define CVT_OP_XMM(op)\
+{\
+ asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ r.q[1], r.q[0]);\
+}
+
+/* Force %xmm0 usage to avoid the case where both register indexes are 0,
+ to test instruction decoding more extensively */
+#define CVT_OP_XMM2MMX(op)\
+{\
+ asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq) \
+ : "%xmm0"); \
+ asm volatile("emms\n"); \
+ printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ r.q[0]);\
+}
+
+#define CVT_OP_MMX2XMM(op)\
+{\
+ asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\
+ asm volatile("emms\n"); \
+ printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.q[0],\
+ r.q[1], r.q[0]);\
+}
+
+#define CVT_OP_REG2XMM(op)\
+{\
+ asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\
+ printf("%-9s: a=%08x r=" FMT64X "" FMT64X "\n",\
+ #op,\
+ a.l[0],\
+ r.q[1], r.q[0]);\
+}
+
+#define CVT_OP_XMM2REG(op)\
+{\
+ asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\
+ printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\
+ #op,\
+ a.q[1], a.q[0],\
+ r.l[0]);\
+}
+
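+/* Mirrors the 512-byte FXSAVE/FXRSTOR memory image (32-bit layout): x87
+ control/status/tag words, MXCSR, eight 16-byte x87/MMX slots, then the
+ XMM register file. */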
+struct fpxstate {
+ uint16_t fpuc;
+ uint16_t fpus;
+ uint16_t fptag;
+ uint16_t fop;
+ uint32_t fpuip;
+ uint16_t cs_sel;
+ uint16_t dummy0;
+ uint32_t fpudp;
+ uint16_t ds_sel;
+ uint16_t dummy1;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ uint8_t fpregs1[8 * 16];
+ uint8_t xmm_regs[8 * 16];
+ uint8_t dummy2[224];
+};
+
+static struct fpxstate fpx_state __attribute__((aligned(16)));
+static struct fpxstate fpx_state2 __attribute__((aligned(16)));
+
+void test_fxsave(void)
+{
+ struct fpxstate *fp = &fpx_state;
+ struct fpxstate *fp2 = &fpx_state2;
+ int i, nb_xmm;
+ XMMReg a, b;
+ a.q[0] = test_values[0][0];
+ a.q[1] = test_values[0][1];
+ b.q[0] = test_values[1][0];
+ b.q[1] = test_values[1][1];
+
+ asm("movdqa %2, %%xmm0\n"
+ "movdqa %3, %%xmm7\n"
+#if defined(__x86_64__)
+ "movdqa %2, %%xmm15\n"
+#endif
+ " fld1\n"
+ " fldpi\n"
+ " fldln2\n"
+ " fxsave %0\n"
+ " fxrstor %0\n"
+ " fxsave %1\n"
+ " fninit\n"
+ : "=m" (*(uint32_t *)fp2), "=m" (*(uint32_t *)fp)
+ : "m" (a), "m" (b));
+ printf("fpuc=%04x\n", fp->fpuc);
+ printf("fpus=%04x\n", fp->fpus);
+ printf("fptag=%04x\n", fp->fptag);
+ for(i = 0; i < 3; i++) {
+ printf("ST%d: " FMT64X " %04x\n",
+ i,
+ *(uint64_t *)&fp->fpregs1[i * 16],
+ *(uint16_t *)&fp->fpregs1[i * 16 + 8]);
+ }
+ printf("mxcsr=%08x\n", fp->mxcsr & 0x1f80);
+#if defined(__x86_64__)
+ nb_xmm = 16;
+#else
+ nb_xmm = 8;
+#endif
+ for(i = 0; i < nb_xmm; i++) {
+ printf("xmm%d: " FMT64X "" FMT64X "\n",
+ i,
+ *(uint64_t *)&fp->xmm_regs[i * 16],
+ *(uint64_t *)&fp->xmm_regs[i * 16 + 8]);
+ }
+}
+
+void test_sse(void)
+{
+ XMMReg r, a, b;
+ int i;
+
+ MMX_OP2(punpcklbw);
+ MMX_OP2(punpcklwd);
+ MMX_OP2(punpckldq);
+ MMX_OP2(packsswb);
+ MMX_OP2(pcmpgtb);
+ MMX_OP2(pcmpgtw);
+ MMX_OP2(pcmpgtd);
+ MMX_OP2(packuswb);
+ MMX_OP2(punpckhbw);
+ MMX_OP2(punpckhwd);
+ MMX_OP2(punpckhdq);
+ MMX_OP2(packssdw);
+ MMX_OP2(pcmpeqb);
+ MMX_OP2(pcmpeqw);
+ MMX_OP2(pcmpeqd);
+
+ MMX_OP2(paddq);
+ MMX_OP2(pmullw);
+ MMX_OP2(psubusb);
+ MMX_OP2(psubusw);
+ MMX_OP2(pminub);
+ MMX_OP2(pand);
+ MMX_OP2(paddusb);
+ MMX_OP2(paddusw);
+ MMX_OP2(pmaxub);
+ MMX_OP2(pandn);
+
+ MMX_OP2(pmulhuw);
+ MMX_OP2(pmulhw);
+
+ MMX_OP2(psubsb);
+ MMX_OP2(psubsw);
+ MMX_OP2(pminsw);
+ MMX_OP2(por);
+ MMX_OP2(paddsb);
+ MMX_OP2(paddsw);
+ MMX_OP2(pmaxsw);
+ MMX_OP2(pxor);
+ MMX_OP2(pmuludq);
+ MMX_OP2(pmaddwd);
+ MMX_OP2(psadbw);
+ MMX_OP2(psubb);
+ MMX_OP2(psubw);
+ MMX_OP2(psubd);
+ MMX_OP2(psubq);
+ MMX_OP2(paddb);
+ MMX_OP2(paddw);
+ MMX_OP2(paddd);
+
+ MMX_OP2(pavgb);
+ MMX_OP2(pavgw);
+
+ asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
+ printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]);
+
+ asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678));
+ printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]);
+
+ a.q[0] = test_values[0][0];
+ a.q[1] = test_values[0][1];
+ asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
+ printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
+
+ asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
+ printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
+
+ asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
+ printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
+
+ asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
+ printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
+
+ {
+ r.q[0] = -1;
+ r.q[1] = -1;
+
+ a.q[0] = test_values[0][0];
+ a.q[1] = test_values[0][1];
+ b.q[0] = test_values[1][0];
+ b.q[1] = test_values[1][1];
+ asm volatile("maskmovq %1, %0" :
+ : "y" (a.q[0]), "y" (b.q[0]), "D" (&r)
+ : "memory");
+ printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n",
+ "maskmov",
+ r.q[0],
+ a.q[0],
+ b.q[0]);
+ asm volatile("maskmovdqu %1, %0" :
+ : "x" (a.dq), "x" (b.dq), "D" (&r)
+ : "memory");
+ printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n",
+ "maskmov",
+ r.q[1], r.q[0],
+ a.q[1], a.q[0],
+ b.q[1], b.q[0]);
+ }
+
+ asm volatile ("emms");
+
+ SSE_OP2(punpcklqdq);
+ SSE_OP2(punpckhqdq);
+ SSE_OP2(andps);
+ SSE_OP2(andpd);
+ SSE_OP2(andnps);
+ SSE_OP2(andnpd);
+ SSE_OP2(orps);
+ SSE_OP2(orpd);
+ SSE_OP2(xorps);
+ SSE_OP2(xorpd);
+
+ SSE_OP2(unpcklps);
+ SSE_OP2(unpcklpd);
+ SSE_OP2(unpckhps);
+ SSE_OP2(unpckhpd);
+
+ SHUF_OP(shufps, 0x78);
+ SHUF_OP(shufpd, 0x02);
+
+ PSHUF_OP(pshufd, 0x78);
+ PSHUF_OP(pshuflw, 0x78);
+ PSHUF_OP(pshufhw, 0x78);
+
+ SHIFT_OP(psrlw, 7);
+ SHIFT_OP(psrlw, 16);
+ SHIFT_OP(psraw, 7);
+ SHIFT_OP(psraw, 16);
+ SHIFT_OP(psllw, 7);
+ SHIFT_OP(psllw, 16);
+
+ SHIFT_OP(psrld, 7);
+ SHIFT_OP(psrld, 32);
+ SHIFT_OP(psrad, 7);
+ SHIFT_OP(psrad, 32);
+ SHIFT_OP(pslld, 7);
+ SHIFT_OP(pslld, 32);
+
+ SHIFT_OP(psrlq, 7);
+ SHIFT_OP(psrlq, 32);
+ SHIFT_OP(psllq, 7);
+ SHIFT_OP(psllq, 32);
+
+ SHIFT_IM(psrldq, 16);
+ SHIFT_IM(psrldq, 7);
+ SHIFT_IM(pslldq, 16);
+ SHIFT_IM(pslldq, 7);
+
+ MOVMSK(movmskps);
+ MOVMSK(movmskpd);
+
+ /* FPU specific ops */
+
+ {
+ uint32_t mxcsr;
+ asm volatile("stmxcsr %0" : "=m" (mxcsr));
+ printf("mxcsr=%08x\n", mxcsr & 0x1f80);
+ asm volatile("ldmxcsr %0" : : "m" (mxcsr));
+ }
+
+ test_sse_comi(2, -1);
+ test_sse_comi(2, 2);
+ test_sse_comi(2, 3);
+ test_sse_comi(2, q_nan.d);
+ test_sse_comi(q_nan.d, -1);
+
+ for(i = 0; i < 2; i++) {
+ a.s[0] = 2.7;
+ a.s[1] = 3.4;
+ a.s[2] = 4;
+ a.s[3] = -6.3;
+ b.s[0] = 45.7;
+ b.s[1] = 353.4;
+ b.s[2] = 4;
+ b.s[3] = 56.3;
+ if (i == 1) {
+ a.s[0] = q_nan.d;
+ b.s[3] = q_nan.d;
+ }
+
+ SSE_OPS(add);
+ SSE_OPS(mul);
+ SSE_OPS(sub);
+ SSE_OPS(min);
+ SSE_OPS(div);
+ SSE_OPS(max);
+ SSE_OPS(sqrt);
+ SSE_OPS(cmpeq);
+ SSE_OPS(cmplt);
+ SSE_OPS(cmple);
+ SSE_OPS(cmpunord);
+ SSE_OPS(cmpneq);
+ SSE_OPS(cmpnlt);
+ SSE_OPS(cmpnle);
+ SSE_OPS(cmpord);
+
+
+ a.d[0] = 2.7;
+ a.d[1] = -3.4;
+ b.d[0] = 45.7;
+ b.d[1] = -53.4;
+ if (i == 1) {
+ a.d[0] = q_nan.d;
+ b.d[1] = q_nan.d;
+ }
+ SSE_OPD(add);
+ SSE_OPD(mul);
+ SSE_OPD(sub);
+ SSE_OPD(min);
+ SSE_OPD(div);
+ SSE_OPD(max);
+ SSE_OPD(sqrt);
+ SSE_OPD(cmpeq);
+ SSE_OPD(cmplt);
+ SSE_OPD(cmple);
+ SSE_OPD(cmpunord);
+ SSE_OPD(cmpneq);
+ SSE_OPD(cmpnlt);
+ SSE_OPD(cmpnle);
+ SSE_OPD(cmpord);
+ }
+
+ /* float to float/int */
+ a.s[0] = 2.7;
+ a.s[1] = 3.4;
+ a.s[2] = 4;
+ a.s[3] = -6.3;
+ CVT_OP_XMM(cvtps2pd);
+ CVT_OP_XMM(cvtss2sd);
+ CVT_OP_XMM2MMX(cvtps2pi);
+ CVT_OP_XMM2MMX(cvttps2pi);
+ CVT_OP_XMM2REG(cvtss2si);
+ CVT_OP_XMM2REG(cvttss2si);
+ CVT_OP_XMM(cvtps2dq);
+ CVT_OP_XMM(cvttps2dq);
+
+ a.d[0] = 2.6;
+ a.d[1] = -3.4;
+ CVT_OP_XMM(cvtpd2ps);
+ CVT_OP_XMM(cvtsd2ss);
+ CVT_OP_XMM2MMX(cvtpd2pi);
+ CVT_OP_XMM2MMX(cvttpd2pi);
+ CVT_OP_XMM2REG(cvtsd2si);
+ CVT_OP_XMM2REG(cvttsd2si);
+ CVT_OP_XMM(cvtpd2dq);
+ CVT_OP_XMM(cvttpd2dq);
+
+ /* sse/mmx moves */
+ CVT_OP_XMM2MMX(movdq2q);
+ CVT_OP_MMX2XMM(movq2dq);
+
+ /* int to float */
+ a.l[0] = -6;
+ a.l[1] = 2;
+ a.l[2] = 100;
+ a.l[3] = -60000;
+ CVT_OP_MMX2XMM(cvtpi2ps);
+ CVT_OP_MMX2XMM(cvtpi2pd);
+ CVT_OP_REG2XMM(cvtsi2ss);
+ CVT_OP_REG2XMM(cvtsi2sd);
+ CVT_OP_XMM(cvtdq2ps);
+ CVT_OP_XMM(cvtdq2pd);
+
+ /* XXX: test PNI insns */
+#if 0
+ SSE_OP2(movshdup);
+#endif
+ asm volatile ("emms");
+}
+
+#endif
+
+#define TEST_CONV_RAX(op)\
+{\
+ unsigned long a, r;\
+ a = i2l(0x8234a6f8);\
+ r = a;\
+ asm volatile(#op : "=a" (r) : "0" (r));\
+ printf("%-10s A=" FMTLX " R=" FMTLX "\n", #op, a, r);\
+}
+
+#define TEST_CONV_RAX_RDX(op)\
+{\
+ unsigned long a, d, r, rh; \
+ a = i2l(0x8234a6f8);\
+ d = i2l(0x8345a1f2);\
+ r = a;\
+ rh = d;\
+ asm volatile(#op : "=a" (r), "=d" (rh) : "0" (r), "1" (rh)); \
+ printf("%-10s A=" FMTLX " R=" FMTLX ":" FMTLX "\n", #op, a, r, rh); \
+}
+
+void test_conv(void)
+{
+ TEST_CONV_RAX(cbw);
+ TEST_CONV_RAX(cwde);
+#if defined(__x86_64__)
+ TEST_CONV_RAX(cdqe);
+#endif
+
+ TEST_CONV_RAX_RDX(cwd);
+ TEST_CONV_RAX_RDX(cdq);
+#if defined(__x86_64__)
+ TEST_CONV_RAX_RDX(cqo);
+#endif
+
+ {
+ unsigned long a, r;
+ a = i2l(0x12345678);
+ asm volatile("bswapl %k0" : "=r" (r) : "0" (a));
+ printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapl", a, r);
+ }
+#if defined(__x86_64__)
+ {
+ unsigned long a, r;
+ a = i2l(0x12345678);
+ asm volatile("bswapq %0" : "=r" (r) : "0" (a));
+ printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapq", a, r);
+ }
+#endif
+}
+
+extern void *__start_initcall;
+extern void *__stop_initcall;
+
+
+int main(int argc, char **argv)
+{
+ void **ptr;
+ void (*func)(void);
+
+ ptr = &__start_initcall;
+ while (ptr != &__stop_initcall) {
+ func = *ptr++;
+ func();
+ }
+ test_bsx();
+ test_mul();
+ test_jcc();
+ test_loop();
+ test_floats();
+#if !defined(__x86_64__)
+ test_bcd();
+#endif
+ test_xchg();
+ test_string();
+ test_misc();
+ test_lea();
+#ifdef TEST_SEGS
+ test_segs();
+ test_code16();
+#endif
+#ifdef TEST_VM86
+ test_vm86();
+#endif
+#if !defined(__x86_64__)
+ test_exceptions();
+ test_self_modifying_code();
+ test_single_step();
+#endif
+ test_enter();
+ test_conv();
+#ifdef TEST_SSE
+ test_sse();
+ test_fxsave();
+#endif
+ return 0;
+}
diff --git a/src/recompiler/tests/test-i386.h b/src/recompiler/tests/test-i386.h
new file mode 100644
index 00000000..75106b8c
--- /dev/null
+++ b/src/recompiler/tests/test-i386.h
@@ -0,0 +1,152 @@
+
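+/* Instruction test template: included by the test program once per ALU
+ operation, with OP (and optionally OP1 and OP_CC) defined beforehand to
+ generate the exec_* and test_* helpers for that operation. */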
+#define exec_op glue(exec_, OP)
+#define exec_opq glue(glue(exec_, OP), q)
+#define exec_opl glue(glue(exec_, OP), l)
+#define exec_opw glue(glue(exec_, OP), w)
+#define exec_opb glue(glue(exec_, OP), b)
+
+#define EXECOP2(size, rsize, res, s1, flags) \
+ asm ("push %4\n\t"\
+ "popf\n\t"\
+ stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=q" (res), "=g" (flags)\
+ : "q" (s1), "0" (res), "1" (flags)); \
+ printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \
+ stringify(OP) size, s0, s1, res, iflags, flags & CC_MASK);
+
+#define EXECOP1(size, rsize, res, flags) \
+ asm ("push %3\n\t"\
+ "popf\n\t"\
+ stringify(OP) size " %" rsize "0\n\t" \
+ "pushf\n\t"\
+ "pop %1\n\t"\
+ : "=q" (res), "=g" (flags)\
+ : "0" (res), "1" (flags)); \
+ printf("%-10s A=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \
+ stringify(OP) size, s0, res, iflags, flags & CC_MASK);
+
+#ifdef OP1
+#if defined(__x86_64__)
+void exec_opq(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP1("q", "", res, flags);
+}
+#endif
+
+void exec_opl(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP1("l", "k", res, flags);
+}
+
+void exec_opw(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP1("w", "w", res, flags);
+}
+
+void exec_opb(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP1("b", "b", res, flags);
+}
+#else
+#if defined(__x86_64__)
+void exec_opq(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP2("q", "", res, s1, flags);
+}
+#endif
+
+void exec_opl(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP2("l", "k", res, s1, flags);
+}
+
+void exec_opw(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP2("w", "w", res, s1, flags);
+}
+
+void exec_opb(long s0, long s1, long iflags)
+{
+ long res, flags;
+ res = s0;
+ flags = iflags;
+ EXECOP2("b", "b", res, s1, flags);
+}
+#endif
+
+void exec_op(long s0, long s1)
+{
+ s0 = i2l(s0);
+ s1 = i2l(s1);
+#if defined(__x86_64__)
+ exec_opq(s0, s1, 0);
+#endif
+ exec_opl(s0, s1, 0);
+ exec_opw(s0, s1, 0);
+ exec_opb(s0, s1, 0);
+#ifdef OP_CC
+#if defined(__x86_64__)
+ exec_opq(s0, s1, CC_C);
+#endif
+ exec_opl(s0, s1, CC_C);
+ exec_opw(s0, s1, CC_C);
+ exec_opb(s0, s1, CC_C);
+#endif
+}
+
+void glue(test_, OP)(void)
+{
+ exec_op(0x12345678, 0x812FADA);
+ exec_op(0x12341, 0x12341);
+ exec_op(0x12341, -0x12341);
+ exec_op(0xffffffff, 0);
+ exec_op(0xffffffff, -1);
+ exec_op(0xffffffff, 1);
+ exec_op(0xffffffff, 2);
+ exec_op(0x7fffffff, 0);
+ exec_op(0x7fffffff, 1);
+ exec_op(0x7fffffff, -1);
+ exec_op(0x80000000, -1);
+ exec_op(0x80000000, 1);
+ exec_op(0x80000000, -2);
+ exec_op(0x12347fff, 0);
+ exec_op(0x12347fff, 1);
+ exec_op(0x12347fff, -1);
+ exec_op(0x12348000, -1);
+ exec_op(0x12348000, 1);
+ exec_op(0x12348000, -2);
+ exec_op(0x12347f7f, 0);
+ exec_op(0x12347f7f, 1);
+ exec_op(0x12347f7f, -1);
+ exec_op(0x12348080, -1);
+ exec_op(0x12348080, 1);
+ exec_op(0x12348080, -2);
+}
+
+void *glue(_test_, OP) __init_call = glue(test_, OP);
+
+#undef OP
+#undef OP_CC
diff --git a/src/recompiler/tests/test-mmap.c b/src/recompiler/tests/test-mmap.c
new file mode 100644
index 00000000..1d444b63
--- /dev/null
+++ b/src/recompiler/tests/test-mmap.c
@@ -0,0 +1,485 @@
+/*
+ * Small test program to verify simulated mmap behaviour.
+ *
+ * When running qemu-linux-user with the -p flag, you may need to tell
+ * this test program about the pagesize because getpagesize() will not reflect
+ * the -p choice. Simply pass one argument being the pagesize.
+ *
+ * Copyright (c) 2007 AXIS Communications AB
+ * Written by Edgar E. Iglesias.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the General Public License version 2 (GPLv2) at this time for any software where
+ * a choice of GPL license versions is made available with the language indicating
+ * that GPLv2 or any later version may be used, or where a choice of which version
+ * of the GPL is applied is otherwise unspecified.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+
+#define D(x)
+
+#define fail_unless(x) \
+do \
+{ \
+ if (!(x)) { \
+ fprintf (stderr, "FAILED at %s:%d\n", __FILE__, __LINE__); \
+ exit (EXIT_FAILURE); \
+ } \
+} while (0);
+
+unsigned char *dummybuf;
+static unsigned int pagesize;
+static unsigned int pagemask;
+int test_fd;
+size_t test_fsize;
+
+void check_aligned_anonymous_unfixed_mmaps(void)
+{
+ void *p1;
+ void *p2;
+ void *p3;
+ void *p4;
+ void *p5;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x1fff; i++)
+ {
+ size_t len;
+
+ len = pagesize + (pagesize * i & 7);
+ p1 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ p2 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ p3 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ p4 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ p5 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ fail_unless (p1 != MAP_FAILED);
+ fail_unless (p2 != MAP_FAILED);
+ fail_unless (p3 != MAP_FAILED);
+ fail_unless (p4 != MAP_FAILED);
+ fail_unless (p5 != MAP_FAILED);
+ p = (uintptr_t) p1;
+ D(printf ("p=%x\n", p));
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p2;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p3;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p4;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p5;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Make sure we can read from the entire area. */
+ memcpy (dummybuf, p1, pagesize);
+ memcpy (dummybuf, p2, pagesize);
+ memcpy (dummybuf, p3, pagesize);
+ memcpy (dummybuf, p4, pagesize);
+ memcpy (dummybuf, p5, pagesize);
+
+ munmap (p1, len);
+ munmap (p2, len);
+ munmap (p3, len);
+ munmap (p4, len);
+ munmap (p5, len);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_large_anonymous_unfixed_mmap(void)
+{
+ void *p1;
+ uintptr_t p;
+ size_t len;
+
+ fprintf (stderr, "%s", __func__);
+
+ len = 0x02000000;
+ p1 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ fail_unless (p1 != MAP_FAILED);
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Make sure we can read from the entire area. */
+ memcpy (dummybuf, p1, pagesize);
+ munmap (p1, len);
+ fprintf (stderr, " passed\n");
+}
+
+void check_aligned_anonymous_unfixed_colliding_mmaps(void)
+{
+ char *p1;
+ char *p2;
+ char *p3;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x2fff; i++)
+ {
+ int nlen;
+ p1 = mmap(NULL, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ fail_unless (p1 != MAP_FAILED);
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+ memcpy (dummybuf, p1, pagesize);
+
+ p2 = mmap(NULL, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ fail_unless (p2 != MAP_FAILED);
+ p = (uintptr_t) p2;
+ fail_unless ((p & pagemask) == 0);
+ memcpy (dummybuf, p2, pagesize);
+
+
+ munmap (p1, pagesize);
+ nlen = pagesize * 8;
+ p3 = mmap(NULL, nlen, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ /* Check whether the mmapped areas collide. */
+ if (p3 < p2
+ && (p3 + nlen) > p2)
+ fail_unless (0);
+
+ memcpy (dummybuf, p3, pagesize);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ fail_unless (p3 != MAP_FAILED);
+ p = (uintptr_t) p3;
+ fail_unless ((p & pagemask) == 0);
+ munmap (p2, pagesize);
+ munmap (p3, nlen);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_aligned_anonymous_fixed_mmaps(void)
+{
+ char *addr;
+ void *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 40, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ fprintf (stderr, "%s addr=%p", __func__, addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 40; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ -1, 0);
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless (p1 == addr);
+ fail_unless ((p & pagemask) == 0);
+ memcpy (dummybuf, p1, pagesize);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_aligned_anonymous_fixed_mmaps_collide_with_host(void)
+{
+ char *addr;
+ void *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. Right where the x86 host's
+ stack is. */
+ addr = ((void *)0x80000000);
+ fprintf (stderr, "%s addr=%p", __func__, addr);
+ fprintf (stderr, "FIXME: QEMU fails to track pages used by the host.");
+
+ for (i = 0; i < 20; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ -1, 0);
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless (p1 == addr);
+ fail_unless ((p & pagemask) == 0);
+ memcpy (p1, dummybuf, pagesize);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_unfixed_mmaps(void)
+{
+ unsigned int *p1, *p2, *p3;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x10; i++)
+ {
+ size_t len;
+
+ len = pagesize;
+ p1 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, 0);
+ p2 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, pagesize);
+ p3 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, pagesize * 2);
+
+ fail_unless (p1 != MAP_FAILED);
+ fail_unless (p2 != MAP_FAILED);
+ fail_unless (p3 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p2;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p3;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Verify that the file maps were made correctly. */
+ D(printf ("p1=%d p2=%d p3=%d\n", *p1, *p2, *p3));
+ fail_unless (*p1 == 0);
+ fail_unless (*p2 == (pagesize / sizeof *p2));
+ fail_unless (*p3 == ((pagesize * 2) / sizeof *p3));
+
+ memcpy (dummybuf, p1, pagesize);
+ memcpy (dummybuf, p2, pagesize);
+ memcpy (dummybuf, p3, pagesize);
+ munmap (p1, len);
+ munmap (p2, len);
+ munmap (p3, len);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_unfixed_eof_mmaps(void)
+{
+ char *cp;
+ unsigned int *p1;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x10; i++)
+ {
+ p1 = mmap(NULL, pagesize, PROT_READ,
+ MAP_PRIVATE,
+ test_fd,
+ (test_fsize - sizeof *p1) & ~pagemask);
+
+ fail_unless (p1 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+ /* Verify that the file maps were made correctly. */
+ fail_unless (p1[(test_fsize & pagemask) / sizeof *p1 - 1]
+ == ((test_fsize - sizeof *p1) / sizeof *p1));
+
+ /* Verify that the end of the page is accessible and zeroed. */
+ cp = (void *) p1;
+ fail_unless (cp[pagesize - 4] == 0);
+ munmap (p1, pagesize);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_fixed_eof_mmaps(void)
+{
+ char *addr;
+ char *cp;
+ unsigned int *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 44, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+
+ fprintf (stderr, "%s addr=%p", __func__, (void *)addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 0x10; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd,
+ (test_fsize - sizeof *p1) & ~pagemask);
+
+ fail_unless (p1 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Verify that the file maps were made correctly. */
+ fail_unless (p1[(test_fsize & pagemask) / sizeof *p1 - 1]
+ == ((test_fsize - sizeof *p1) / sizeof *p1));
+
+ /* Verify that the end of the page is accessible and zeroed. */
+ cp = (void *)p1;
+ fail_unless (cp[pagesize - 4] == 0);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_fixed_mmaps(void)
+{
+ unsigned char *addr;
+ unsigned int *p1, *p2, *p3, *p4;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 40 * 4, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ fprintf (stderr, "%s addr=%p", __func__, (void *)addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 40; i++)
+ {
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, 0);
+ p2 = mmap(addr + pagesize, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize);
+ p3 = mmap(addr + pagesize * 2, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize * 2);
+ p4 = mmap(addr + pagesize * 3, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize * 3);
+
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ fail_unless (p1 == (void *)addr);
+ fail_unless (p2 == (void *)(addr + pagesize));
+ fail_unless (p3 == (void *)(addr + pagesize * 2));
+ fail_unless (p4 == (void *)(addr + pagesize * 3));
+
+ /* Verify that the file maps were made correctly. */
+ fail_unless (*p1 == 0);
+ fail_unless (*p2 == (pagesize / sizeof *p2));
+ fail_unless (*p3 == ((pagesize * 2) / sizeof *p3));
+ fail_unless (*p4 == ((pagesize * 3) / sizeof *p4));
+
+ memcpy (dummybuf, p1, pagesize);
+ memcpy (dummybuf, p2, pagesize);
+ memcpy (dummybuf, p3, pagesize);
+ memcpy (dummybuf, p4, pagesize);
+
+ munmap (p1, pagesize);
+ munmap (p2, pagesize);
+ munmap (p3, pagesize);
+ munmap (p4, pagesize);
+ addr += pagesize * 4;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+int main(int argc, char **argv)
+{
+ char tempname[] = "/tmp/.cmmapXXXXXX";
+ unsigned int i;
+
+ /* Trust the first argument, otherwise probe the system for our
+ pagesize. */
+ if (argc > 1)
+ pagesize = strtoul(argv[1], NULL, 0);
+ else
+ pagesize = sysconf(_SC_PAGESIZE);
+
+ /* Assume pagesize is a power of two. */
+ pagemask = pagesize - 1;
+ dummybuf = malloc (pagesize);
+ printf ("pagesize=%u pagemask=%x\n", pagesize, pagemask);
+
+ test_fd = mkstemp(tempname);
+ unlink(tempname);
+
+ /* Fill the file with ints counting from zero and up. */
+ for (i = 0; i < (pagesize * 4) / sizeof i; i++)
+ write (test_fd, &i, sizeof i);
+ /* Append a few extra writes to make the file end at a non-page
+ boundary. */
+ write (test_fd, &i, sizeof i); i++;
+ write (test_fd, &i, sizeof i); i++;
+ write (test_fd, &i, sizeof i); i++;
+
+ test_fsize = lseek(test_fd, 0, SEEK_CUR);
+
+ /* Run the tests. */
+ check_aligned_anonymous_unfixed_mmaps();
+ check_aligned_anonymous_unfixed_colliding_mmaps();
+ check_aligned_anonymous_fixed_mmaps();
+ check_file_unfixed_mmaps();
+ check_file_fixed_mmaps();
+ check_file_fixed_eof_mmaps();
+ check_file_unfixed_eof_mmaps();
+
+ /* Fails at the moment. */
+ /* check_aligned_anonymous_fixed_mmaps_collide_with_host(); */
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/recompiler/tests/test_path.c b/src/recompiler/tests/test_path.c
new file mode 100644
index 00000000..def7441c
--- /dev/null
+++ b/src/recompiler/tests/test_path.c
@@ -0,0 +1,151 @@
+/* Test path override code */
+#define _GNU_SOURCE
+#include "../path.c"
+#include <stdarg.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Any log message kills the test. */
+void gemu_log(const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "FATAL: ");
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+#define NO_CHANGE(_path) \
+ do { \
+ if (strcmp(path(_path), _path) != 0) return __LINE__; \
+ } while(0)
+
+#define CHANGE_TO(_path, _newpath) \
+ do { \
+ if (strcmp(path(_path), _newpath) != 0) return __LINE__; \
+ } while(0)
+
+static void cleanup(void)
+{
+ unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE2");
+ unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE3");
+ unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE4");
+ unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE5");
+ rmdir("/tmp/qemu-test_path/DIR1/DIR2");
+ rmdir("/tmp/qemu-test_path/DIR1/DIR3");
+ rmdir("/tmp/qemu-test_path/DIR1");
+ rmdir("/tmp/qemu-test_path");
+}
+
+static unsigned int do_test(void)
+{
+ if (mkdir("/tmp/qemu-test_path", 0700) != 0)
+ return __LINE__;
+
+ if (mkdir("/tmp/qemu-test_path/DIR1", 0700) != 0)
+ return __LINE__;
+
+ if (mkdir("/tmp/qemu-test_path/DIR1/DIR2", 0700) != 0)
+ return __LINE__;
+
+ if (mkdir("/tmp/qemu-test_path/DIR1/DIR3", 0700) != 0)
+ return __LINE__;
+
+ if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE", 0600)) != 0)
+ return __LINE__;
+
+ if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE2", 0600)) != 0)
+ return __LINE__;
+
+ if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE3", 0600)) != 0)
+ return __LINE__;
+
+ if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE4", 0600)) != 0)
+ return __LINE__;
+
+ if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE5", 0600)) != 0)
+ return __LINE__;
+
+ init_paths("/tmp/qemu-test_path");
+
+ NO_CHANGE("/tmp");
+ NO_CHANGE("/tmp/");
+ NO_CHANGE("/tmp/qemu-test_path");
+ NO_CHANGE("/tmp/qemu-test_path/");
+ NO_CHANGE("/tmp/qemu-test_path/D");
+ NO_CHANGE("/tmp/qemu-test_path/DI");
+ NO_CHANGE("/tmp/qemu-test_path/DIR");
+ NO_CHANGE("/tmp/qemu-test_path/DIR1");
+ NO_CHANGE("/tmp/qemu-test_path/DIR1/");
+
+ NO_CHANGE("/D");
+ NO_CHANGE("/DI");
+ NO_CHANGE("/DIR");
+ NO_CHANGE("/DIR2");
+ NO_CHANGE("/DIR1.");
+
+ CHANGE_TO("/DIR1", "/tmp/qemu-test_path/DIR1");
+ CHANGE_TO("/DIR1/", "/tmp/qemu-test_path/DIR1");
+
+ NO_CHANGE("/DIR1/D");
+ NO_CHANGE("/DIR1/DI");
+ NO_CHANGE("/DIR1/DIR");
+ NO_CHANGE("/DIR1/DIR1");
+
+ CHANGE_TO("/DIR1/DIR2", "/tmp/qemu-test_path/DIR1/DIR2");
+ CHANGE_TO("/DIR1/DIR2/", "/tmp/qemu-test_path/DIR1/DIR2");
+
+ CHANGE_TO("/DIR1/DIR3", "/tmp/qemu-test_path/DIR1/DIR3");
+ CHANGE_TO("/DIR1/DIR3/", "/tmp/qemu-test_path/DIR1/DIR3");
+
+ NO_CHANGE("/DIR1/DIR2/F");
+ NO_CHANGE("/DIR1/DIR2/FI");
+ NO_CHANGE("/DIR1/DIR2/FIL");
+ NO_CHANGE("/DIR1/DIR2/FIL.");
+
+ CHANGE_TO("/DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/DIR2/FILE2", "/tmp/qemu-test_path/DIR1/DIR2/FILE2");
+ CHANGE_TO("/DIR1/DIR2/FILE3", "/tmp/qemu-test_path/DIR1/DIR2/FILE3");
+ CHANGE_TO("/DIR1/DIR2/FILE4", "/tmp/qemu-test_path/DIR1/DIR2/FILE4");
+ CHANGE_TO("/DIR1/DIR2/FILE5", "/tmp/qemu-test_path/DIR1/DIR2/FILE5");
+
+ NO_CHANGE("/DIR1/DIR2/FILE6");
+ NO_CHANGE("/DIR1/DIR2/FILE/X");
+
+ CHANGE_TO("/DIR1/../DIR1", "/tmp/qemu-test_path/DIR1");
+ CHANGE_TO("/DIR1/../DIR1/", "/tmp/qemu-test_path/DIR1");
+ CHANGE_TO("/../DIR1", "/tmp/qemu-test_path/DIR1");
+ CHANGE_TO("/../DIR1/", "/tmp/qemu-test_path/DIR1");
+ CHANGE_TO("/DIR1/DIR2/../DIR2", "/tmp/qemu-test_path/DIR1/DIR2");
+ CHANGE_TO("/DIR1/DIR2/../DIR2/../../DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/DIR2/../DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+
+ NO_CHANGE("/DIR1/DIR2/../DIR1");
+ NO_CHANGE("/DIR1/DIR2/../FILE");
+
+ CHANGE_TO("/./DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/././DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/./DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/././DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/DIR1/DIR2/././FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+ CHANGE_TO("/./DIR1/./DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ ret = do_test();
+ cleanup();
+ if (ret) {
+ fprintf(stderr, "test_path: failed on line %i\n", ret);
+ return 1;
+ }
+ return 0;
+}
diff --git a/src/recompiler/tests/testthread.c b/src/recompiler/tests/testthread.c
new file mode 100644
index 00000000..27e4825b
--- /dev/null
+++ b/src/recompiler/tests/testthread.c
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sys/wait.h>
+#include <sched.h>
+
+void *thread1_func(void *arg)
+{
+ int i;
+ char buf[512];
+
+ for(i=0;i<10;i++) {
+ snprintf(buf, sizeof(buf), "thread1: %d %s\n", i, (char *)arg);
+ write(1, buf, strlen(buf));
+ usleep(100 * 1000);
+ }
+ return NULL;
+}
+
+void *thread2_func(void *arg)
+{
+ int i;
+ char buf[512];
+ for(i=0;i<20;i++) {
+ snprintf(buf, sizeof(buf), "thread2: %d %s\n", i, (char *)arg);
+ write(1, buf, strlen(buf));
+ usleep(150 * 1000);
+ }
+ return NULL;
+}
+
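+/* Start both worker threads and wait for them to finish. */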
+void test_pthread(void)
+{
+ pthread_t tid1, tid2;
+
+ pthread_create(&tid1, NULL, thread1_func, "hello1");
+ pthread_create(&tid2, NULL, thread2_func, "hello2");
+ pthread_join(tid1, NULL);
+ pthread_join(tid2, NULL);
+ printf("End of pthread test.\n");
+}
+
+int main(int argc, char **argv)
+{
+ test_pthread();
+ return 0;
+}
diff --git a/src/recompiler/translate-all.c b/src/recompiler/translate-all.c
new file mode 100644
index 00000000..b6882559
--- /dev/null
+++ b/src/recompiler/translate-all.c
@@ -0,0 +1,186 @@
+/*
+ * Host code generation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
+ * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
+ * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
+ * a choice of LGPL license versions is made available with the language indicating
+ * that LGPLv2 or any later version may be used, or where a choice of which version
+ * of the LGPL is applied is otherwise unspecified.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "config.h"
+
+#define NO_CPU_IO_DEFS
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+#include "tcg.h"
+#include "qemu-timer.h"
+
+/* code generation context */
+TCGContext tcg_ctx;
+
+uint16_t gen_opc_buf[OPC_BUF_SIZE];
+TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+target_ulong gen_opc_pc[OPC_BUF_SIZE];
+uint16_t gen_opc_icount[OPC_BUF_SIZE];
+uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+
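+/* One-time initialization of the TCG code generation context. */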
+void cpu_gen_init(void)
+{
+ tcg_context_init(&tcg_ctx);
+ tcg_set_frame(&tcg_ctx, TCG_AREG0, offsetof(CPUState, temp_buf),
+ sizeof(((CPUState *)0)->temp_buf));
+}
+
+/* Return non-zero if the very first instruction is invalid, so that
+ the virtual CPU can trigger an exception.
+
+ '*gen_code_size_ptr' contains the size of the generated code (host
+ code).
+*/
+int cpu_gen_code(CPUState *env, TranslationBlock *tb, int *gen_code_size_ptr)
+{
+ TCGContext *s = &tcg_ctx;
+ uint8_t *gen_code_buf;
+ int gen_code_size;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+ s->tb_count1++; /* includes aborted translations because of
+ exceptions */
+ ti = profile_getclock();
+#endif
+
+#ifdef VBOX
+ RAWEx_ProfileStart(env, STATS_QEMU_COMPILATION);
+#endif
+
+ tcg_func_start(s);
+
+ gen_intermediate_code(env, tb);
+
+ /* generate machine code */
+ gen_code_buf = tb->tc_ptr;
+ tb->tb_next_offset[0] = 0xffff;
+ tb->tb_next_offset[1] = 0xffff;
+ s->tb_next_offset = tb->tb_next_offset;
+#ifdef USE_DIRECT_JUMP
+ s->tb_jmp_offset = tb->tb_jmp_offset;
+ s->tb_next = NULL;
+#else
+ s->tb_jmp_offset = NULL;
+ s->tb_next = tb->tb_next;
+#endif
+
+#ifdef CONFIG_PROFILER
+ s->tb_count++;
+ s->interm_time += profile_getclock() - ti;
+ s->code_time -= profile_getclock();
+#endif
+ gen_code_size = tcg_gen_code(s, gen_code_buf);
+ *gen_code_size_ptr = gen_code_size;
+#ifdef CONFIG_PROFILER
+ s->code_time += profile_getclock();
+ s->code_in_len += tb->size;
+ s->code_out_len += gen_code_size;
+#endif
+
+#ifdef VBOX
+ RAWEx_ProfileStop(env, STATS_QEMU_COMPILATION);
+#endif
+
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+ qemu_log("OUT: [size=%d]\n", *gen_code_size_ptr);
+ log_disas(tb->tc_ptr, *gen_code_size_ptr);
+ qemu_log("\n");
+ qemu_log_flush();
+ }
+#endif
+ return 0;
+}
+
+/* The cpu state corresponding to 'searched_pc' is restored.
+ */
+int cpu_restore_state(TranslationBlock *tb,
+ CPUState *env, uintptr_t searched_pc,
+ void *puc)
+{
+ TCGContext *s = &tcg_ctx;
+ int j;
+ uintptr_t tc_ptr;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+ ti = profile_getclock();
+#endif
+ tcg_func_start(s);
+
+ gen_intermediate_code_pc(env, tb);
+
+ if (use_icount) {
+ /* Reset the cycle counter to the start of the block. */
+ env->icount_decr.u16.low += tb->icount;
+ /* Clear the IO flag. */
+ env->can_do_io = 0;
+ }
+
+ /* find the opc index corresponding to searched_pc */
+ tc_ptr = (uintptr_t)tb->tc_ptr;
+ if (searched_pc < tc_ptr)
+ return -1;
+
+ s->tb_next_offset = tb->tb_next_offset;
+#ifdef USE_DIRECT_JUMP
+ s->tb_jmp_offset = tb->tb_jmp_offset;
+ s->tb_next = NULL;
+#else
+ s->tb_jmp_offset = NULL;
+ s->tb_next = tb->tb_next;
+#endif
+ j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr);
+ if (j < 0)
+ return -1;
+ /* now find the start of the preceding instruction */
+ while (gen_opc_instr_start[j] == 0)
+ j--;
+ env->icount_decr.u16.low -= gen_opc_icount[j];
+
+ gen_pc_load(env, tb, searched_pc, j, puc);
+
+#ifdef CONFIG_PROFILER
+ s->restore_time += profile_getclock() - ti;
+ s->restore_count++;
+#endif
+ return 0;
+}