author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 03:01:46 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 03:01:46 +0000
commit | f8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch) |
tree | 26484e9d7e2c67806c2d1760196ff01aaa858e8c | /src/recompiler
parent | Initial commit. (diff) |
Adding upstream version 6.0.4-dfsg. (refs: upstream/6.0.4-dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
100 files changed, 70462 insertions, 0 deletions
diff --git a/src/recompiler/.scm-settings b/src/recompiler/.scm-settings new file mode 100644 index 00000000..946bad9f --- /dev/null +++ b/src/recompiler/.scm-settings @@ -0,0 +1,34 @@ +# $Id: .scm-settings $ +## @file +# Source code massager settings for the recompiler. +# + +# +# Copyright (C) 2017-2019 Oracle Corporation +# +# This file is part of VirtualBox Open Source Edition (OSE), as +# available from http://www.virtualbox.org. This file is free software; +# you can redistribute it and/or modify it under the terms of the GNU +# General Public License (GPL) as published by the Free Software +# Foundation, in version 2 as it comes in the "COPYING" file of the +# VirtualBox OSE distribution. VirtualBox OSE is distributed in the +# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +# + + +# This is external stuff. +--external-copyright --no-convert-tabs +/.scm-settings: --no-external-copyright --convert-tabs +/Makefile.kmk: --no-external-copyright --convert-tabs +/VBox*: --no-external-copyright --convert-tabs +/Sun/*: --no-external-copyright --convert-tabs +/Sun/e_*.S: --external-copyright --no-convert-tabs + +*.com: --treat-as binary + +*.h: --no-fix-header-guards + +/tests/linux-test.c: --lgpl-disclaimer +/tests/test-i386.c: --lgpl-disclaimer +/tests/test-mmap.c: --lgpl-disclaimer + diff --git a/src/recompiler/COPYING.LIB b/src/recompiler/COPYING.LIB new file mode 100644 index 00000000..bfd28e23 --- /dev/null +++ b/src/recompiler/COPYING.LIB @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. 
+ + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. 
+ + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. 
+ + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) 
+ + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. 
If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. 
+ +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/src/recompiler/Makefile.kmk b/src/recompiler/Makefile.kmk new file mode 100644 index 00000000..28c8ea95 --- /dev/null +++ b/src/recompiler/Makefile.kmk @@ -0,0 +1,340 @@ +# $Id: Makefile.kmk $ +## @file +# The Recompiler Sub-Makefile. +# + +# +# Copyright (C) 2006-2019 Oracle Corporation +# +# This file is part of VirtualBox Open Source Edition (OSE), as +# available from http://www.virtualbox.org. This file is free software; +# you can redistribute it and/or modify it under the terms of the GNU +# General Public License (GPL) as published by the Free Software +# Foundation, in version 2 as it comes in the "COPYING" file of the +# VirtualBox OSE distribution. VirtualBox OSE is distributed in the +# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +# + + +SUB_DEPTH = ../.. +include $(KBUILD_PATH)/subheader.kmk + +# +# Globals +# +VBOX_PATH_RECOMPILER_SRC := $(PATH_SUB_CURRENT) +# Workaround for darwin hell. +ifeq ($(KBUILD_TARGET),darwin) + VBOX_WITHOUT_REM_LDR_CYCLE := 1 +endif +ifeq ($(KBUILD_TARGET).$(KBUILD_TARGET_ARCH),win.amd64) + VBOX_USE_MINGWW64 = 1 +endif + + +# +# The primary REM module definition. +# +# This is extended by one of the VBoxREM* modules below. Currently, this +# isn't done by inheritance because of some obscure bug wrt inheriting from +# unused targets that I'm not going to fix now. +# +ifneq ($(KBUILD_TARGET),win) + VBoxRemPrimary_TEMPLATE = VBOXR3NP + # workaround the regparm bug in gcc <= 3.3 + VBoxRemPrimary_DEFS = $(if $(VBOX_GCC_BUGGY_REGPARM),GCC_WITH_BUGGY_REGPARM,) +else + VBoxRemPrimary_TEMPLATE = DUMMY + VBoxRemPrimary_TOOL.win.x86 = MINGW32 + ifdef VBOX_USE_MINGWW64 + VBoxRemPrimary_TOOL.win.amd64 = MINGWW64 + else + VBoxRemPrimary_TOOL.win.amd64 = XGCCAMD64LINUX + endif + VBoxRemPrimary_SDKS.win.x86 = W32API + VBoxRemPrimary_ASFLAGS = -x assembler-with-cpp + VBoxRemPrimary_CFLAGS = -Wall -g -fno-omit-frame-pointer -fno-strict-aliasing -Wno-shadow + VBoxRemPrimary_CFLAGS.debug = -O0 + VBoxRemPrimary_CFLAGS.release += -fno-gcse -O2 + VBoxRemPrimary_CFLAGS.profile = $(VBoxRemPrimary_CFLAGS.release) + VBoxRemPrimary_DEFS += IN_RING3 $(ARCH_BITS_DEFS) + # Workaround the regparm bug in gcc <= 3.3. + VBoxRemPrimary_DEFS.win.x86 += GCC_WITH_BUGGY_REGPARM + # Missing fpclassify. Is there a better define or flag for this? + VBoxRemPrimary_DEFS.solaris += __C99FEATURES__ +endif # win +VBoxRemPrimary_DEFS += IN_REM_R3 REM_INCLUDE_CPU_H NEED_CPU_H +#VBoxRemPrimary_DEFS += REM_PHYS_ADDR_IN_TLB +#VBoxRemPrimary_DEFS += DEBUG_ALL_LOGGING DEBUG_DISAS DEBUG_PCALL CONFIG_DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB # Enables huge amounts of debug logging. +#VBoxRemPrimary_DEFS += DEBUG_DISAS DEBUG_PCALL CONFIG_DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB # Enables huge amounts of debug logging. 
+ifdef VBOX_WITH_RAW_MODE + VBoxRemPrimary_DEFS += VBOX_WITH_RAW_MODE +endif +ifdef VBOX_WITH_RAW_RING1 + VBoxRemPrimary_DEFS += VBOX_WITH_RAW_RING1 +endif +VBoxRemPrimary_DEFS.linux = _GNU_SOURCE +ifdef VBOX_SOLARIS_10 + VBoxRemPrimary_DEFS.solaris = CONFIG_SOLARIS_VERSION=10 +else + VBoxRemPrimary_DEFS.solaris = CONFIG_SOLARIS_VERSION=11 +endif +VBoxRemPrimary_DEFS.freebsd += _BSD +VBoxRemPrimary_DEFS.amd64 += __x86_64__ +VBoxRemPrimary_DEFS.x86 += __i386__ + +VBoxRemPrimary_INCS = \ + Sun \ + target-i386 \ + tcg \ + fpu \ + $(VBoxRemPrimary_0_OUTDIR) \ + $(PATH_ROOT)/src/VBox/VMM/include \ + tcg/i386 \ + . +ifn1of ($(VBoxRemPrimary_DEFS),DEBUG_TMP_LOGGING) + VBoxRemPrimary_DEFS += LOG_USE_C99 + VBoxRemPrimary_INCS <= \ + Sun/crt +endif + +VBoxRemPrimary_SOURCES = \ + VBoxRecompiler.c \ + cpu-exec.c \ + exec.c \ + translate-all.c \ + host-utils.c \ + cutils.c \ + tcg-runtime.c \ + tcg/tcg.c \ + tcg/tcg-dyngen.c \ + fpu/softfloat-native.c \ + target-i386/op_helper.c \ + target-i386/helper.c \ + target-i386/translate.c +VBoxRemPrimary_SOURCES.debug += \ + Sun/testmath.c +VBoxRemPrimary_SOURCES.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMRes.o +VBoxRemPrimary_SOURCES.win.x86 = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def +ifdef VBOX_USE_MINGWW64 + if 0 # exporting all helps when windbg pops up on crashes + VBoxRemPrimary_SOURCES.win.amd64 = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def + else + VBoxRemPrimary_LDFLAGS.win.amd64 = --export-all + endif +endif + +ifndef VBOX_USE_MINGWW64 +VBoxRemPrimary_LIBS = \ + $(LIB_VMM) \ + $(LIB_RUNTIME) +else +VBoxRemPrimary_LIBS = \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a +VBoxRemPrimary_CLEAN = \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.def \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a \ + $(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.def +endif + +VBoxRemPrimary_LDFLAGS.solaris = -mimpure-text +if defined(VBOX_WITH_HARDENING) && "$(KBUILD_TARGET)" == "win" + VBoxRemPrimary_POST_CMDS = \ + $(VBOX_VCC_EDITBIN) /LargeAddressAware /DynamicBase /NxCompat /Release /IntegrityCheck \ + /Version:$(VBOX_VERSION_MAJOR)0$(VBOX_VERSION_MINOR).$(VBOX_VERSION_BUILD) "$(out)" \ + $$(NLTAB)$(VBOX_SIGN_IMAGE_CMDS) +else + VBoxRemPrimary_POST_CMDS = $(VBOX_SIGN_IMAGE_CMDS) +endif + + +if "$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)" == "win.amd64" && !defined(VBOX_USE_MINGWW64) + # + # VBoxREM2/VBoxRemPrimary - Currently only used by 64-bit Windows. + # (e_powl-xxx.S doesn't fit in IPRT because it requires GAS and is LGPL.) + # + SYSMODS += VBoxRemPrimary + VBoxRemPrimary_TEMPLATE = VBOXNOCRTGAS + VBoxRemPrimary_NAME = VBoxREM2 + VBoxRemPrimary_DEFS += LOG_USE_C99 $(ARCH_BITS_DEFS) + VBoxRemPrimary_SOURCES += \ + Sun/e_powl-$(KBUILD_TARGET_ARCH).S + VBoxRemPrimary_INCS += \ + Sun/crt + VBoxRemPrimary_SYSSUFF = .rel + VBoxRemPrimary_LIBS = \ + $(PATH_STAGE_LIB)/RuntimeR3NoCRTGCC$(VBOX_SUFF_LIB) + VBoxRemPrimary_POST_CMDS = $(NO_SUCH_VARIABLE) + VBOX_REM_WRAPPER = 2 + +else if "$(KBUILD_TARGET_ARCH)" == "x86" && defined(VBOX_WITH_64_BITS_GUESTS) + # + # For 32-bit targets when enabled 64-bit guests we build 2 REM DLLs: + # with 64-bit support (slow and buggy at the moment) VBOXREM64 + # only 32-bit support (faster, stable, but not suitable for 64-bit guests) VBOXREM32 + # During the runtime, we load appropriate library from VBOXREM, depending on guest settings. 
+ # 64-bit targets have 64-bit enabled REM by default, so is not part of this mess + # + + # + # VBoxREM32/VBoxRemPrimary + # + DLLS += VBoxRemPrimary + VBoxRemPrimary_NAME = VBoxREM32 + VBoxRemPrimary_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM32.dylib + VBOX_REM_WRAPPER = 32 + + # + # VBoxREM64 + # + DLLS += VBoxREM64 + VBoxREM64_EXTENDS = VBoxRemPrimary + VBoxREM64_EXTENDS_BY = appending + VBoxREM64_NAME = VBoxREM64 + VBoxREM64_DEFS = VBOX_ENABLE_VBOXREM64 + VBoxREM64_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM64.dylib + +else + # + # VBoxREM/VBoxRemPrimary - Normal. + # + DLLS += VBoxRemPrimary + VBoxRemPrimary_NAME = VBoxREM + VBoxRemPrimary_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM3.dylib + + ifdef VBOX_USE_MINGWW64 + # GNU ld (rubenvb-4.5.4) 2.22.52.20120716 doesn't fix up rip relative + # addressing in the import libraries generated by microsoft link.exe. So, we + # have to regenerate these. + # Note! The chdir to the output directory is because dlltool writes temporary files to the current directory. + $$(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.a \ + $$(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.a : $$(VBoxRemPrimary_0_OUTDIR)/$$(notdir $$(basename $$@)).def + $(REDIRECT) -C "$(dir $@)" -- $(TOOL_MINGWW64_DLLTOOL) \ + --output-lib "$@" \ + --input-def "$<" \ + --dllname "$(patsubst %Imp.a,%.dll,$(notdir $@))" + + $$(VBoxRemPrimary_0_OUTDIR)/VBoxVMMImp.def \ + $$(VBoxRemPrimary_0_OUTDIR)/VBoxRTImp.def : \ + $(PATH_STAGE_BIN)/$$(patsubst %Imp.def,%.dll,$$(notdir $$@)) \ + | $$(dir $$@) + $(APPEND) -nt $@ "LIBRARY $(notdir $<)" "EXPORTS" + $(TOOL_$(VBOX_VCC_TOOL)_DUMPBIN) /EXPORTS "$<" \ + | $(SED) -e '/ = /!d' \ + -e 's/^.* \([^ ][^ ]*\) = .*$(DOLLAR)/ \"\1\"/' \ + --append $@ + endif # VBOX_USE_MINGWW64 + +endif + + +ifdef VBOX_REM_WRAPPER + # + # VBoxREM - Wrapper for loading VBoxREM2, VBoxREM32 or VBoxREM64. + # + DLLS += VBoxREMWrapper + VBoxREMWrapper_TEMPLATE = VBoxR3DllWarnNoPic + VBoxREMWrapper_NAME = VBoxREM + VBoxREMWrapper_DEFS = IN_REM_R3 + if "$(KBUILD_TARGET_ARCH)" == "x86" && defined(VBOX_WITH_64_BITS_GUESTS) + VBoxREMWrapper_DEFS += VBOX_USE_BITNESS_SELECTOR + endif + ifdef VBOX_WITHOUT_REM_LDR_CYCLE + VBoxREMWrapper_DEFS += VBOX_WITHOUT_REM_LDR_CYCLE + endif + VBoxREMWrapper_SOURCES = \ + VBoxREMWrapper.cpp + VBoxREMWrapper_SOURCES.win = VBoxREM.rc + if "$(KBUILD_TARGET).$(KBUILD_TARGET_ARCH)" == "win.amd64" && !defined(VBOX_USE_MINGWW64) + VBoxREMWrapper_SOURCES += \ + VBoxREMWrapperA.asm + endif + VBoxREMWrapper_LIBS = \ + $(LIB_RUNTIME) + ifndef VBOX_WITHOUT_REM_LDR_CYCLE + VBoxREMWrapper_LIBS += \ + $(LIB_VMM) + VBoxREMWrapper_LIBS.darwin += \ + $(TARGET_VBoxREMImp) + endif + VBoxREMWrapper_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib +endif + + +# +# The VBoxREM import library. +# +# This is a HACK to get around (a) the cyclic dependency between VBoxVMM and +# VBoxREM during linking and (b) the recursive build ordering which means VBoxREM +# won't be built until after all the other DLLs. 
+# +IMPORT_LIBS += VBoxREMImp +VBoxREMImp_TEMPLATE = VBoxR3Dll + ifn1of ($(KBUILD_TARGET), os2 win) +VBoxREMImp_NAME = VBoxREM + endif +VBoxREMImp_INST = $(INST_LIB) +VBoxREMImp_SOURCES.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def +VBoxREMImp_CLEAN.win = $(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def +VBoxREMImp_SOURCES.os2 = $(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def +VBoxREMImp_CLEAN.os2 = $(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def + ifn1of ($(KBUILD_TARGET), os2 win) +VBoxREMImp_SOURCES = $(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c +VBoxREMImp_CLEAN = $(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c + endif + ifn1of ($(KBUILD_TARGET), darwin os2 win) +VBoxREMImp_SONAME = VBoxREM$(SUFF_DLL) + endif +ifdef VBOX_WITHOUT_REM_LDR_CYCLE + VBoxREMImp_LDFLAGS.darwin = -install_name $(VBOX_DYLD_EXECUTABLE_PATH)/VBoxREM.dylib +else + VBoxREMImp_LDFLAGS.darwin = -install_name $(subst @rpath,@executable_path,$(VBOX_DYLD_EXECUTABLE_PATH))/VBoxREM.dylib +endif +VBoxREMImp_LDFLAGS.l4 = -T$(L4_LIBDIR)/../main_rel.ld -nostdlib + +$$(VBoxREMImp_0_OUTDIR)/VBoxREMImp.c: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed $(MAKEFILE_CURRENT) | $$(dir $$@) + $(call MSG_GENERATE,,$@) + $(QUIET)$(APPEND) -t $@ '#ifdef VBOX_HAVE_VISIBILITY_HIDDEN' + $(QUIET)$(APPEND) $@ '# define EXPORT __attribute__((visibility("default")))' + $(QUIET)$(APPEND) $@ '#else' + $(QUIET)$(APPEND) $@ '# define EXPORT' + $(QUIET)$(APPEND) $@ '#endif' + $(QUIET)$(APPEND) $@ '' + $(QUIET)$(SED) -f $(VBOX_PATH_RECOMPILER_SRC)/Sun/deftoimp.sed --append $@ $< + +$$(VBoxREMImp_0_OUTDIR)/VBoxREMOS2.def: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(MAKEFILE_CURRENT) | $$(dir $$@) + $(SED) \ + -e 's/^[ \t][ \t]*REMR3/ _REMR3/' \ + -e 's/\.[Dd][Ll][Ll]//' \ + -e 's/^LIBRARY .*/LIBRARY VBoxREM INITINSTANCE TERMINSTANCE\nDATA MULTIPLE\n/' \ + --output $@ \ + $< + +$$(VBoxREMImp_0_OUTDIR)/VBoxREMWin.def: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.def $(MAKEFILE_CURRENT) | $$(dir $$@) + $(CP) -f $< $@ + +$$(VBoxREMImp_0_OUTDIR)/VBoxREMRes.o: $(VBOX_PATH_RECOMPILER_SRC)/VBoxREM.rc $(MAKEFILE_CURRENT) $(VBOX_VERSION_MK) | $$(dir $$@) + $(call MSG_GENERATE,,$@) + $(QUIET)$(REDIRECT) -E 'COMSPEC=$(VBOX_GOOD_COMSPEC_BS)' \ + -- $(TOOL_$(VBoxRemPrimary_TOOL.win.$(KBUILD_TARGET_ARCH))_PREFIX)windres \ + $(addprefix -I,$(INCS) $(PATH_SDK_$(VBOX_WINPSDK)_INC) $(PATH_TOOL_$(VBOX_VCC_TOOL)_INC)) \ + -DVBOX_SVN_REV=$(VBOX_SVN_REV) \ + -DVBOX_SVN_REV_MOD_5K=$(expr $(VBOX_SVN_REV) % 50000) \ + $< $@ + +# +# The math testcase as a standalone program for testing and debugging purposes. +# +## @todo This is a bit messy because of MINGW32. +testmath_ASFLAGS.amd64 = -m amd64 +testmath_CFLAGS = -Wall -g +testmath_CFLAGS.release = -O3 +testmath_LDFLAGS = -g +testmath_DEFS = MATHTEST_STANDALONE +testmath_SOURCES = Sun/testmath.c + + +include $(FILE_KBUILD_SUB_FOOTER) + diff --git a/src/recompiler/README.vbox b/src/recompiler/README.vbox new file mode 100644 index 00000000..d478b71e --- /dev/null +++ b/src/recompiler/README.vbox @@ -0,0 +1,6 @@ +QEMU is based on v0.13.0 (6ed912999d6ef636a5be5ccb266d7d3c0f0310b4) +from git://git.savannah.nongnu.org/qemu.git. + +Modified parts related to calls of external functions, to allow loading +recompiler library as regular shared object in high memory on 64-bit systems. 
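The README.vbox note above describes a reach problem: on a 64-bit host, a library mapped high in the address space cannot reach code below 4 GiB with a 32-bit relative call (a `call rel32` displacement only spans +/- 2 GiB), so calls to external functions have to go through absolute pointers. What follows is a minimal, hypothetical C sketch of that general idea; the names and the import-table shape are illustrative assumptions, not the actual VirtualBox wrapper code.

    /*
     * Illustrative sketch only (not part of this commit): calling an external
     * function from a module that may be loaded far above 4 GiB.  A direct
     * `call rel32` only reaches +/- 2 GiB, so the call goes through an
     * absolute function pointer filled in at load time.  All names here are
     * hypothetical.
     */
    #include <stdint.h>

    typedef int (*PFNREMEXTLOG)(const char *pszMsg);  /* imported function type */

    static PFNREMEXTLOG g_pfnLog;  /* 64-bit absolute pointer, reaches any address */

    void remSketchResolveImports(PFNREMEXTLOG pfnLog)
    {
        g_pfnLog = pfnLog;         /* resolved by the loader/wrapper at load time */
    }

    int remSketchLog(const char *pszMsg)
    {
        /* Indirect call: no rel32 displacement limit between the two modules. */
        return g_pfnLog(pszMsg);
    }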
+
diff --git a/src/recompiler/Sun/Makefile.kup b/src/recompiler/Sun/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/Sun/Makefile.kup
diff --git a/src/recompiler/Sun/config-host.h b/src/recompiler/Sun/config-host.h
new file mode 100644
index 00000000..a18a147e
--- /dev/null
+++ b/src/recompiler/Sun/config-host.h
@@ -0,0 +1,46 @@
+/* $Id: config-host.h $ */
+/** @file
+ * Sun host config - maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+#if defined(RT_ARCH_AMD64)
+# define HOST_X86_64 1
+# define HOST_LONG_BITS 64
+#else
+# define HOST_I386 1
+# define HOST_LONG_BITS 32
+#endif
+
+#ifndef IPRT_NO_CRT
+# ifdef RT_OS_WINDOWS
+# define CONFIG_WIN32 1
+# elif defined(RT_OS_OS2)
+# define CONFIG_OS2
+# elif defined(RT_OS_DARWIN)
+# define CONFIG_DARWIN
+# elif defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD)
+# define HAVE_MACHINE_BSWAP_H
+/*# define CONFIG_BSD*/
+# elif defined(RT_OS_SOLARIS)
+# define CONFIG_SOLARIS
+# else
+# define HAVE_BYTESWAP_H 1
+# endif
+#endif
+#define QEMU_VERSION "0.13.0"
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QEMU_SHAREDIR "."
+
diff --git a/src/recompiler/Sun/config.h b/src/recompiler/Sun/config.h
new file mode 100644
index 00000000..173b660d
--- /dev/null
+++ b/src/recompiler/Sun/config.h
@@ -0,0 +1,44 @@
+/* $Id: config.h $ */
+/** @file
+ * Sun config - Maintained by hand
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#include "config-host.h"
+#define CONFIG_QEMU_PREFIX "/usr/gnemul/qemu-i386"
+#define TARGET_ARCH "i386"
+#define TARGET_I386 1
+#define CONFIG_SOFTMMU 1
+#define TARGET_PHYS_ADDR_BITS 64
+
+#ifdef VBOX_WITH_64_BITS_GUESTS
+# if defined(__x86_64__) || defined (VBOX_ENABLE_VBOXREM64)
+# define TARGET_X86_64
+# endif
+#endif
+
+/* Uncomment to see all phys memory accesses */
+/* #define VBOX_DEBUG_PHYS */
+/* Uncomment to see emulated CPU state changes */
+/* #define VBOX_DUMP_STATE */
+/* Uncomment to see QEMU logging, goes to /tmp/vbox-qemu.log */
+/* #define DEBUG_ALL_LOGGING */
+/* Uncomment to see generated code */
+/* #define DEBUG_DISAS */
+
+#if 0 /*defined(RT_ARCH_AMD64) && defined(VBOX_STRICT)*/
+# define VBOX_CHECK_ADDR(ptr) do { if ((uintptr_t)(ptr) >= _4G) __asm__("int3"); } while (0)
+#else
+# define VBOX_CHECK_ADDR(ptr) do { } while (0)
+#endif
diff --git a/src/recompiler/Sun/crt/stdio.h b/src/recompiler/Sun/crt/stdio.h
new file mode 100644
index 00000000..5c73fb1a
--- /dev/null
+++ b/src/recompiler/Sun/crt/stdio.h
@@ -0,0 +1,73 @@
+/* $Id: stdio.h $ */
+/** @file
+ * Our minimal stdio
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef ___Sun_stdio_h
+#define ___Sun_stdio_h
+
+#ifndef LOG_GROUP
+# define UNDO_LOG_GROUP
+#endif
+
+#include <VBox/log.h>
+
+#ifdef UNDO_LOG_GROUP
+# undef UNDO_LOG_GROUP
+# undef LOG_GROUP
+#endif
+
+#ifndef LOG_USE_C99
+# error "LOG_USE_C99 isn't defined."
+#endif
+
+RT_C_DECLS_BEGIN
+
+typedef struct FILE FILE;
+
+#if defined(RT_OS_SOLARIS)
+/** @todo Check solaris' floatingpoint.h as to why we do this */
+# define _FILEDEFED
+#endif
+
+DECLINLINE(int) fprintf(FILE *ignored, const char *pszFormat, ...)
+{
+/** @todo We don't support wrapping calls taking a va_list yet. It's not worth it yet,
+ * since there are only a couple of cases where this fprintf implementation is used.
+ * (The macro below will deal with the majority of the fprintf calls.) */
+#if 0 /*def LOG_ENABLED*/
+    if (LogIsItEnabled(NULL, 0, LOG_GROUP_REM_PRINTF))
+    {
+        va_list va;
+        va_start(va, pszFormat);
+        RTLogLoggerExV(NULL, 0, LOG_GROUP_REM_PRINTF, pszFormat, va);
+        va_end(va);
+    }
+#endif
+    return 0;
+}
+
+#define fflush(file) RTLogFlush(NULL)
+#define printf(...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+#define fprintf(logfile, ...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__))
+
+#ifdef DEBUG_TMP_LOGGING
+# error "DEBUG_TMP_LOGGING doesn't work with the Sun/crt/stdio.h wrapper."
+#endif
+
+RT_C_DECLS_END
+
+#endif
+
diff --git a/src/recompiler/Sun/deftoimp.sed b/src/recompiler/Sun/deftoimp.sed
new file mode 100644
index 00000000..3e35efa1
--- /dev/null
+++ b/src/recompiler/Sun/deftoimp.sed
@@ -0,0 +1,37 @@
+# $Id: deftoimp.sed $
+## @file
+# SED script for generating a dummy .so from a windows .def file.
+#
+
+#
+# Copyright (C) 2006-2019 Oracle Corporation
+#
+# This file is part of VirtualBox Open Source Edition (OSE), as
+# available from http://www.virtualbox.org.
This file is free software; +# you can redistribute it and/or modify it under the terms of the GNU +# General Public License (GPL) as published by the Free Software +# Foundation, in version 2 as it comes in the "COPYING" file of the +# VirtualBox OSE distribution. VirtualBox OSE is distributed in the +# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +# + +s/;.*$//g +s/^[[:space:]][[:space:]]*//g +s/[[:space:]][[:space:]]*$//g +/^$/d + +# Handle text after EXPORTS +/EXPORTS/,//{ +s/^EXPORTS$// +/^$/b end + +s/^\(.*\)$/EXPORT\nvoid \1(void);\nvoid \1(void){}/ +b end +} +d +b end + + +# next expression +:end + diff --git a/src/recompiler/Sun/e_powl-amd64.S b/src/recompiler/Sun/e_powl-amd64.S new file mode 100644 index 00000000..5e0353e7 --- /dev/null +++ b/src/recompiler/Sun/e_powl-amd64.S @@ -0,0 +1,371 @@ +/* ix87 specific implementation of pow function. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +/*#include <machine/asm.h>*/ +#include <iprt/cdefs.h> + +#define ALIGNARG(log2) 1<<log2 +#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg; +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name; +#define ASM_GLOBAL_DIRECTIVE .global + +#define C_LABEL(name) name: +#define C_SYMBOL_NAME(name) name + +#define ENTRY(name) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(4); \ + C_LABEL(name) + +#undef END +#define END(name) \ + ASM_SIZE_DIRECTIVE(name) + + +#ifdef __ELF__ + .section .rodata +#else + .text +#endif + + .align ALIGNARG(4) + ASM_TYPE_DIRECTIVE(infinity,@object) +inf_zero: +infinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f + ASM_SIZE_DIRECTIVE(infinity) + ASM_TYPE_DIRECTIVE(zero,@object) +zero: .double 0.0 + ASM_SIZE_DIRECTIVE(zero) + ASM_TYPE_DIRECTIVE(minf_mzero,@object) +minf_mzero: +minfinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff +mzero: + .byte 0, 0, 0, 0, 0, 0, 0, 0x80 + ASM_SIZE_DIRECTIVE(minf_mzero) + ASM_TYPE_DIRECTIVE(one,@object) +one: .double 1.0 + ASM_SIZE_DIRECTIVE(one) + ASM_TYPE_DIRECTIVE(limit,@object) +limit: .double 0.29 + ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: + .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) + +//#ifdef PIC +//#define MO(op) op##(%rip) +//#else +#define MO(op) op +//#endif + + .text +/*ENTRY(__ieee754_powl)*/ +ENTRY(RT_NOCRT(powl)) + + fldt 24(%rsp) // y + fxam + + + fnstsw + movb %ah, %dl + andb $0x45, %ah + cmpb $0x40, %ah // is y == 0 ? + je 11f + + cmpb $0x05, %ah // is y == ±inf ? + je 12f + + cmpb $0x01, %ah // is y == NaN ? + je 30f + + fldt 8(%rsp) // x : y + + fxam + fnstsw + movb %ah, %dh + andb $0x45, %ah + cmpb $0x40, %ah + je 20f // x is ±0 + + cmpb $0x05, %ah + je 15f // x is ±inf + + fxch // y : x + + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fldl MO(p63) // 1L<<63 : y : x + fld %st(1) // y : 1L<<63 : y : x + fabs // |y| : 1L<<63 : y : x + fcomip %st(1), %st // 1L<<63 : y : x + fstp %st(0) // y : x + jnc 2f + + /* First see whether `y' is a natural number. In this case we + can use a more precise algorithm. */ + fld %st // y : y : x + fistpll -8(%rsp) // y : x + fildll -8(%rsp) // int(y) : y : x + fucomip %st(1),%st // y : x + jne 2f + + /* OK, we have an integer value for y. */ + mov -8(%rsp),%eax + mov -4(%rsp),%edx + orl $0, %edx + fstp %st(0) // x + jns 4f // y >= 0, jump + fdivrl MO(one) // 1/x (now referred to as x) + negl %eax + adcl $0, %edx + negl %edx +4: fldl MO(one) // 1 : x + fxch + +6: shrdl $1, %edx, %eax + jnc 5f + fxch + fmul %st(1) // x : ST*x + fxch +5: fmul %st(0), %st // x*x : ST*x + shrl $1, %edx + movl %eax, %ecx + orl %edx, %ecx + jnz 6b + fstp %st(0) // ST*x + ret + + /* y is ±NAN */ +30: fldt 8(%rsp) // x : y + fldl MO(one) // 1.0 : x : y + fucomip %st(1),%st // x : y + je 31f + fxch // y : x +31: fstp %st(1) + ret + + .align ALIGNARG(4) +2: /* y is a real number. */ + fxch // x : y + fldl MO(one) // 1.0 : x : y + fld %st(1) // x : 1.0 : x : y + fsub %st(1) // x-1 : 1.0 : x : y + fabs // |x-1| : 1.0 : x : y + fcompl MO(limit) // 1.0 : x : y + fnstsw + fxch // x : 1.0 : y + test $4500,%eax + jz 7f + fsub %st(1) // x-1 : 1.0 : y + fyl2xp1 // log2(x) : y + jmp 8f + +7: fyl2x // log2(x) : y +8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? 
+ je 28f + fst %st(1) // y*log2(x) : y*log2(x) + frndint // int(y*log2(x)) : y*log2(x) + fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) + fxch // fract(y*log2(x)) : int(y*log2(x)) + f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) + faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) + fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) + fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) + ret + +28: fstp %st(1) // y*log2(x) + fldl MO(one) // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + fstp %st(1) // 2^(y*log2(x)) + ret + + // pow(x,±0) = 1 + .align ALIGNARG(4) +11: fstp %st(0) // pop y + fldl MO(one) + ret + + // y == ±inf + .align ALIGNARG(4) +12: fstp %st(0) // pop y + fldt 8(%rsp) // x + fabs + fcompl MO(one) // < 1, == 1, or > 1 + fnstsw + andb $0x45, %ah + cmpb $0x45, %ah + je 13f // jump if x is NaN + + cmpb $0x40, %ah + je 14f // jump if |x| == 1 + + shlb $1, %ah + xorb %ah, %dl + andl $2, %edx +#ifdef PIC + lea inf_zero(%rip),%rcx + fldl (%rcx, %rdx, 4) +#else + fldl inf_zero(,%rdx, 4) +#endif + ret + + .align ALIGNARG(4) +14: fldl MO(one) + ret + + .align ALIGNARG(4) +13: fldt 8(%rsp) // load x == NaN + ret + + .align ALIGNARG(4) + // x is ±inf +15: fstp %st(0) // y + testb $2, %dh + jz 16f // jump if x == +inf + + // We must find out whether y is an odd integer. + fld %st // y : y + fistpll -8(%rsp) // y + fildll -8(%rsp) // int(y) : y + fucomip %st(1),%st + ffreep %st // <empty> + jne 17f + + // OK, the value is an integer, but is it odd? + mov -8(%rsp), %eax + mov -4(%rsp), %edx + andb $1, %al + jz 18f // jump if not odd + // It's an odd integer. + shrl $31, %edx +#ifdef PIC + lea minf_mzero(%rip),%rcx + fldl (%rcx, %rdx, 8) +#else + fldl minf_mzero(,%rdx, 8) +#endif + ret + + .align ALIGNARG(4) +16: fcompl MO(zero) + fnstsw + shrl $5, %eax + andl $8, %eax +#ifdef PIC + lea inf_zero(%rip),%rcx + fldl (%rcx, %rax, 1) +#else + fldl inf_zero(,%rax, 1) +#endif + ret + + .align ALIGNARG(4) +17: shll $30, %edx // sign bit for y in right position +18: shrl $31, %edx +#ifdef PIC + lea inf_zero(%rip),%rcx + fldl (%rcx, %rdx, 8) +#else + fldl inf_zero(,%rdx, 8) +#endif + ret + + .align ALIGNARG(4) + // x is ±0 +20: fstp %st(0) // y + testb $2, %dl + jz 21f // y > 0 + + // x is ±0 and y is < 0. We must find out whether y is an odd integer. + testb $2, %dh + jz 25f + + fld %st // y : y + fistpll -8(%rsp) // y + fildll -8(%rsp) // int(y) : y + fucomip %st(1),%st + ffreep %st // <empty> + jne 26f + + // OK, the value is an integer, but is it odd? + mov -8(%rsp),%eax + mov -4(%rsp),%edx + andb $1, %al + jz 27f // jump if not odd + // It's an odd integer. + // Raise divide-by-zero exception and get minus infinity value. + fldl MO(one) + fdivl MO(zero) + fchs + ret + +25: fstp %st(0) +26: +27: // Raise divide-by-zero exception and get infinity value. + fldl MO(one) + fdivl MO(zero) + ret + + .align ALIGNARG(4) + // x is ±0 and y is > 0. We must find out whether y is an odd integer. +21: testb $2, %dh + jz 22f + + fld %st // y : y + fistpll -8(%rsp) // y + fildll -8(%rsp) // int(y) : y + fucomip %st(1),%st + ffreep %st // <empty> + jne 23f + + // OK, the value is an integer, but is it odd? + mov -8(%rsp),%eax + mov -4(%rsp),%edx + andb $1, %al + jz 24f // jump if not odd + // It's an odd integer. 
+ fldl MO(mzero) + ret + +22: fstp %st(0) +23: +24: fldl MO(zero) + ret + +/*END(__ieee754_powl)*/ +END(RT_NOCRT(powl)) + diff --git a/src/recompiler/Sun/e_powl-x86.S b/src/recompiler/Sun/e_powl-x86.S new file mode 100644 index 00000000..cbc99256 --- /dev/null +++ b/src/recompiler/Sun/e_powl-x86.S @@ -0,0 +1,413 @@ +/* ix87 specific implementation of pow function. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +/*#include <machine/asm.h>*/ +#include <iprt/cdefs.h> + +#ifdef __MINGW32__ +# define ASM_TYPE_DIRECTIVE(name,typearg) +# define ASM_SIZE_DIRECTIVE(name) +# define cfi_adjust_cfa_offset(a) +# define C_LABEL(name) _ ## name: +# define C_SYMBOL_NAME(name) _ ## name +# define ASM_GLOBAL_DIRECTIVE .global +# define ALIGNARG(log2) 1<<log2 +#elif __APPLE__ +# define ASM_TYPE_DIRECTIVE(name,typearg) +# define ASM_SIZE_DIRECTIVE(name) +# define cfi_adjust_cfa_offset(a) +# define C_LABEL(name) _ ## name: +# define C_SYMBOL_NAME(name) _ ## name +# define ASM_GLOBAL_DIRECTIVE .globl +# define ALIGNARG(log2) log2 +#else +# define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg; +# define ASM_SIZE_DIRECTIVE(name) .size name,.-name; +# define C_LABEL(name) name: +# define C_SYMBOL_NAME(name) name +# /* figure this one out. 
*/ +# define cfi_adjust_cfa_offset(a) +# define ASM_GLOBAL_DIRECTIVE .global +# define ALIGNARG(log2) 1<<log2 +#endif + +#define ENTRY(name) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(4); \ + C_LABEL(name) + +#undef END +#define END(name) \ + ASM_SIZE_DIRECTIVE(name) + +#ifdef __ELF__ + .section .rodata +#else + .text +#endif + + .align ALIGNARG(4) + ASM_TYPE_DIRECTIVE(infinity,@object) +inf_zero: +infinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f + ASM_SIZE_DIRECTIVE(infinity) + ASM_TYPE_DIRECTIVE(zero,@object) +zero: .double 0.0 + ASM_SIZE_DIRECTIVE(zero) + ASM_TYPE_DIRECTIVE(minf_mzero,@object) +minf_mzero: +minfinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff +mzero: + .byte 0, 0, 0, 0, 0, 0, 0, 0x80 + ASM_SIZE_DIRECTIVE(minf_mzero) + ASM_TYPE_DIRECTIVE(one,@object) +one: .double 1.0 + ASM_SIZE_DIRECTIVE(one) + ASM_TYPE_DIRECTIVE(limit,@object) +limit: .double 0.29 + ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) + +#ifdef PIC +#define MO(op) op##@GOTOFF(%ecx) +#define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) +#else +#define MO(op) op +#define MOX(op,x,f) op(,x,f) +#endif + + .text +//ENTRY(__ieee754_powl) +ENTRY(RT_NOCRT(powl)) +#ifdef RT_OS_DARWIN /* 16-byte long double with 8 byte alignment requirements */ + fldt 20(%esp) // y +#else + fldt 16(%esp) // y +#endif + fxam + +#ifdef PIC + LOAD_PIC_REG (cx) +#endif + + fnstsw + movb %ah, %dl + andb $0x45, %ah + cmpb $0x40, %ah // is y == 0 ? + je .L11 + + cmpb $0x05, %ah // is y == ±inf ? + je .L12 + + cmpb $0x01, %ah // is y == NaN ? + je .L30 + + fldt 4(%esp) // x : y + + subl $8,%esp + cfi_adjust_cfa_offset (8) + + fxam + fnstsw + movb %ah, %dh + andb $0x45, %ah + cmpb $0x40, %ah + je .L20 // x is ±0 + + cmpb $0x05, %ah + je .L15 // x is ±inf + + fxch // y : x + + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc .L2 + + /* First see whether `y' is a natural number. In this case we + can use a more precise algorithm. */ + fld %st // y : y : x + fistpll (%esp) // y : x + fildll (%esp) // int(y) : y : x + fucomp %st(1) // y : x + fnstsw + sahf + jne .L2 + + /* OK, we have an integer value for y. */ + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + orl $0, %edx + fstp %st(0) // x + jns .L4 // y >= 0, jump + fdivrl MO(one) // 1/x (now referred to as x) + negl %eax + adcl $0, %edx + negl %edx +.L4: fldl MO(one) // 1 : x + fxch + +.L6: shrdl $1, %edx, %eax + jnc .L5 + fxch + fmul %st(1) // x : ST*x + fxch +.L5: fmul %st(0), %st // x*x : ST*x + shrl $1, %edx + movl %eax, %ecx + orl %edx, %ecx + jnz .L6 + fstp %st(0) // ST*x + ret + + /* y is ±NAN */ +.L30: fldt 4(%esp) // x : y + fldl MO(one) // 1.0 : x : y + fucomp %st(1) // x : y + fnstsw + sahf + je .L31 + fxch // y : x +.L31: fstp %st(1) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +.L2: /* y is a real number. */ + fxch // x : y + fldl MO(one) // 1.0 : x : y + fld %st(1) // x : 1.0 : x : y + fsub %st(1) // x-1 : 1.0 : x : y + fabs // |x-1| : 1.0 : x : y + fcompl MO(limit) // 1.0 : x : y + fnstsw + fxch // x : 1.0 : y + sahf + ja .L7 + fsub %st(1) // x-1 : 1.0 : y + fyl2xp1 // log2(x) : y + jmp .L8 + +.L7: fyl2x // log2(x) : y +.L8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? 
+ je .L28 + fst %st(1) // y*log2(x) : y*log2(x) + frndint // int(y*log2(x)) : y*log2(x) + fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) + fxch // fract(y*log2(x)) : int(y*log2(x)) + f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) + faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) + fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) + addl $8, %esp + cfi_adjust_cfa_offset (-8) + fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) + ret + + cfi_adjust_cfa_offset (8) +.L28: fstp %st(1) // y*log2(x) + fldl MO(one) // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + addl $8, %esp + cfi_adjust_cfa_offset (-8) + fstp %st(1) // 2^(y*log2(x)) + ret + + // pow(x,±0) = 1 + .align ALIGNARG(4) +.L11: fstp %st(0) // pop y + fldl MO(one) + ret + + // y == ±inf + .align ALIGNARG(4) +.L12: fstp %st(0) // pop y + fldt 4(%esp) // x + fabs + fcompl MO(one) // < 1, == 1, or > 1 + fnstsw + andb $0x45, %ah + cmpb $0x45, %ah + je .L13 // jump if x is NaN + + cmpb $0x40, %ah + je .L14 // jump if |x| == 1 + + shlb $1, %ah + xorb %ah, %dl + andl $2, %edx + fldl MOX(inf_zero, %edx, 4) + ret + + .align ALIGNARG(4) +.L14: fldl MO(one) + ret + + .align ALIGNARG(4) +.L13: fldt 4(%esp) // load x == NaN + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±inf +.L15: fstp %st(0) // y + testb $2, %dh + jz .L16 // jump if x == +inf + + // We must find out whether y is an odd integer. + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne .L17 + + // OK, the value is an integer, but is it odd? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz .L18 // jump if not odd + // It's an odd integer. + shrl $31, %edx + fldl MOX(minf_mzero, %edx, 8) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +.L16: fcompl MO(zero) + addl $8, %esp + cfi_adjust_cfa_offset (-8) + fnstsw + shrl $5, %eax + andl $8, %eax + fldl MOX(inf_zero, %eax, 1) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +.L17: shll $30, %edx // sign bit for y in right position + addl $8, %esp + cfi_adjust_cfa_offset (-8) +.L18: shrl $31, %edx + fldl MOX(inf_zero, %edx, 8) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±0 +.L20: fstp %st(0) // y + testb $2, %dl + jz .L21 // y > 0 + + // x is ±0 and y is < 0. We must find out whether y is an odd integer. + testb $2, %dh + jz .L25 + + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne .L26 + + // OK, the value is an integer, but is it odd? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz .L27 // jump if not odd + // It's an odd integer. + // Raise divide-by-zero exception and get minus infinity value. + fldl MO(one) + fdivl MO(zero) + fchs + ret + + cfi_adjust_cfa_offset (8) +.L25: fstp %st(0) +.L26: addl $8, %esp + cfi_adjust_cfa_offset (-8) +.L27: // Raise divide-by-zero exception and get infinity value. + fldl MO(one) + fdivl MO(zero) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±0 and y is > 0. We must find out whether y is an odd integer. +.L21: testb $2, %dh + jz .L22 + + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne .L23 + + // OK, the value is an integer, but is it odd? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz .L24 // jump if not odd + // It's an odd integer. 
+ fldl MO(mzero) + ret + + cfi_adjust_cfa_offset (8) +.L22: fstp %st(0) +.L23: addl $8, %esp // Don't use 2 x pop + cfi_adjust_cfa_offset (-8) +.L24: fldl MO(zero) + ret + +END(RT_NOCRT(powl)) +//END(__ieee754_powl) diff --git a/src/recompiler/Sun/kvm.h b/src/recompiler/Sun/kvm.h new file mode 100644 index 00000000..0403e3e4 --- /dev/null +++ b/src/recompiler/Sun/kvm.h @@ -0,0 +1,28 @@ +/* $Id: kvm.h $ */ +/** @file + * VBox Recompiler - kvm stub header. + */ + +/* + * Copyright (C) 2011-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef ___kvm_stub_h___ +#define ___kvm_stub_h___ + +#define kvm_enabled() false +#define kvm_update_guest_debug(a, b) AssertFailed() +#define kvm_set_phys_mem(a, b, c) AssertFailed() +#define kvm_arch_get_registers(a) AssertFailed() +#define cpu_synchronize_state(a) do { } while (0) + +#endif + diff --git a/src/recompiler/Sun/testmath.c b/src/recompiler/Sun/testmath.c new file mode 100644 index 00000000..b1650753 --- /dev/null +++ b/src/recompiler/Sun/testmath.c @@ -0,0 +1,828 @@ +/* $Id: testmath.c $ */ +/** @file + * Testcase for the no-crt math stuff. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#ifndef MATHTEST_STANDALONE +# include <iprt/assert.h> +# include <math.h> +# undef printf +# define printf RTAssertMsg2Weak +#else +# include <stdio.h> +# include <math.h> +#endif + +/* gcc starting with version 4.3 uses the MPFR library which results in more accurate results. gcc-4.3.1 seems to emit the less accurate result. So just allow both results. 
*/ +#define SIN180a -0.8011526357338304777463731115L +#define SIN180b -0.801152635733830477871L + +static void bitch(const char *pszWhat, const long double *plrdResult, const long double *plrdExpected) +{ + const unsigned char *pach1 = (const unsigned char *)plrdResult; + const unsigned char *pach2 = (const unsigned char *)plrdExpected; +#ifndef MATHTEST_STANDALONE + printf("error: %s - %d instead of %d\n", pszWhat, (int)(*plrdResult * 100000), (int)(*plrdExpected * 100000)); +#else + printf("error: %s - %.25f instead of %.25f\n", pszWhat, (double)*plrdResult, (double)*plrdExpected); +#endif + printf(" %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach1[0], pach1[1], pach1[2], pach1[3], pach1[4], pach1[5], pach1[6], pach1[7], pach1[8], pach1[9]); + printf(" %02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x\n", pach2[0], pach2[1], pach2[2], pach2[3], pach2[4], pach2[5], pach2[6], pach2[7], pach2[8], pach2[9]); +} + +static void bitchll(const char *pszWhat, long long llResult, long long llExpected) +{ +#if defined(__MINGW32__) && !defined(Assert) + printf("error: %s - %I64d instead of %I64d\n", pszWhat, llResult, llExpected); +#else + printf("error: %s - %lld instead of %lld\n", pszWhat, llResult, llExpected); +#endif +} + +static void bitchl(const char *pszWhat, long lResult, long lExpected) +{ + printf("error: %s - %ld instead of %ld\n", pszWhat, lResult, lExpected); +} + +extern int testsin(void) +{ + return sinl(180.0L) == SIN180a || sinl(180.0L) == SIN180b; +} + +extern int testremainder(void) +{ + static double s_rd1 = 2.5; + static double s_rd2 = 2.0; + static double s_rd3 = 0.5; + return remainder(s_rd1, s_rd2) == s_rd3; +} + +static __inline__ void set_cw(unsigned cw) +{ + __asm __volatile("fldcw %0" : : "m" (cw)); +} + +static __inline__ unsigned get_cw(void) +{ + unsigned cw; + __asm __volatile("fstcw %0" : "=m" (cw)); + return cw & 0xffff; +} + +static long double check_lrd(const long double lrd, const unsigned long long ull, const unsigned short us) +{ + static volatile long double lrd2; + lrd2 = lrd; + if ( *(unsigned long long *)&lrd2 != ull + || ((unsigned short *)&lrd2)[4] != us) + { +#if defined(__MINGW32__) && !defined(Assert) + printf("%I64x:%04x instead of %I64x:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us); +#else + printf("%llx:%04x instead of %llx:%04x\n", *(unsigned long long *)&lrd2, ((unsigned short *)&lrd2)[4], ull, us); +#endif + __asm__("int $3\n"); + } + return lrd; +} + + +static long double make_lrd(const unsigned long long ull, const unsigned short us) +{ + union + { + long double lrd; + struct + { + unsigned long long ull; + unsigned short us; + } i; + } u; + + u.i.ull = ull; + u.i.us = us; + return u.lrd; +} + +static long double check_lrd_cw(const long double lrd, const unsigned long long ull, const unsigned short us, const unsigned cw) +{ + set_cw(cw); + if (cw != get_cw()) + { + printf("get_cw() -> %#x expected %#x\n", get_cw(), cw); + __asm__("int $3\n"); + } + return check_lrd(lrd, ull, us); +} + +static long double make_lrd_cw(unsigned long long ull, unsigned short us, unsigned cw) +{ + set_cw(cw); + return check_lrd_cw(make_lrd(ull, us), ull, us, cw); +} + +extern int testmath(void) +{ + unsigned cErrors = 0; + long double lrdResult; + long double lrdExpect; + long double lrd; +#define CHECK(operation, expect) \ + do { \ + lrdExpect = expect; \ + lrdResult = operation; \ + if (lrdResult != lrdExpect) \ + { \ + bitch(#operation, &lrdResult, &lrdExpect); \ + cErrors++; \ + } \ + } while (0) + + long long llResult; + long long 
llExpect; +#define CHECKLL(operation, expect) \ + do { \ + llExpect = expect; \ + llResult = operation; \ + if (llResult != llExpect) \ + { \ + bitchll(#operation, llResult, llExpect); \ + cErrors++; \ + } \ + } while (0) + + long lResult; + long lExpect; +#define CHECKL(operation, expect) \ + do { \ + lExpect = expect; \ + lResult = operation; \ + if (lResult != lExpect) \ + { \ + bitchl(#operation, lResult, lExpect); \ + cErrors++; \ + } \ + } while (0) + + CHECK(atan2l(1.0L, 1.0L), 0.785398163397448309603L); + CHECK(atan2l(2.3L, 3.3L), 0.608689307327411694890L); + + CHECK(ceill(1.9L), 2.0L); + CHECK(ceill(4.5L), 5.0L); + CHECK(ceill(3.3L), 4.0L); + CHECK(ceill(6.1L), 7.0L); + + CHECK(floorl(1.9L), 1.0L); + CHECK(floorl(4.5L), 4.0L); + CHECK(floorl(7.3L), 7.0L); + CHECK(floorl(1234.1L), 1234.0L); + CHECK(floor(1233.1), 1233.0); + CHECK(floor(1239.98989898), 1239.0); + CHECK(floorf(9999.999), 9999.0); + + CHECK(ldexpl(1.0L, 1), 2.0L); + CHECK(ldexpl(1.0L, 10), 1024.0L); + CHECK(ldexpl(2.25L, 10), 2304.0L); + + CHECKLL(llrintl(1.0L), 1); + CHECKLL(llrintl(1.3L), 1); + CHECKLL(llrintl(1.5L), 2); + CHECKLL(llrintl(1.9L), 2); + CHECKLL(llrintf(123.34), 123); + CHECKLL(llrintf(-123.50), -124); + CHECKLL(llrint(42.42), 42); + CHECKLL(llrint(-2147483648.12343), -2147483648LL); +#if !defined(RT_ARCH_AMD64) + CHECKLL(lrint(-21474836499.12343), -2147483648LL); + CHECKLL(lrint(-2147483649932412.12343), -2147483648LL); +#else + CHECKLL(lrint(-21474836499.12343), -21474836499L); + CHECKLL(lrint(-2147483649932412.12343), -2147483649932412L); +#endif + +// __asm__("int $3"); + CHECKL(lrintl(make_lrd_cw(000000000000000000ULL,000000,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x067f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0a7f)), 1L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x3ffe,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x067f)), -1L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0a7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0xbffe,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x067f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0a7f)), 1L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x9249249249249000ULL,0x3ffc,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x027f)), 0L); + 
CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x067f)), -1L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0a7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0xe38e38e38e38e000ULL,0xbffb,0x0e7f)), 0L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x027f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x067f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0a7f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L); + CHECKL(lrintl(make_lrd_cw(0x8000000000000000ULL,0x400e,0x0e7f)), 32768L); +#if !defined(RT_ARCH_AMD64) + /* c90 says that the constant is 2147483648 (which is not representable as a signed 32-bit + * value). To that constant you've then applied the negation operation. c90 doesn't have + * negative constants, only positive ones that have been negated. */ + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x027f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x067f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0a7f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1)); + CHECKL(lrintl(make_lrd_cw(0xad78ebc5ac620000ULL,0xc041,0x0e7f)), (long)(-2147483647L - 1)); +#endif + set_cw(0x27f); + + CHECK(logl(2.7182818284590452353602874713526625L), 1.0); + + CHECK(remainderl(1.0L, 1.0L), 0.0); + CHECK(remainderl(1.0L, 1.5L), -0.5); + CHECK(remainderl(42.0L, 34.25L), 7.75); + CHECK(remainderf(43.0, 34.25), 8.75); + CHECK(remainder(44.25, 34.25), 10.00); + double rd1 = 44.25; + double rd2 = 34.25; + CHECK(remainder(rd1, rd2), 10.00); + CHECK(remainder(2.5, 2.0), 0.5); + CHECK(remainder(2.5, 2.0), 0.5); + CHECK(remainder(2.5, 2.0), 0.5); + CHECKLL(testremainder(), 1); + + + /* Only works in extended precision, while double precision is default on BSD (including Darwin) */ + set_cw(0x37f); + CHECK(rintl(1.0L), 1.0); + CHECK(rintl(1.4L), 1.0); + CHECK(rintl(1.3L), 1.0); + CHECK(rintl(0.9L), 1.0); + CHECK(rintl(3123.1232L), 3123.0); + CHECK(rint(3985.13454), 3985.0); + CHECK(rintf(9999.999), 10000.0); + set_cw(0x27f); + + CHECK(sinl(1.0L), 0.84147098480789650664L); +#if 0 + lrd = 180.0L; + CHECK(sinl(lrd), -0.801152635733830477871L); +#else + lrd = 180.0L; + lrdExpect = SIN180a; + lrdResult = sinl(lrd); + if (lrdResult != lrdExpect) + { + lrdExpect = SIN180b; + if (lrdResult != lrdExpect) + { + bitch("sinl(lrd)", &lrdResult, &lrdExpect); + cErrors++; + } + } +#endif +#if 0 + CHECK(sinl(180.0L), SIN180); +#else + lrdExpect = SIN180a; + lrdResult = sinl(180.0L); + if (lrdResult != lrdExpect) + { + lrdExpect = SIN180b; + if (lrdResult != lrdExpect) + { + bitch("sinl(180.0L)", &lrdResult, &lrdExpect); + cErrors++; + } + } +#endif + 
CHECKLL(testsin(), 1); + + CHECK(sqrtl(1.0L), 1.0); + CHECK(sqrtl(4.0L), 2.0); + CHECK(sqrtl(1525225.0L), 1235.0); + + CHECK(tanl(0.0L), 0.0); + CHECK(tanl(0.7853981633974483096156608458198757L), 1.0); + + CHECK(powl(0.0, 0.0), 1.0); + CHECK(powl(2.0, 2.0), 4.0); + CHECK(powl(3.0, 3.0), 27.0); + + return cErrors; +} + + +///////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// +#if 0 + +#define floatx_to_int32 floatx80_to_int32 +#define floatx_to_int64 floatx80_to_int64 +#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero +#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero +#define floatx_abs floatx80_abs +#define floatx_chs floatx80_chs +#define floatx_round_to_int(foo, bar) floatx80_round_to_int(foo, NULL) +#define floatx_compare floatx80_compare +#define floatx_compare_quiet floatx80_compare_quiet +#undef sin +#undef cos +#undef sqrt +#undef pow +#undef log +#undef tan +#undef atan2 +#undef floor +#undef ceil +#undef ldexp +#define sin sinl +#define cos cosl +#define sqrt sqrtl +#define pow powl +#define log logl +#define tan tanl +#define atan2 atan2l +#define floor floorl +#define ceil ceill +#define ldexp ldexpl + + +typedef long double CPU86_LDouble; + +typedef union { + long double d; + struct { + unsigned long long lower; + unsigned short upper; + } l; +} CPU86_LDoubleU; + +/* the following deal with x86 long double-precision numbers */ +#define MAXEXPD 0x7fff +#define EXPBIAS 16383 +#define EXPD(fp) (fp.l.upper & 0x7fff) +#define SIGND(fp) ((fp.l.upper) & 0x8000) +#define MANTD(fp) (fp.l.lower) +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS + +typedef long double floatx80; +#define STATUS_PARAM , void *pv + +static floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM) +{ + return rintl(a); +} + + + +struct myenv +{ + unsigned int fpstt; /* top of stack index */ + unsigned int fpus; + unsigned int fpuc; + unsigned char fptags[8]; /* 0 = valid, 1 = empty */ + union { +#ifdef USE_X86LDOUBLE + CPU86_LDouble d __attribute__((aligned(16))); +#else + CPU86_LDouble d; +#endif + } fpregs[8]; + +} my_env, env_org, env_res, *env = &my_env; + + +#define ST0 (env->fpregs[env->fpstt].d) +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) +#define ST1 ST(1) +#define MAXTAN 9223372036854775808.0 + + +static inline void fpush(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fptags[env->fpstt] = 0; /* validate stack entry */ +} + +static inline void fpop(void) +{ + env->fptags[env->fpstt] = 1; /* invalidate stack entry */ + env->fpstt = (env->fpstt + 1) & 7; +} + +static void helper_f2xm1(void) +{ + ST0 = pow(2.0,ST0) - 1.0; +} + +static void helper_fyl2x(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if (fptemp>0.0){ + fptemp = log(fptemp)/log(2.0); /* log2(ST) */ + ST1 *= fptemp; + fpop(); + } else { + env->fpus &= (~0x4700); + env->fpus |= 0x400; + } +} + +static void helper_fptan(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = tan(fptemp); + fpush(); + ST0 = 1.0; + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**52 only */ + } +} + +static void helper_fpatan(void) +{ + CPU86_LDouble fptemp, fpsrcop; + + fpsrcop = ST1; + fptemp = ST0; + ST1 = atan2(fpsrcop,fptemp); + fpop(); +} + +static 
void helper_fxtract(void) +{ + CPU86_LDoubleU temp; + unsigned int expdif; + + temp.d = ST0; + expdif = EXPD(temp) - EXPBIAS; + /*DP exponent bias*/ + ST0 = expdif; + fpush(); + BIASEXPONENT(temp); + ST0 = temp.d; +} + +static void helper_fprem1(void) +{ + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + int q; + + fpsrcop = ST0; + fptemp = ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + if (expdif < 53) { + dblq = fpsrcop / fptemp; + dblq = (dblq < 0.0)? ceil(dblq): floor(dblq); + ST0 = fpsrcop - fptemp*dblq; + q = (int)dblq; /* cutting off top bits is assumed here */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C1,C3) <-- (q2,q1,q0) */ + env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */ + env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */ + env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */ + } else { + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, expdif-50); + fpsrcop = (ST0 / ST1) / fptemp; + /* fpsrcop = integer obtained by rounding to the nearest */ + fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)? + floor(fpsrcop): ceil(fpsrcop); + ST0 -= (ST1 * fpsrcop * fptemp); + } +} + +static void helper_fprem(void) +{ +#if 0 +LogFlow(("helper_fprem: ST0=%.*Rhxs ST1=%.*Rhxs fpus=%#x\n", sizeof(ST0), &ST0, sizeof(ST1), &ST1, env->fpus)); + + __asm__ __volatile__("fldt (%2)\n" + "fldt (%1)\n" + "fprem \n" + "fnstsw (%0)\n" + "fstpt (%1)\n" + "fstpt (%2)\n" + : : "r" (&env->fpus), "r" (&ST0), "r" (&ST1) : "memory"); + +LogFlow(("helper_fprem: -> ST0=%.*Rhxs fpus=%#x c\n", sizeof(ST0), &ST0, env->fpus)); +#else + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + int q; + + fpsrcop = ST0; + fptemp = ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + if ( expdif < 53 ) { + dblq = fpsrcop / fptemp; + dblq = (dblq < 0.0)? ceil(dblq): floor(dblq); + ST0 = fpsrcop - fptemp*dblq; + q = (int)dblq; /* cutting off top bits is assumed here */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C1,C3) <-- (q2,q1,q0) */ + env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */ + env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */ + env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */ + } else { + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, expdif-50); + fpsrcop = (ST0 / ST1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0)? 
+ -(floor(fabs(fpsrcop))): floor(fpsrcop); + ST0 -= (ST1 * fpsrcop * fptemp); + } +#endif +} + +static void helper_fyl2xp1(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp+1.0)>0.0) { + fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */ + ST1 *= fptemp; + fpop(); + } else { + env->fpus &= (~0x4700); + env->fpus |= 0x400; + } +} + +static void helper_fsqrt(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if (fptemp<0.0) { + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + env->fpus |= 0x400; + } + ST0 = sqrt(fptemp); +} + +static void helper_fsincos(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = sin(fptemp); + fpush(); + ST0 = cos(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +static void helper_frndint(void) +{ + ST0 = floatx_round_to_int(ST0, &env->fp_status); +} + +static void helper_fscale(void) +{ + ST0 = ldexp (ST0, (int)(ST1)); +} + +static void helper_fsin(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = sin(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**53 only */ + } +} + +static void helper_fcos(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = cos(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg5 < 2**63 only */ + } +} + +static void helper_fxam_ST0(void) +{ + CPU86_LDoubleU temp; + int expdif; + + temp.d = ST0; + + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + if (SIGND(temp)) + env->fpus |= 0x200; /* C1 <-- 1 */ + + /* XXX: test fptags too */ + expdif = EXPD(temp); + if (expdif == MAXEXPD) { +#ifdef USE_X86LDOUBLE + if (MANTD(temp) == 0x8000000000000000ULL) +#else + if (MANTD(temp) == 0) +#endif + env->fpus |= 0x500 /*Infinity*/; + else + env->fpus |= 0x100 /*NaN*/; + } else if (expdif == 0) { + if (MANTD(temp) == 0) + env->fpus |= 0x4000 /*Zero*/; + else + env->fpus |= 0x4400 /*Denormal*/; + } else { + env->fpus |= 0x400; + } +} + + +void check_env(void) +{ + int i; + for (i = 0; i < 8; i++) + { + CPU86_LDoubleU my, res; + my.d = env->fpregs[i].d; + res.d = env_res.fpregs[i].d; + + if ( my.l.lower != res.l.lower + || my.l.upper != res.l.upper) + printf("register %i: %#018llx:%#06x\n" + " expected %#018llx:%#06x\n", + i, + my.l.lower, my.l.upper, + res.l.lower, res.l.upper); + } + for (i = 0; i < 8; i++) + if (env->fptags[i] != env_res.fptags[i]) + printf("tag %i: %d != %d\n", i, env->fptags[i], env_res.fptags[i]); + if (env->fpstt != env_res.fpstt) + printf("fpstt: %#06x != %#06x\n", env->fpstt, env_res.fpstt); + if (env->fpuc != env_res.fpuc) + printf("fpuc: %#06x != %#06x\n", env->fpuc, env_res.fpuc); + if (env->fpus != env_res.fpus) + printf("fpus: %#06x != %#06x\n", env->fpus, env_res.fpus); +} +#endif /* not used. 
*/ + +#if 0 /* insert this into helper.c */ +/* FPU helpers */ +CPU86_LDoubleU my_st[8]; +unsigned int my_fpstt; +unsigned int my_fpus; +unsigned int my_fpuc; +unsigned char my_fptags[8]; + +void hlp_fpu_enter(void) +{ + int i; + for (i = 0; i < 8; i++) + my_st[i].d = env->fpregs[i].d; + my_fpstt = env->fpstt; + my_fpus = env->fpus; + my_fpuc = env->fpuc; + memcpy(&my_fptags, &env->fptags, sizeof(my_fptags)); +} + +void hlp_fpu_leave(const char *psz) +{ + int i; + Log(("/*code*/ \n")); + for (i = 0; i < 8; i++) + Log(("/*code*/ *(unsigned long long *)&env_org.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_org.fpregs[%d])[4] = %#06x; env_org.fptags[%d]=%d;\n", + i, my_st[i].l.lower, i, my_st[i].l.upper, i, my_fptags[i])); + Log(("/*code*/ env_org.fpstt=%#x;\n", my_fpstt)); + Log(("/*code*/ env_org.fpus=%#x;\n", my_fpus)); + Log(("/*code*/ env_org.fpuc=%#x;\n", my_fpuc)); + for (i = 0; i < 8; i++) + { + CPU86_LDoubleU u; + u.d = env->fpregs[i].d; + Log(("/*code*/ *(unsigned long long *)&env_res.fpregs[%d] = %#018llxULL; ((unsigned short *)&env_res.fpregs[%d])[4] = %#06x; env_res.fptags[%d]=%d;\n", + i, u.l.lower, i, u.l.upper, i, env->fptags[i])); + } + Log(("/*code*/ env_res.fpstt=%#x;\n", env->fpstt)); + Log(("/*code*/ env_res.fpus=%#x;\n", env->fpus)); + Log(("/*code*/ env_res.fpuc=%#x;\n", env->fpuc)); + + Log(("/*code*/ my_env = env_org;\n")); + Log(("/*code*/ %s();\n", psz)); + Log(("/*code*/ check_env();\n")); +} +#endif /* helper.c */ + +extern void testmath2(void ) +{ +#if 0 +#include "/tmp/code.h" +#endif +} + + +///////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////// + +#ifdef MATHTEST_STANDALONE + +void test_fops(double a, double b) +{ + printf("a=%f b=%f a+b=%f\n", a, b, a + b); + printf("a=%f b=%f a-b=%f\n", a, b, a - b); + printf("a=%f b=%f a*b=%f\n", a, b, a * b); + printf("a=%f b=%f a/b=%f\n", a, b, a / b); + printf("a=%f b=%f fmod(a, b)=%f\n", a, b, (double)fmod(a, b)); + printf("a=%f sqrt(a)=%f\n", a, (double)sqrtl(a)); + printf("a=%f sin(a)=%f\n", a, (double)sinl(a)); + printf("a=%f cos(a)=%f\n", a, (double)cos(a)); + printf("a=%f tan(a)=%f\n", a, (double)tanl(a)); + printf("a=%f log(a)=%f\n", a, (double)log(a)); + printf("a=%f exp(a)=%f\n", a, (double)exp(a)); + printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b)); + /* just to test some op combining */ + printf("a=%f asin(sinl(a))=%f\n", a, (double)asin(sinl(a))); + printf("a=%f acos(cos(a))=%f\n", a, (double)acos(cos(a))); + printf("a=%f atan(tanl(a))=%f\n", a, (double)atan(tanl(a))); +} + +int main() +{ + unsigned cErrors = testmath(); + + testmath2(); + test_fops(2, 3); + test_fops(1.4, -5); + + printf("cErrors=%u\n", cErrors); + return cErrors; +} +#endif + diff --git a/src/recompiler/VBoxREM.def b/src/recompiler/VBoxREM.def new file mode 100644 index 00000000..96e28350 --- /dev/null +++ b/src/recompiler/VBoxREM.def @@ -0,0 +1,50 @@ +; $Id: VBoxREM.def $ +;; @file +; VBoxREM Definition File. +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. 
This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + +LIBRARY VBoxREM.dll + +EXPORTS + REMR3Init + REMR3InitFinalize + REMR3Term + REMR3Reset + REMR3Step + REMR3BreakpointSet + REMR3BreakpointClear + REMR3Run + REMR3EmulateInstruction + REMR3State + REMR3StateBack + REMR3StateUpdate + REMR3A20Set + REMR3DisasEnableStepping + REMR3ReplayHandlerNotifications + REMR3NotifyPhysRamRegister + REMR3NotifyPhysRamDeregister + REMR3NotifyPhysRomRegister + REMR3NotifyHandlerPhysicalModify + REMR3NotifyHandlerPhysicalRegister + REMR3NotifyHandlerPhysicalDeregister + REMR3NotifyInterruptSet + REMR3NotifyInterruptClear + REMR3NotifyTimerPending + REMR3NotifyDmaPending + REMR3NotifyQueuePending + REMR3NotifyFF + REMR3NotifyCodePageChanged + REMR3IsPageAccessHandled + diff --git a/src/recompiler/VBoxREM.rc b/src/recompiler/VBoxREM.rc new file mode 100644 index 00000000..ccf23339 --- /dev/null +++ b/src/recompiler/VBoxREM.rc @@ -0,0 +1,51 @@ +/* $Id: VBoxREM.rc $ */ +/** @file + * VBoxREM - Resource file containing version info. + */ + +/* + * Copyright (C) 2015-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#include <windows.h> +#include <VBox/version.h> + +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US + +VS_VERSION_INFO VERSIONINFO + FILEVERSION VBOX_RC_FILE_VERSION + PRODUCTVERSION VBOX_RC_PRODUCT_VERSION + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK + FILEFLAGS VBOX_RC_FILE_FLAGS + FILEOS VBOX_RC_FILE_OS + FILETYPE VBOX_RC_TYPE_DLL + FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" // Lang=US English, CharSet=Unicode + BEGIN + VALUE "FileDescription", "VirtualBox Recompiler\0" + VALUE "InternalName", "VBoxREM\0" + VALUE "OriginalFilename", "VBoxREM.dll\0" + VALUE "CompanyName", VBOX_RC_COMPANY_NAME + VALUE "FileVersion", VBOX_RC_FILE_VERSION_STR + VALUE "LegalCopyright", VBOX_RC_LEGAL_COPYRIGHT + VALUE "ProductName", VBOX_RC_PRODUCT_NAME_STR + VALUE "ProductVersion", VBOX_RC_PRODUCT_VERSION_STR + VBOX_RC_MORE_STRINGS + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END diff --git a/src/recompiler/VBoxREMWrapper.cpp b/src/recompiler/VBoxREMWrapper.cpp new file mode 100644 index 00000000..4349bd9a --- /dev/null +++ b/src/recompiler/VBoxREMWrapper.cpp @@ -0,0 +1,2477 @@ +/* $Id: VBoxREMWrapper.cpp $ */ +/** @file + * + * VBoxREM Win64 DLL Wrapper. + */ +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_vboxrem_amd64 VBoxREM Hacks on AMD64 + * + * There are problems with building BoxREM both on WIN64 and 64-bit linux. + * + * On linux binutils refuses to link shared objects without -fPIC compiled code + * (bitches about some fixup types). But when trying to build with -fPIC dyngen + * doesn't like the code anymore. Sweet. The current solution is to build the + * VBoxREM code as a relocatable module and use our ELF loader to load it. + * + * On WIN64 we're not aware of any GCC port which can emit code using the MSC + * calling convention. So, we're in for some real fun here. The choice is between + * porting GCC to AMD64 WIN64 and coming up with some kind of wrapper around + * either the win32 build or the 64-bit linux build. + * + * -# Porting GCC will be a lot of work. For one thing the calling convention differs + * and messing with such stuff can easily create ugly bugs. We would also have to + * do some binutils changes, but I think those are rather small compared to GCC. + * (That said, the MSC calling convention is far simpler than the linux one, it + * reminds me of _Optlink which we have working already.) + * -# Wrapping win32 code will work, but addresses outside the first 4GB are + * inaccessible and we will have to create 32-64 thunks for all imported functions. + * (To switch between 32-bit and 64-bit is load the right CS using far jmps (32->64) + * or far returns (both).) + * -# Wrapping 64-bit linux code might be the easier solution. The requirements here + * are: + * - Remove all CRT references we possibly, either by using intrinsics or using + * IPRT. Part of IPRT will be linked into VBoxREM2.rel, this will be yet another + * IPRT mode which I've dubbed 'no-crt'. The no-crt mode provide basic non-system + * dependent stuff. + * - Compile and link it into a relocatable object (include the gcc intrinsics + * in libgcc). Call this VBoxREM2.rel. + * - Write a wrapper dll, VBoxREM.dll, for which during REMR3Init() will load + * VBoxREM2.rel (using IPRT) and generate calling convention wrappers + * for all IPRT functions and VBoxVMM functions that it uses. All exports + * will be wrapped vice versa. + * - For building on windows hosts, we will use a mingw32 hosted cross compiler. + * and add a 'no-crt' mode to IPRT where it provides the necessary CRT headers + * and function implementations. + * + * The 3rd solution will be tried out first since it requires the least effort and + * will let us make use of the full 64-bit register set. + * + * + * + * @section sec_vboxrem_amd64_compare Comparing the GCC and MSC calling conventions + * + * GCC expects the following (cut & past from page 20 in the ABI draft 0.96): + * + * @verbatim + %rax temporary register; with variable arguments passes information about the + number of SSE registers used; 1st return register. + [Not preserved] + %rbx callee-saved register; optionally used as base pointer. + [Preserved] + %rcx used to pass 4th integer argument to functions. 
+ [Not preserved] + %rdx used to pass 3rd argument to functions; 2nd return register + [Not preserved] + %rsp stack pointer + [Preserved] + %rbp callee-saved register; optionally used as frame pointer + [Preserved] + %rsi used to pass 2nd argument to functions + [Not preserved] + %rdi used to pass 1st argument to functions + [Not preserved] + %r8 used to pass 5th argument to functions + [Not preserved] + %r9 used to pass 6th argument to functions + [Not preserved] + %r10 temporary register, used for passing a function's static chain + pointer [Not preserved] + %r11 temporary register + [Not preserved] + %r12-r15 callee-saved registers + [Preserved] + %xmm0-%xmm1 used to pass and return floating point arguments + [Not preserved] + %xmm2-%xmm7 used to pass floating point arguments + [Not preserved] + %xmm8-%xmm15 temporary registers + [Not preserved] + %mmx0-%mmx7 temporary registers + [Not preserved] + %st0 temporary register; used to return long double arguments + [Not preserved] + %st1 temporary registers; used to return long double arguments + [Not preserved] + %st2-%st7 temporary registers + [Not preserved] + %fs Reserved for system use (as thread specific data register) + [Not preserved] + @endverbatim + * + * Direction flag is preserved as cleared. + * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction. + * + * + * + * MSC expects the following: + * @verbatim + rax return value, not preserved. + rbx preserved. + rcx 1st argument, integer, not preserved. + rdx 2nd argument, integer, not preserved. + rbp preserved. + rsp preserved. + rsi preserved. + rdi preserved. + r8 3rd argument, integer, not preserved. + r9 4th argument, integer, not preserved. + r10 scratch register, not preserved. + r11 scratch register, not preserved. + r12-r15 preserved. + xmm0 1st argument, fp, return value, not preserved. + xmm1 2st argument, fp, not preserved. + xmm2 3st argument, fp, not preserved. + xmm3 4st argument, fp, not preserved. + xmm4-xmm5 scratch, not preserved. + xmm6-xmm15 preserved. + @endverbatim + * + * Dunno what the direction flag is... + * The stack must be aligned on a 16-byte boundary before the 'call/jmp' instruction. + * + * + * Thus, When GCC code is calling MSC code we don't really have to preserve + * anything. But but MSC code is calling GCC code, we'll have to save esi and edi. + * + */ + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** @def USE_REM_STUBS + * Define USE_REM_STUBS to stub the entire REM stuff. This is useful during + * early porting (before we start running stuff). + */ +#if defined(DOXYGEN_RUNNING) +# define USE_REM_STUBS +#endif + +/** @def USE_REM_CALLING_CONVENTION_GLUE + * Define USE_REM_CALLING_CONVENTION_GLUE for platforms where it's necessary to + * use calling convention wrappers. + */ +#if (defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)) || defined(DOXYGEN_RUNNING) +# define USE_REM_CALLING_CONVENTION_GLUE +#endif + +/** @def USE_REM_IMPORT_JUMP_GLUE + * Define USE_REM_IMPORT_JUMP_GLUE for platforms where we need to + * emit some jump glue to deal with big addresses. 
+ */ +#if (defined(RT_ARCH_AMD64) && !defined(USE_REM_CALLING_CONVENTION_GLUE) && !defined(RT_OS_DARWIN)) || defined(DOXYGEN_RUNNING) +# define USE_REM_IMPORT_JUMP_GLUE +#endif + +/** @def VBOX_USE_BITNESS_SELECTOR + * Define VBOX_USE_BITNESS_SELECTOR to build this module as a bitness selector + * between VBoxREM32 and VBoxREM64. + */ +#if defined(DOXYGEN_RUNNING) +# define VBOX_USE_BITNESS_SELECTOR +#endif + +/** @def VBOX_WITHOUT_REM_LDR_CYCLE + * Define VBOX_WITHOUT_REM_LDR_CYCLE dynamically resolve any dependencies on + * VBoxVMM and thus avoid the cyclic dependency between VBoxREM and VBoxVMM. + */ +#if defined(DOXYGEN_RUNNING) +# define VBOX_WITHOUT_REM_LDR_CYCLE +#endif + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_REM +#include <VBox/vmm/rem.h> +#include <VBox/vmm/vmm.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/dbg.h> +#include <VBox/vmm/csam.h> +#include <VBox/vmm/mm.h> +#include <VBox/vmm/em.h> +#include <VBox/vmm/ssm.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/patm.h> +#include <VBox/vmm/pdm.h> +#include <VBox/vmm/pdmcritsect.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/vm.h> +#include <VBox/err.h> +#include <VBox/log.h> +#include <VBox/dis.h> + +#include <iprt/alloc.h> +#include <iprt/assert.h> +#include <iprt/ldr.h> +#include <iprt/lockvalidator.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/string.h> +#include <iprt/stream.h> + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * Parameter descriptor. + */ +typedef struct REMPARMDESC +{ + /** Parameter flags (REMPARMDESC_FLAGS_*). */ + uint8_t fFlags; + /** The parameter size if REMPARMDESC_FLAGS_SIZE is set. */ + uint8_t cb; + /** Pointer to additional data. + * For REMPARMDESC_FLAGS_PFN this is a PREMFNDESC. */ + void *pvExtra; + +} REMPARMDESC, *PREMPARMDESC; +/** Pointer to a constant parameter descriptor. */ +typedef const REMPARMDESC *PCREMPARMDESC; + +/** @name Parameter descriptor flags. + * @{ */ +/** The parameter type is a kind of integer which could fit in a register. This includes pointers. */ +#define REMPARMDESC_FLAGS_INT 0 +/** The parameter is a GC pointer. */ +#define REMPARMDESC_FLAGS_GCPTR 1 +/** The parameter is a GC physical address. */ +#define REMPARMDESC_FLAGS_GCPHYS 2 +/** The parameter is a HC physical address. */ +#define REMPARMDESC_FLAGS_HCPHYS 3 +/** The parameter type is a kind of floating point. */ +#define REMPARMDESC_FLAGS_FLOAT 4 +/** The parameter value is a struct. This type takes a size. */ +#define REMPARMDESC_FLAGS_STRUCT 5 +/** The parameter is an elipsis. */ +#define REMPARMDESC_FLAGS_ELLIPSIS 6 +/** The parameter is a va_list. */ +#define REMPARMDESC_FLAGS_VALIST 7 +/** The parameter is a function pointer. pvExtra is a PREMFNDESC. */ +#define REMPARMDESC_FLAGS_PFN 8 +/** The parameter type mask. */ +#define REMPARMDESC_FLAGS_TYPE_MASK 15 +/** The parameter size field is valid. */ +#define REMPARMDESC_FLAGS_SIZE RT_BIT(7) +/** @} */ + +/** + * Function descriptor. 
+ */ +typedef struct REMFNDESC +{ + /** The function name. */ + const char *pszName; + /** Exports: Pointer to the function pointer. + * Imports: Pointer to the function. */ + void *pv; + /** Array of parameter descriptors. */ + PCREMPARMDESC paParams; + /** The number of parameter descriptors pointed to by paParams. */ + uint8_t cParams; + /** Function flags (REMFNDESC_FLAGS_*). */ + uint8_t fFlags; + /** The size of the return value. */ + uint8_t cbReturn; + /** Pointer to the wrapper code for imports. */ + void *pvWrapper; +} REMFNDESC, *PREMFNDESC; +/** Pointer to a constant function descriptor. */ +typedef const REMFNDESC *PCREMFNDESC; + +/** @name Function descriptor flags. + * @{ */ +/** The return type is void. */ +#define REMFNDESC_FLAGS_RET_VOID 0 +/** The return type is a kind of integer passed in rax/eax. This includes pointers. */ +#define REMFNDESC_FLAGS_RET_INT 1 +/** The return type is a kind of floating point. */ +#define REMFNDESC_FLAGS_RET_FLOAT 2 +/** The return value is a struct. This type take a size. */ +#define REMFNDESC_FLAGS_RET_STRUCT 3 +/** The return type mask. */ +#define REMFNDESC_FLAGS_RET_TYPE_MASK 7 +/** The argument list contains one or more va_list arguments (i.e. problems). */ +#define REMFNDESC_FLAGS_VALIST RT_BIT(6) +/** The function has an ellipsis (i.e. a problem). */ +#define REMFNDESC_FLAGS_ELLIPSIS RT_BIT(7) +/** @} */ + +/** + * Chunk of read-write-executable memory. + */ +typedef struct REMEXECMEM +{ + /** The number of bytes left. */ + struct REMEXECMEM *pNext; + /** The size of this chunk. */ + uint32_t cb; + /** The offset of the next code block. */ + uint32_t off; +#if ARCH_BITS == 32 + uint32_t padding; +#endif +} REMEXECMEM, *PREMEXECMEM; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifndef USE_REM_STUBS +/** Loader handle of the REM object/DLL. */ +static RTLDRMOD g_ModREM2 = NIL_RTLDRMOD; +# ifndef VBOX_USE_BITNESS_SELECTOR +/** Pointer to the memory containing the loaded REM2 object/DLL. */ +static void *g_pvREM2 = NULL; +/** The size of the memory g_pvREM2 is pointing to. */ +static size_t g_cbREM2 = 0; +# endif +# ifdef VBOX_WITHOUT_REM_LDR_CYCLE +/** Loader handle of the VBoxVMM DLL. */ +static RTLDRMOD g_ModVMM = NIL_RTLDRMOD; +# endif + +/** Linux object export addresses. + * These are references from the assembly wrapper code. 
+ * @{ */ +static DECLCALLBACKPTR(int, pfnREMR3Init)(PVM); +static DECLCALLBACKPTR(int, pfnREMR3InitFinalize)(PVM); +static DECLCALLBACKPTR(int, pfnREMR3Term)(PVM); +static DECLCALLBACKPTR(void, pfnREMR3Reset)(PVM); +static DECLCALLBACKPTR(int, pfnREMR3Step)(PVM, PVMCPU); +static DECLCALLBACKPTR(int, pfnREMR3BreakpointSet)(PVM, RTGCUINTPTR); +static DECLCALLBACKPTR(int, pfnREMR3BreakpointClear)(PVM, RTGCUINTPTR); +static DECLCALLBACKPTR(int, pfnREMR3EmulateInstruction)(PVM, PVMCPU); +static DECLCALLBACKPTR(int, pfnREMR3Run)(PVM, PVMCPU); +static DECLCALLBACKPTR(int, pfnREMR3State)(PVM, PVMCPU); +static DECLCALLBACKPTR(int, pfnREMR3StateBack)(PVM, PVMCPU); +static DECLCALLBACKPTR(void, pfnREMR3StateUpdate)(PVM, PVMCPU); +static DECLCALLBACKPTR(void, pfnREMR3A20Set)(PVM, PVMCPU, bool); +static DECLCALLBACKPTR(void, pfnREMR3ReplayHandlerNotifications)(PVM pVM); +static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamRegister)(PVM, RTGCPHYS, RTGCPHYS, unsigned); +static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRamDeregister)(PVM, RTGCPHYS, RTUINT); +static DECLCALLBACKPTR(void, pfnREMR3NotifyPhysRomRegister)(PVM, RTGCPHYS, RTUINT, void *, bool); +static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalModify)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, RTGCPHYS, bool, bool); +static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalRegister)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, bool); +static DECLCALLBACKPTR(void, pfnREMR3NotifyHandlerPhysicalDeregister)(PVM, PGMPHYSHANDLERKIND, RTGCPHYS, RTGCPHYS, bool, bool); +static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptSet)(PVM, PVMCPU); +static DECLCALLBACKPTR(void, pfnREMR3NotifyInterruptClear)(PVM, PVMCPU); +static DECLCALLBACKPTR(void, pfnREMR3NotifyTimerPending)(PVM, PVMCPU); +static DECLCALLBACKPTR(void, pfnREMR3NotifyDmaPending)(PVM); +static DECLCALLBACKPTR(void, pfnREMR3NotifyQueuePending)(PVM); +static DECLCALLBACKPTR(void, pfnREMR3NotifyFF)(PVM); +static DECLCALLBACKPTR(int, pfnREMR3NotifyCodePageChanged)(PVM, PVMCPU, RTGCPTR); +static DECLCALLBACKPTR(int, pfnREMR3DisasEnableStepping)(PVM, bool); +static DECLCALLBACKPTR(bool, pfnREMR3IsPageAccessHandled)(PVM, RTGCPHYS); +/** @} */ + +/** Export and import parameter descriptors. + * @{ + */ +/* Common args. 
*/ +static const REMPARMDESC g_aArgsSIZE_T[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsPTR[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL } +}; +static const REMPARMDESC g_aArgsSIZE_TTag[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsPTRTag[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsPTR_SIZE_T[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsSIZE_TTagLoc[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsPTRLoc[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsVM[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL } +}; +static const REMPARMDESC g_aArgsVMCPU[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL } +}; + +static const REMPARMDESC g_aArgsVMandVMCPU[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL } +}; + +/* REM args */ +static const REMPARMDESC g_aArgsBreakpoint[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINTPTR), NULL } +}; +static const REMPARMDESC g_aArgsA20Set[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsNotifyPhysRamRegister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL } +}; +static const REMPARMDESC g_aArgsNotifyPhysRamChunkRegister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTHCUINTPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL } +}; +static const REMPARMDESC g_aArgsNotifyPhysRamDeregister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL } +}; +static const REMPARMDESC g_aArgsNotifyPhysRomRegister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsNotifyHandlerPhysicalModify[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), 
NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsNotifyHandlerPhysicalRegister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsNotifyHandlerPhysicalDeregister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMPHYSHANDLERKIND), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsNotifyCodePageChanged[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINTPTR), NULL } +}; +static const REMPARMDESC g_aArgsNotifyPendingInterrupt[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL } +}; +static const REMPARMDESC g_aArgsDisasEnableStepping[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsIsPageAccessHandled[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL } +}; + +# ifndef VBOX_USE_BITNESS_SELECTOR + +/* VMM args */ +static const REMPARMDESC g_aArgsAPICUpdatePendingInterrupts[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL } +}; +static const REMPARMDESC g_aArgsAPICGetTpr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL } +}; +static const REMPARMDESC g_aArgsAPICSetTpr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL } +}; +static const REMPARMDESC g_aArgsCPUMGetGuestCpl[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, +}; + +/* CPUMQueryGuestMsr args */ +static const REMPARMDESC g_aArgsCPUMQueryGuestMsr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t *), NULL }, +}; + +/* CPUMSetGuestMsr args */ +static const REMPARMDESC g_aArgsCPUMSetGuestMsr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }, +}; + +static const REMPARMDESC g_aArgsCPUMGetGuestCpuId[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL } +}; + +static const REMPARMDESC g_aArgsCPUMR3RemEnter[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL } +}; + +static const REMPARMDESC g_aArgsCPUMR3RemLeave[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { 
REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; + +static const REMPARMDESC g_aArgsCPUMSetChangedFlags[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; + +static const REMPARMDESC g_aArgsCPUMQueryGuestCtxPtr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL } +}; +static const REMPARMDESC g_aArgsCSAMR3MonitorPage[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(CSAMTAG), NULL } +}; +static const REMPARMDESC g_aArgsCSAMR3UnmonitorPage[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(CSAMTAG), NULL } +}; + +static const REMPARMDESC g_aArgsCSAMR3RecordCallAddress[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL } +}; + +# if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */ +static const REMPARMDESC g_aArgsDBGCRegisterCommands[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PCDBGCCMD), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL } +}; +# endif +static const REMPARMDESC g_aArgsDBGFR3DisasInstrEx[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(VMCPUID), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTSEL), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTGCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL } +}; +static const REMPARMDESC g_aArgsDBGFR3DisasInstrCurrentLogInternal[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL } +}; +static const REMPARMDESC g_aArgsDBGFR3Info[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PCDBGFINFOHLP), NULL } +}; +static const REMPARMDESC g_aArgsDBGFR3AsSymbolByAddr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTDBGAS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PCDBGFADDRESS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(PRTGCINTPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PRTDBGSYMBOL), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PRTDBGMOD), NULL } +}; +static const REMPARMDESC g_aArgsDBGFR3AddrFromFlat[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PUVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PDBGFADDRESS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTGCUINTPTR), NULL } +}; +static const REMPARMDESC g_aArgsDISInstrToStr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t const *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(DISCPUMODE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PDISCPUSTATE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsEMR3FatalError[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(int), NULL } +}; +static const REMPARMDESC g_aArgsEMSetInhibitInterruptsPC[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTGCPTR), NULL } +}; 
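/*
 * Illustrative sketch (not part of the VirtualBox sources): the argument
 * tables in this file pair one REMPARMDESC per parameter with one REMFNDESC
 * per imported or exported function, and the glue generators further down use
 * the per-parameter flags and sizes to marshal arguments between the GCC and
 * MSC calling conventions. For a hypothetical import with the prototype
 * "int SomeFn(PVM pVM, RTGCPHYS GCPhys, bool fEnable)" (SomeFn, g_aArgsSomeFn
 * and g_SomeFnDesc are invented names used only for this example), the
 * descriptors would look roughly like this:
 *
 *   static const REMPARMDESC g_aArgsSomeFn[] =
 *   {
 *       { REMPARMDESC_FLAGS_INT,    sizeof(PVM),      NULL },  // pointer passed as an integer-sized value
 *       { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL },  // guest physical address
 *       { REMPARMDESC_FLAGS_INT,    sizeof(bool),     NULL }
 *   };
 *   static REMFNDESC g_SomeFnDesc =
 *   {
 *       "SomeFn", (void *)(uintptr_t)&SomeFn, &g_aArgsSomeFn[0], RT_ELEMENTS(g_aArgsSomeFn),
 *       REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL
 *   };
 *
 * The last REMFNDESC member (pvWrapper) starts out NULL and is filled in by
 * the glue generator when a calling-convention wrapper is created for the
 * function.
 */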
+static const REMPARMDESC g_aArgsHMCanExecuteGuest[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PCPUMCTX), NULL }, +}; +static const REMPARMDESC g_aArgsIOMIOPortRead[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTIOPORT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsIOMIOPortWrite[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTIOPORT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsIOMMMIORead[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsIOMMMIOWrite[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsMMR3HeapAlloc[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(MMTAG), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsMMR3HeapAllocZ[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(MMTAG), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsPATMIsPatchGCAddr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTRCUINTPTR), NULL } +}; +static const REMPARMDESC g_aArgsPATMR3QueryOpcode[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTRCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL } +}; +static const REMPARMDESC g_aArgsPDMGetInterrupt[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL } +}; +static const REMPARMDESC g_aArgsPDMIsaSetIrq[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsPDMR3CritSectInit[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL }, + /* RT_SRC_POS_DECL */ + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0 } +}; +static const REMPARMDESC g_aArgsPDMCritSectEnter[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(int), NULL } +}; +static const REMPARMDESC g_aArgsPDMCritSectEnterDebug[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PPDMCRITSECT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTHCUINTPTR), NULL }, + /* RT_SRC_POS_DECL */ + { REMPARMDESC_FLAGS_INT, 
sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsPGMGetGuestMode[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, +}; +static const REMPARMDESC g_aArgsPGMGstGetPage[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PRTGCPHYS), NULL } +}; +static const REMPARMDESC g_aArgsPGMInvalidatePage[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysTlbGCPhys2Ptr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL } +}; +static const REMPARMDESC g_aArgsPGM3PhysGrowRange[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PCRTGCPHYS), NULL } +}; +static const REMPARMDESC g_aArgsPGMPhysIsGCPhysValid[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL } +}; +static const REMPARMDESC g_aArgsPGMPhysRead[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMPhysSimpleReadGCPtr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsPGMPhysWrite[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMChangeMode[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL } +}; +static const REMPARMDESC g_aArgsPGMFlushTLB[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(bool), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysReadUxx[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysWriteU8[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysWriteU16[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint16_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysWriteU32[] = +{ + { REMPARMDESC_FLAGS_INT, 
sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsPGMR3PhysWriteU64[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_GCPHYS, sizeof(RTGCPHYS), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint64_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PGMACCESSORIGIN), NULL } +}; +static const REMPARMDESC g_aArgsRTMemReallocTag[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void*), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsRTMemEfRealloc[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void*), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3GetGCPtr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PRTGCPTR), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3GetMem[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3GetU32[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t *), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3GetUInt[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PRTUINT), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3PutGCPtr[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCPTR), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3PutMem[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsSSMR3PutU32[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, +}; +static const REMPARMDESC g_aArgsSSMR3PutUInt[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(RTUINT), NULL }, +}; + +static const REMPARMDESC g_aArgsSSMIntLiveExecCallback[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, +}; +static REMFNDESC g_SSMIntLiveExecCallback = +{ + "SSMIntLiveExecCallback", NULL, &g_aArgsSSMIntLiveExecCallback[0], RT_ELEMENTS(g_aArgsSSMIntLiveExecCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL +}; + +static const REMPARMDESC g_aArgsSSMIntLiveVoteCallback[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, +}; +static REMFNDESC g_SSMIntLiveVoteCallback = +{ + "SSMIntLiveVoteCallback", NULL, &g_aArgsSSMIntLiveVoteCallback[0], RT_ELEMENTS(g_aArgsSSMIntLiveVoteCallback), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL +}; + +static const REMPARMDESC g_aArgsSSMIntCallback[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, +}; +static REMFNDESC g_SSMIntCallback = +{ + "SSMIntCallback", NULL, 
&g_aArgsSSMIntCallback[0], RT_ELEMENTS(g_aArgsSSMIntCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL +}; + +static const REMPARMDESC g_aArgsSSMIntLoadExecCallback[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(PSSMHANDLE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, +}; +static REMFNDESC g_SSMIntLoadExecCallback = +{ + "SSMIntLoadExecCallback", NULL, &g_aArgsSSMIntLoadExecCallback[0], RT_ELEMENTS(g_aArgsSSMIntLoadExecCallback), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL +}; +/* Note: don't forget about the handwritten assembly wrapper when changing this! */ +static const REMPARMDESC g_aArgsSSMR3RegisterInternal[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEPREP), &g_SSMIntCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEEXEC), &g_SSMIntLiveExecCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLIVEVOTE), &g_SSMIntLiveVoteCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEPREP), &g_SSMIntCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEEXEC), &g_SSMIntCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTSAVEDONE), &g_SSMIntCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADPREP), &g_SSMIntCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADEXEC), &g_SSMIntLoadExecCallback }, + { REMPARMDESC_FLAGS_PFN, sizeof(PFNSSMINTLOADDONE), &g_SSMIntCallback }, +}; + +static const REMPARMDESC g_aArgsSTAMR3Register[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(STAMTYPE), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(STAMVISIBILITY), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(STAMUNIT), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsSTAMR3Deregister[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, +}; +static const REMPARMDESC g_aArgsTRPMAssertTrap[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(TRPMEVENT), NULL } +}; +static const REMPARMDESC g_aArgsTRPMQueryTrap[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(uint8_t *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(TRPMEVENT *), NULL } +}; +static const REMPARMDESC g_aArgsTRPMSetErrorCode[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINT), NULL } +}; +static const REMPARMDESC g_aArgsTRPMSetFaultAddress[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMCPU), NULL }, + { REMPARMDESC_FLAGS_GCPTR, sizeof(RTGCUINT), NULL } +}; +static const REMPARMDESC g_aArgsVMR3ReqCallWait[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVM), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(VMCPUID), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL } +}; +static const REMPARMDESC g_aArgsVMR3ReqFree[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PVMREQ), NULL } +}; + +/* IPRT args */ +static const REMPARMDESC 
g_aArgsRTAssertMsg1[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsRTAssertMsg2[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTAssertMsg2V[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_VALIST, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTLogGetDefaultInstanceEx[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(uint32_t), NULL } +}; +static const REMPARMDESC g_aArgsRTLogFlags[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL } +}; +static const REMPARMDESC g_aArgsRTLogFlush[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL } +}; +static const REMPARMDESC g_aArgsRTLogLoggerEx[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTLogLoggerExV[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(PRTLOGGER), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_VALIST, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTLogPrintf[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTMemProtect[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(unsigned), NULL } +}; +static const REMPARMDESC g_aArgsRTStrPrintf[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_ELLIPSIS, 0, NULL } +}; +static const REMPARMDESC g_aArgsRTStrPrintfV[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(char *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const char *), NULL }, + { REMPARMDESC_FLAGS_VALIST, 0, NULL } +}; +static const REMPARMDESC g_aArgsThread[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(RTTHREAD), NULL } +}; + + +/* CRT args */ +static const REMPARMDESC g_aArgsmemcpy[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(const void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; +static const REMPARMDESC g_aArgsmemset[] = +{ + { REMPARMDESC_FLAGS_INT, sizeof(void *), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(int), NULL }, + { REMPARMDESC_FLAGS_INT, sizeof(size_t), NULL } +}; + +# endif /* !VBOX_USE_BITNESS_SELECTOR */ + +/** @} */ + +/** + * Descriptors for the exported functions. + */ +static const REMFNDESC g_aExports[] = +{ /* pszName, (void *)pv, pParams, cParams, fFlags, cb, pvWrapper. 
*/ + { "REMR3Init", (void *)&pfnREMR3Init, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3InitFinalize", (void *)&pfnREMR3InitFinalize, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3Term", (void *)&pfnREMR3Term, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3Reset", (void *)&pfnREMR3Reset, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3Step", (void *)&pfnREMR3Step, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3BreakpointSet", (void *)&pfnREMR3BreakpointSet, &g_aArgsBreakpoint[0], RT_ELEMENTS(g_aArgsBreakpoint), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3BreakpointClear", (void *)&pfnREMR3BreakpointClear, &g_aArgsBreakpoint[0], RT_ELEMENTS(g_aArgsBreakpoint), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3EmulateInstruction", (void *)&pfnREMR3EmulateInstruction, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3Run", (void *)&pfnREMR3Run, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3State", (void *)&pfnREMR3State, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3StateBack", (void *)&pfnREMR3StateBack, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3StateUpdate", (void *)&pfnREMR3StateUpdate, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3A20Set", (void *)&pfnREMR3A20Set, &g_aArgsA20Set[0], RT_ELEMENTS(g_aArgsA20Set), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3ReplayHandlerNotifications", (void *)&pfnREMR3ReplayHandlerNotifications, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyPhysRamRegister", (void *)&pfnREMR3NotifyPhysRamRegister, &g_aArgsNotifyPhysRamRegister[0], RT_ELEMENTS(g_aArgsNotifyPhysRamRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyPhysRamDeregister", (void *)&pfnREMR3NotifyPhysRamDeregister, &g_aArgsNotifyPhysRamDeregister[0], RT_ELEMENTS(g_aArgsNotifyPhysRamDeregister), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyPhysRomRegister", (void *)&pfnREMR3NotifyPhysRomRegister, &g_aArgsNotifyPhysRomRegister[0], RT_ELEMENTS(g_aArgsNotifyPhysRomRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyHandlerPhysicalModify", (void *)&pfnREMR3NotifyHandlerPhysicalModify, &g_aArgsNotifyHandlerPhysicalModify[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalModify), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyHandlerPhysicalRegister", (void *)&pfnREMR3NotifyHandlerPhysicalRegister, &g_aArgsNotifyHandlerPhysicalRegister[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalRegister), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyHandlerPhysicalDeregister", (void *)&pfnREMR3NotifyHandlerPhysicalDeregister, &g_aArgsNotifyHandlerPhysicalDeregister[0], RT_ELEMENTS(g_aArgsNotifyHandlerPhysicalDeregister), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyInterruptSet", (void *)&pfnREMR3NotifyInterruptSet, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyInterruptClear", (void *)&pfnREMR3NotifyInterruptClear, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), 
REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyTimerPending", (void *)&pfnREMR3NotifyTimerPending, &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyDmaPending", (void *)&pfnREMR3NotifyDmaPending, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyQueuePending", (void *)&pfnREMR3NotifyQueuePending, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyFF", (void *)&pfnREMR3NotifyFF, &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "REMR3NotifyCodePageChanged", (void *)&pfnREMR3NotifyCodePageChanged, &g_aArgsNotifyCodePageChanged[0], RT_ELEMENTS(g_aArgsNotifyCodePageChanged), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3DisasEnableStepping", (void *)&pfnREMR3DisasEnableStepping, &g_aArgsDisasEnableStepping[0], RT_ELEMENTS(g_aArgsDisasEnableStepping), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "REMR3IsPageAccessHandled", (void *)&pfnREMR3IsPageAccessHandled, &g_aArgsIsPageAccessHandled[0], RT_ELEMENTS(g_aArgsIsPageAccessHandled), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL } +}; + +# ifndef VBOX_USE_BITNESS_SELECTOR + +# ifdef VBOX_WITHOUT_REM_LDR_CYCLE +# define VMM_FN(name) NULL +# else +# define VMM_FN(name) (void *)(uintptr_t)& name +# endif + +/** + * Descriptors for the functions imported from VBoxVMM. + */ +static REMFNDESC g_aVMMImports[] = +{ + { "APICUpdatePendingInterrupts", VMM_FN(APICUpdatePendingInterrupts), &g_aArgsAPICUpdatePendingInterrupts[0], RT_ELEMENTS(g_aArgsAPICUpdatePendingInterrupts), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "APICGetTpr", VMM_FN(APICGetTpr), &g_aArgsAPICGetTpr[0], RT_ELEMENTS(g_aArgsAPICGetTpr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "APICSetTpr", VMM_FN(APICSetTpr), &g_aArgsAPICSetTpr[0], RT_ELEMENTS(g_aArgsAPICSetTpr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "CPUMR3RemEnter", VMM_FN(CPUMR3RemEnter), &g_aArgsCPUMR3RemEnter[0], RT_ELEMENTS(g_aArgsCPUMR3RemEnter), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMR3RemLeave", VMM_FN(CPUMR3RemLeave), &g_aArgsCPUMR3RemLeave[0], RT_ELEMENTS(g_aArgsCPUMR3RemLeave), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "CPUMSetChangedFlags", VMM_FN(CPUMSetChangedFlags), &g_aArgsCPUMSetChangedFlags[0], RT_ELEMENTS(g_aArgsCPUMSetChangedFlags), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "CPUMGetGuestCPL", VMM_FN(CPUMGetGuestCPL), &g_aArgsCPUMGetGuestCpl[0], RT_ELEMENTS(g_aArgsCPUMGetGuestCpl), REMFNDESC_FLAGS_RET_INT, sizeof(unsigned), NULL }, + { "CPUMQueryGuestMsr", VMM_FN(CPUMQueryGuestMsr), &g_aArgsCPUMQueryGuestMsr[0], RT_ELEMENTS(g_aArgsCPUMQueryGuestMsr), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL }, + { "CPUMSetGuestMsr", VMM_FN(CPUMSetGuestMsr), &g_aArgsCPUMSetGuestMsr[0], RT_ELEMENTS(g_aArgsCPUMSetGuestMsr), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "CPUMGetGuestCpuId", VMM_FN(CPUMGetGuestCpuId), &g_aArgsCPUMGetGuestCpuId[0], RT_ELEMENTS(g_aArgsCPUMGetGuestCpuId), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "CPUMGetGuestEAX", VMM_FN(CPUMGetGuestEAX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestEBP", VMM_FN(CPUMGetGuestEBP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestEBX", VMM_FN(CPUMGetGuestEBX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestECX", VMM_FN(CPUMGetGuestECX), &g_aArgsVMCPU[0], 
RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestEDI", VMM_FN(CPUMGetGuestEDI), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestEDX", VMM_FN(CPUMGetGuestEDX), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestEIP", VMM_FN(CPUMGetGuestEIP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestESI", VMM_FN(CPUMGetGuestESI), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestESP", VMM_FN(CPUMGetGuestESP), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "CPUMGetGuestCS", VMM_FN(CPUMGetGuestCS), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTSEL), NULL }, + { "CPUMGetGuestSS", VMM_FN(CPUMGetGuestSS), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTSEL), NULL }, + { "CPUMGetGuestCpuVendor", VMM_FN(CPUMGetGuestCpuVendor), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(CPUMCPUVENDOR), NULL }, + { "CPUMQueryGuestCtxPtr", VMM_FN(CPUMQueryGuestCtxPtr), &g_aArgsCPUMQueryGuestCtxPtr[0], RT_ELEMENTS(g_aArgsCPUMQueryGuestCtxPtr), REMFNDESC_FLAGS_RET_INT, sizeof(PCPUMCTX), NULL }, + { "CSAMR3MonitorPage", VMM_FN(CSAMR3MonitorPage), &g_aArgsCSAMR3MonitorPage[0], RT_ELEMENTS(g_aArgsCSAMR3MonitorPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "CSAMR3UnmonitorPage", VMM_FN(CSAMR3UnmonitorPage), &g_aArgsCSAMR3UnmonitorPage[0], RT_ELEMENTS(g_aArgsCSAMR3UnmonitorPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "CSAMR3RecordCallAddress", VMM_FN(CSAMR3RecordCallAddress), &g_aArgsCSAMR3RecordCallAddress[0], RT_ELEMENTS(g_aArgsCSAMR3RecordCallAddress), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, +# if defined(VBOX_WITH_DEBUGGER) && !(defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)) /* the callbacks are problematic */ + { "DBGCRegisterCommands", VMM_FN(DBGCRegisterCommands), &g_aArgsDBGCRegisterCommands[0], RT_ELEMENTS(g_aArgsDBGCRegisterCommands), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, +# endif + { "DBGFR3DisasInstrEx", VMM_FN(DBGFR3DisasInstrEx), &g_aArgsDBGFR3DisasInstrEx[0], RT_ELEMENTS(g_aArgsDBGFR3DisasInstrEx), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "DBGFR3DisasInstrCurrentLogInternal", VMM_FN(DBGFR3DisasInstrCurrentLogInternal), &g_aArgsDBGFR3DisasInstrCurrentLogInternal[0], RT_ELEMENTS(g_aArgsDBGFR3DisasInstrCurrentLogInternal),REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "DBGFR3Info", VMM_FN(DBGFR3Info), &g_aArgsDBGFR3Info[0], RT_ELEMENTS(g_aArgsDBGFR3Info), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "DBGFR3InfoLogRelHlp", VMM_FN(DBGFR3InfoLogRelHlp), NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "DBGFR3AsSymbolByAddr", VMM_FN(DBGFR3AsSymbolByAddr), &g_aArgsDBGFR3AsSymbolByAddr[0], RT_ELEMENTS(g_aArgsDBGFR3AsSymbolByAddr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "DBGFR3AddrFromFlat", VMM_FN(DBGFR3AddrFromFlat), &g_aArgsDBGFR3AddrFromFlat[0], RT_ELEMENTS(g_aArgsDBGFR3AddrFromFlat), REMFNDESC_FLAGS_RET_INT, sizeof(PDBGFADDRESS), NULL }, + { "DISInstrToStr", VMM_FN(DISInstrToStr), &g_aArgsDISInstrToStr[0], RT_ELEMENTS(g_aArgsDISInstrToStr), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "EMR3FatalError", VMM_FN(EMR3FatalError), &g_aArgsEMR3FatalError[0], RT_ELEMENTS(g_aArgsEMR3FatalError), 
REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "EMRemLock", VMM_FN(EMRemLock), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "EMRemUnlock", VMM_FN(EMRemUnlock), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "EMRemIsLockOwner", VMM_FN(EMRemIsLockOwner), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, sizeof(bool), NULL }, + { "EMGetInhibitInterruptsPC", VMM_FN(EMGetInhibitInterruptsPC), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCPTR), NULL }, + { "EMSetInhibitInterruptsPC", VMM_FN(EMSetInhibitInterruptsPC), &g_aArgsEMSetInhibitInterruptsPC[0], RT_ELEMENTS(g_aArgsEMSetInhibitInterruptsPC), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "HMIsEnabledNotMacro", VMM_FN(HMIsEnabledNotMacro), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "HMCanExecuteGuest", VMM_FN(HMCanExecuteGuest), &g_aArgsHMCanExecuteGuest[0], RT_ELEMENTS(g_aArgsHMCanExecuteGuest), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "IOMIOPortRead", VMM_FN(IOMIOPortRead), &g_aArgsIOMIOPortRead[0], RT_ELEMENTS(g_aArgsIOMIOPortRead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "IOMIOPortWrite", VMM_FN(IOMIOPortWrite), &g_aArgsIOMIOPortWrite[0], RT_ELEMENTS(g_aArgsIOMIOPortWrite), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "IOMMMIORead", VMM_FN(IOMMMIORead), &g_aArgsIOMMMIORead[0], RT_ELEMENTS(g_aArgsIOMMMIORead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "IOMMMIOWrite", VMM_FN(IOMMMIOWrite), &g_aArgsIOMMMIOWrite[0], RT_ELEMENTS(g_aArgsIOMMMIOWrite), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "MMR3HeapAlloc", VMM_FN(MMR3HeapAlloc), &g_aArgsMMR3HeapAlloc[0], RT_ELEMENTS(g_aArgsMMR3HeapAlloc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "MMR3HeapAllocZ", VMM_FN(MMR3HeapAllocZ), &g_aArgsMMR3HeapAllocZ[0], RT_ELEMENTS(g_aArgsMMR3HeapAllocZ), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "MMR3PhysGetRamSize", VMM_FN(MMR3PhysGetRamSize), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL }, + { "PATMIsPatchGCAddr", VMM_FN(PATMIsPatchGCAddr), &g_aArgsPATMIsPatchGCAddr[0], RT_ELEMENTS(g_aArgsPATMIsPatchGCAddr), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "PATMR3QueryOpcode", VMM_FN(PATMR3QueryOpcode), &g_aArgsPATMR3QueryOpcode[0], RT_ELEMENTS(g_aArgsPATMR3QueryOpcode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PDMR3DmaRun", VMM_FN(PDMR3DmaRun), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PDMR3CritSectInit", VMM_FN(PDMR3CritSectInit), &g_aArgsPDMR3CritSectInit[0], RT_ELEMENTS(g_aArgsPDMR3CritSectInit), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PDMCritSectEnter", VMM_FN(PDMCritSectEnter), &g_aArgsPDMCritSectEnter[0], RT_ELEMENTS(g_aArgsPDMCritSectEnter), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PDMCritSectLeave", VMM_FN(PDMCritSectLeave), &g_aArgsPTR[0], RT_ELEMENTS(g_aArgsPTR), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, +# ifdef VBOX_STRICT + { "PDMCritSectEnterDebug", VMM_FN(PDMCritSectEnterDebug), &g_aArgsPDMCritSectEnterDebug[0], RT_ELEMENTS(g_aArgsPDMCritSectEnterDebug), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, +# endif + { "PDMGetInterrupt", VMM_FN(PDMGetInterrupt), &g_aArgsPDMGetInterrupt[0], RT_ELEMENTS(g_aArgsPDMGetInterrupt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PDMIsaSetIrq", VMM_FN(PDMIsaSetIrq), &g_aArgsPDMIsaSetIrq[0], RT_ELEMENTS(g_aArgsPDMIsaSetIrq), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { 
"PGMGetGuestMode", VMM_FN(PGMGetGuestMode), &g_aArgsPGMGetGuestMode[0], RT_ELEMENTS(g_aArgsPGMGetGuestMode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMGstGetPage", VMM_FN(PGMGstGetPage), &g_aArgsPGMGstGetPage[0], RT_ELEMENTS(g_aArgsPGMGstGetPage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMInvalidatePage", VMM_FN(PGMInvalidatePage), &g_aArgsPGMInvalidatePage[0], RT_ELEMENTS(g_aArgsPGMInvalidatePage), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMPhysIsGCPhysValid", VMM_FN(PGMPhysIsGCPhysValid), &g_aArgsPGMPhysIsGCPhysValid[0], RT_ELEMENTS(g_aArgsPGMPhysIsGCPhysValid), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "PGMPhysIsA20Enabled", VMM_FN(PGMPhysIsA20Enabled), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "PGMPhysRead", VMM_FN(PGMPhysRead), &g_aArgsPGMPhysRead[0], RT_ELEMENTS(g_aArgsPGMPhysRead), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMPhysSimpleReadGCPtr", VMM_FN(PGMPhysSimpleReadGCPtr), &g_aArgsPGMPhysSimpleReadGCPtr[0], RT_ELEMENTS(g_aArgsPGMPhysSimpleReadGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMPhysWrite", VMM_FN(PGMPhysWrite), &g_aArgsPGMPhysWrite[0], RT_ELEMENTS(g_aArgsPGMPhysWrite), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMChangeMode", VMM_FN(PGMChangeMode), &g_aArgsPGMChangeMode[0], RT_ELEMENTS(g_aArgsPGMChangeMode), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMFlushTLB", VMM_FN(PGMFlushTLB), &g_aArgsPGMFlushTLB[0], RT_ELEMENTS(g_aArgsPGMFlushTLB), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMCr0WpEnabled", VMM_FN(PGMCr0WpEnabled), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMR3PhysReadU8", VMM_FN(PGMR3PhysReadU8), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint8_t), NULL }, + { "PGMR3PhysReadU16", VMM_FN(PGMR3PhysReadU16), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint16_t), NULL }, + { "PGMR3PhysReadU32", VMM_FN(PGMR3PhysReadU32), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint32_t), NULL }, + { "PGMR3PhysReadU64", VMM_FN(PGMR3PhysReadU64), &g_aArgsPGMR3PhysReadUxx[0], RT_ELEMENTS(g_aArgsPGMR3PhysReadUxx), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL }, + { "PGMR3PhysWriteU8", VMM_FN(PGMR3PhysWriteU8), &g_aArgsPGMR3PhysWriteU8[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU8), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMR3PhysWriteU16", VMM_FN(PGMR3PhysWriteU16), &g_aArgsPGMR3PhysWriteU16[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU16), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMR3PhysWriteU32", VMM_FN(PGMR3PhysWriteU32), &g_aArgsPGMR3PhysWriteU32[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMR3PhysWriteU64", VMM_FN(PGMR3PhysWriteU64), &g_aArgsPGMR3PhysWriteU64[0], RT_ELEMENTS(g_aArgsPGMR3PhysWriteU32), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "PGMR3PhysTlbGCPhys2Ptr", VMM_FN(PGMR3PhysTlbGCPhys2Ptr), &g_aArgsPGMR3PhysTlbGCPhys2Ptr[0], RT_ELEMENTS(g_aArgsPGMR3PhysTlbGCPhys2Ptr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "PGMIsLockOwner", VMM_FN(PGMIsLockOwner), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "SSMR3GetGCPtr", VMM_FN(SSMR3GetGCPtr), &g_aArgsSSMR3GetGCPtr[0], RT_ELEMENTS(g_aArgsSSMR3GetGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3GetMem", VMM_FN(SSMR3GetMem), &g_aArgsSSMR3GetMem[0], 
RT_ELEMENTS(g_aArgsSSMR3GetMem), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3GetU32", VMM_FN(SSMR3GetU32), &g_aArgsSSMR3GetU32[0], RT_ELEMENTS(g_aArgsSSMR3GetU32), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3GetUInt", VMM_FN(SSMR3GetUInt), &g_aArgsSSMR3GetUInt[0], RT_ELEMENTS(g_aArgsSSMR3GetUInt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3PutGCPtr", VMM_FN(SSMR3PutGCPtr), &g_aArgsSSMR3PutGCPtr[0], RT_ELEMENTS(g_aArgsSSMR3PutGCPtr), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3PutMem", VMM_FN(SSMR3PutMem), &g_aArgsSSMR3PutMem[0], RT_ELEMENTS(g_aArgsSSMR3PutMem), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3PutU32", VMM_FN(SSMR3PutU32), &g_aArgsSSMR3PutU32[0], RT_ELEMENTS(g_aArgsSSMR3PutU32), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3PutUInt", VMM_FN(SSMR3PutUInt), &g_aArgsSSMR3PutUInt[0], RT_ELEMENTS(g_aArgsSSMR3PutUInt), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "SSMR3RegisterInternal", VMM_FN(SSMR3RegisterInternal), &g_aArgsSSMR3RegisterInternal[0], RT_ELEMENTS(g_aArgsSSMR3RegisterInternal), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "STAMR3Register", VMM_FN(STAMR3Register), &g_aArgsSTAMR3Register[0], RT_ELEMENTS(g_aArgsSTAMR3Register), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "STAMR3Deregister", VMM_FN(STAMR3Deregister), &g_aArgsSTAMR3Deregister[0], RT_ELEMENTS(g_aArgsSTAMR3Deregister), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TMCpuTickGet", VMM_FN(TMCpuTickGet), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(uint64_t), NULL }, + { "TMR3NotifySuspend", VMM_FN(TMR3NotifySuspend), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TMR3NotifyResume", VMM_FN(TMR3NotifyResume), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TMNotifyEndOfExecution", VMM_FN(TMNotifyEndOfExecution), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "TMNotifyStartOfExecution", VMM_FN(TMNotifyStartOfExecution), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "TMTimerPollBool", VMM_FN(TMTimerPollBool), &g_aArgsVMandVMCPU[0], RT_ELEMENTS(g_aArgsVMandVMCPU), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "TMR3TimerQueuesDo", VMM_FN(TMR3TimerQueuesDo), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "TRPMAssertTrap", VMM_FN(TRPMAssertTrap), &g_aArgsTRPMAssertTrap[0], RT_ELEMENTS(g_aArgsTRPMAssertTrap), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TRPMGetErrorCode", VMM_FN(TRPMGetErrorCode), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCUINT), NULL }, + { "TRPMGetFaultAddress", VMM_FN(TRPMGetFaultAddress), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(RTGCUINTPTR),NULL }, + { "TRPMQueryTrap", VMM_FN(TRPMQueryTrap), &g_aArgsTRPMQueryTrap[0], RT_ELEMENTS(g_aArgsTRPMQueryTrap), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TRPMResetTrap", VMM_FN(TRPMResetTrap), &g_aArgsVMCPU[0], RT_ELEMENTS(g_aArgsVMCPU), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "TRPMSetErrorCode", VMM_FN(TRPMSetErrorCode), &g_aArgsTRPMSetErrorCode[0], RT_ELEMENTS(g_aArgsTRPMSetErrorCode), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "TRPMSetFaultAddress", VMM_FN(TRPMSetFaultAddress), &g_aArgsTRPMSetFaultAddress[0], RT_ELEMENTS(g_aArgsTRPMSetFaultAddress), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "VMMGetCpu", VMM_FN(VMMGetCpu), 
&g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(PVMCPU), NULL }, + { "VMR3ReqPriorityCallWait", VMM_FN(VMR3ReqPriorityCallWait), &g_aArgsVMR3ReqCallWait[0], RT_ELEMENTS(g_aArgsVMR3ReqCallWait), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(int), NULL }, + { "VMR3ReqFree", VMM_FN(VMR3ReqFree), &g_aArgsVMR3ReqFree[0], RT_ELEMENTS(g_aArgsVMR3ReqFree), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, +// { "", VMM_FN(), &g_aArgsVM[0], RT_ELEMENTS(g_aArgsVM), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, +}; + + +/** + * Descriptors for the functions imported from VBoxRT. + */ +static REMFNDESC g_aRTImports[] = +{ + { "RTAssertMsg1", (void *)(uintptr_t)&RTAssertMsg1, &g_aArgsRTAssertMsg1[0], RT_ELEMENTS(g_aArgsRTAssertMsg1), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTAssertMsg1Weak", (void *)(uintptr_t)&RTAssertMsg1Weak, &g_aArgsRTAssertMsg1[0], RT_ELEMENTS(g_aArgsRTAssertMsg1), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTAssertMsg2", (void *)(uintptr_t)&RTAssertMsg2, &g_aArgsRTAssertMsg2[0], RT_ELEMENTS(g_aArgsRTAssertMsg2), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL }, + { "RTAssertMsg2V", (void *)(uintptr_t)&RTAssertMsg2V, &g_aArgsRTAssertMsg2V[0], RT_ELEMENTS(g_aArgsRTAssertMsg2V), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_VALIST, 0, NULL }, + { "RTAssertMsg2Weak", (void *)(uintptr_t)&RTAssertMsg2Weak, &g_aArgsRTAssertMsg2[0], RT_ELEMENTS(g_aArgsRTAssertMsg2), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL }, + { "RTAssertShouldPanic", (void *)(uintptr_t)&RTAssertShouldPanic, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(bool), NULL }, + { "RTLogDefaultInstance", (void *)(uintptr_t)&RTLogDefaultInstance, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL }, + { "RTLogRelGetDefaultInstance", (void *)(uintptr_t)&RTLogRelGetDefaultInstance, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL }, + { "RTLogDefaultInstanceEx", (void *)(uintptr_t)&RTLogDefaultInstance, &g_aArgsRTLogGetDefaultInstanceEx[0], RT_ELEMENTS(g_aArgsRTLogGetDefaultInstanceEx), REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL }, + { "RTLogRelGetDefaultInstanceEx", (void *)(uintptr_t)&RTLogRelGetDefaultInstance, &g_aArgsRTLogGetDefaultInstanceEx[0], RT_ELEMENTS(g_aArgsRTLogGetDefaultInstanceEx), REMFNDESC_FLAGS_RET_INT, sizeof(PRTLOGGER), NULL }, + { "RTLogFlags", (void *)(uintptr_t)&RTLogFlags, &g_aArgsRTLogFlags[0], RT_ELEMENTS(g_aArgsRTLogFlags), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "RTLogFlush", (void *)(uintptr_t)&RTLogFlush, &g_aArgsRTLogFlush[0], RT_ELEMENTS(g_aArgsRTLogFlush), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "RTLogLoggerEx", (void *)(uintptr_t)&RTLogLoggerEx, &g_aArgsRTLogLoggerEx[0], RT_ELEMENTS(g_aArgsRTLogLoggerEx), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_ELLIPSIS, 0, NULL }, + { "RTLogLoggerExV", (void *)(uintptr_t)&RTLogLoggerExV, &g_aArgsRTLogLoggerExV[0], RT_ELEMENTS(g_aArgsRTLogLoggerExV), REMFNDESC_FLAGS_RET_VOID | REMFNDESC_FLAGS_VALIST, 0, NULL }, + { "RTLogPrintf", (void *)(uintptr_t)&RTLogPrintf, &g_aArgsRTLogPrintf[0], RT_ELEMENTS(g_aArgsRTLogPrintf), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTLogRelPrintf", (void *)(uintptr_t)&RTLogRelPrintf, &g_aArgsRTLogPrintf[0], RT_ELEMENTS(g_aArgsRTLogPrintf), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTMemAllocTag", (void *)(uintptr_t)&RTMemAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemAllocZTag", (void *)(uintptr_t)&RTMemAllocZTag, &g_aArgsSIZE_TTag[0], 
RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemReallocTag", (void *)(uintptr_t)&RTMemReallocTag, &g_aArgsRTMemReallocTag[0], RT_ELEMENTS(g_aArgsRTMemReallocTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemExecAllocTag", (void *)(uintptr_t)&RTMemExecAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemExecFree", (void *)(uintptr_t)&RTMemExecFree, &g_aArgsPTR_SIZE_T[0], RT_ELEMENTS(g_aArgsPTR_SIZE_T), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTMemFree", (void *)(uintptr_t)&RTMemFree, &g_aArgsPTR[0], RT_ELEMENTS(g_aArgsPTR), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTMemPageAllocTag", (void *)(uintptr_t)&RTMemPageAllocTag, &g_aArgsSIZE_TTag[0], RT_ELEMENTS(g_aArgsSIZE_TTag), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemPageFree", (void *)(uintptr_t)&RTMemPageFree, &g_aArgsPTR_SIZE_T[0], RT_ELEMENTS(g_aArgsPTR_SIZE_T), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTMemProtect", (void *)(uintptr_t)&RTMemProtect, &g_aArgsRTMemProtect[0], RT_ELEMENTS(g_aArgsRTMemProtect), REMFNDESC_FLAGS_RET_INT, sizeof(int), NULL }, + { "RTMemEfAlloc", (void *)(uintptr_t)&RTMemEfAlloc, &g_aArgsSIZE_TTagLoc[0], RT_ELEMENTS(g_aArgsSIZE_TTagLoc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemEfAllocZ", (void *)(uintptr_t)&RTMemEfAllocZ, &g_aArgsSIZE_TTagLoc[0], RT_ELEMENTS(g_aArgsSIZE_TTagLoc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemEfRealloc", (void *)(uintptr_t)&RTMemEfRealloc, &g_aArgsRTMemEfRealloc[0], RT_ELEMENTS(g_aArgsRTMemEfRealloc), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "RTMemEfFree", (void *)(uintptr_t)&RTMemEfFree, &g_aArgsPTRLoc[0], RT_ELEMENTS(g_aArgsPTRLoc), REMFNDESC_FLAGS_RET_VOID, 0, NULL }, + { "RTStrPrintf", (void *)(uintptr_t)&RTStrPrintf, &g_aArgsRTStrPrintf[0], RT_ELEMENTS(g_aArgsRTStrPrintf), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_ELLIPSIS, sizeof(size_t), NULL }, + { "RTStrPrintfV", (void *)(uintptr_t)&RTStrPrintfV, &g_aArgsRTStrPrintfV[0], RT_ELEMENTS(g_aArgsRTStrPrintfV), REMFNDESC_FLAGS_RET_INT | REMFNDESC_FLAGS_VALIST, sizeof(size_t), NULL }, + { "RTThreadSelf", (void *)(uintptr_t)&RTThreadSelf, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(RTTHREAD), NULL }, + { "RTThreadNativeSelf", (void *)(uintptr_t)&RTThreadNativeSelf, NULL, 0, REMFNDESC_FLAGS_RET_INT, sizeof(RTNATIVETHREAD), NULL }, + { "RTLockValidatorWriteLockGetCount", (void *)(uintptr_t)&RTLockValidatorWriteLockGetCount, &g_aArgsThread[0], 0, REMFNDESC_FLAGS_RET_INT, sizeof(int32_t), NULL }, +}; + + +/** + * Descriptors for the functions imported from VBoxRT. + */ +static REMFNDESC g_aCRTImports[] = +{ + { "memcpy", (void *)(uintptr_t)&memcpy, &g_aArgsmemcpy[0], RT_ELEMENTS(g_aArgsmemcpy), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL }, + { "memset", (void *)(uintptr_t)&memset, &g_aArgsmemset[0], RT_ELEMENTS(g_aArgsmemset), REMFNDESC_FLAGS_RET_INT, sizeof(void *), NULL } +/* +floor floor +memcpy memcpy +sqrt sqrt +sqrtf sqrtf +*/ +}; + + +# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE) +/** LIFO of read-write-executable memory chunks used for wrappers. 
*/ +static PREMEXECMEM g_pExecMemHead; +# endif +# endif /* !VBOX_USE_BITNESS_SELECTOR */ + + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +# ifndef VBOX_USE_BITNESS_SELECTOR +static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc); + +# ifdef USE_REM_CALLING_CONVENTION_GLUE +DECLASM(int) WrapGCC2MSC0Int(void); DECLASM(int) WrapGCC2MSC0Int_EndProc(void); +DECLASM(int) WrapGCC2MSC1Int(void); DECLASM(int) WrapGCC2MSC1Int_EndProc(void); +DECLASM(int) WrapGCC2MSC2Int(void); DECLASM(int) WrapGCC2MSC2Int_EndProc(void); +DECLASM(int) WrapGCC2MSC3Int(void); DECLASM(int) WrapGCC2MSC3Int_EndProc(void); +DECLASM(int) WrapGCC2MSC4Int(void); DECLASM(int) WrapGCC2MSC4Int_EndProc(void); +DECLASM(int) WrapGCC2MSC5Int(void); DECLASM(int) WrapGCC2MSC5Int_EndProc(void); +DECLASM(int) WrapGCC2MSC6Int(void); DECLASM(int) WrapGCC2MSC6Int_EndProc(void); +DECLASM(int) WrapGCC2MSC7Int(void); DECLASM(int) WrapGCC2MSC7Int_EndProc(void); +DECLASM(int) WrapGCC2MSC8Int(void); DECLASM(int) WrapGCC2MSC8Int_EndProc(void); +DECLASM(int) WrapGCC2MSC9Int(void); DECLASM(int) WrapGCC2MSC9Int_EndProc(void); +DECLASM(int) WrapGCC2MSC10Int(void); DECLASM(int) WrapGCC2MSC10Int_EndProc(void); +DECLASM(int) WrapGCC2MSC11Int(void); DECLASM(int) WrapGCC2MSC11Int_EndProc(void); +DECLASM(int) WrapGCC2MSC12Int(void); DECLASM(int) WrapGCC2MSC12Int_EndProc(void); +DECLASM(int) WrapGCC2MSCVariadictInt(void); DECLASM(int) WrapGCC2MSCVariadictInt_EndProc(void); +DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal(void); DECLASM(int) WrapGCC2MSC_SSMR3RegisterInternal_EndProc(void); + +DECLASM(int) WrapMSC2GCC0Int(void); DECLASM(int) WrapMSC2GCC0Int_EndProc(void); +DECLASM(int) WrapMSC2GCC1Int(void); DECLASM(int) WrapMSC2GCC1Int_EndProc(void); +DECLASM(int) WrapMSC2GCC2Int(void); DECLASM(int) WrapMSC2GCC2Int_EndProc(void); +DECLASM(int) WrapMSC2GCC3Int(void); DECLASM(int) WrapMSC2GCC3Int_EndProc(void); +DECLASM(int) WrapMSC2GCC4Int(void); DECLASM(int) WrapMSC2GCC4Int_EndProc(void); +DECLASM(int) WrapMSC2GCC5Int(void); DECLASM(int) WrapMSC2GCC5Int_EndProc(void); +DECLASM(int) WrapMSC2GCC6Int(void); DECLASM(int) WrapMSC2GCC6Int_EndProc(void); +DECLASM(int) WrapMSC2GCC7Int(void); DECLASM(int) WrapMSC2GCC7Int_EndProc(void); +DECLASM(int) WrapMSC2GCC8Int(void); DECLASM(int) WrapMSC2GCC8Int_EndProc(void); +DECLASM(int) WrapMSC2GCC9Int(void); DECLASM(int) WrapMSC2GCC9Int_EndProc(void); +# endif + + +# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE) +/** + * Allocates a block of memory for glue code. + * + * The returned memory is padded with INT3s. + * + * @returns Pointer to the allocated memory. + * @param The amount of memory to allocate. 
+ */ +static void *remAllocGlue(size_t cb) +{ + PREMEXECMEM pCur = g_pExecMemHead; + uint32_t cbAligned = (uint32_t)RT_ALIGN_32(cb, 32); + while (pCur) + { + if (pCur->cb - pCur->off >= cbAligned) + { + void *pv = (uint8_t *)pCur + pCur->off; + pCur->off += cbAligned; + return memset(pv, 0xcc, cbAligned); + } + pCur = pCur->pNext; + } + + /* add a new chunk */ + AssertReturn(_64K - RT_ALIGN_Z(sizeof(*pCur), 32) > cbAligned, NULL); + pCur = (PREMEXECMEM)RTMemExecAlloc(_64K); + AssertReturn(pCur, NULL); + pCur->cb = _64K; + pCur->off = RT_ALIGN_32(sizeof(*pCur), 32) + cbAligned; + pCur->pNext = g_pExecMemHead; + g_pExecMemHead = pCur; + return memset((uint8_t *)pCur + RT_ALIGN_Z(sizeof(*pCur), 32), 0xcc, cbAligned); +} +# endif /* USE_REM_CALLING_CONVENTION_GLUE || USE_REM_IMPORT_JUMP_GLUE */ + + +# ifdef USE_REM_CALLING_CONVENTION_GLUE +/** + * Checks if a function is all straight forward integers. + * + * @returns True if it's simple, false if it's bothersome. + * @param pDesc The function descriptor. + */ +static bool remIsFunctionAllInts(PCREMFNDESC pDesc) +{ + if ( ( (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_INT + || pDesc->cbReturn > sizeof(uint64_t)) + && (pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) != REMFNDESC_FLAGS_RET_VOID) + return false; + unsigned i = pDesc->cParams; + while (i-- > 0) + switch (pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) + { + case REMPARMDESC_FLAGS_INT: + case REMPARMDESC_FLAGS_GCPTR: + case REMPARMDESC_FLAGS_GCPHYS: + case REMPARMDESC_FLAGS_HCPHYS: + break; + + default: + AssertReleaseMsgFailed(("Invalid param flags %#x for #%d of %s!\n", pDesc->paParams[i].fFlags, i, pDesc->pszName)); + case REMPARMDESC_FLAGS_VALIST: + case REMPARMDESC_FLAGS_ELLIPSIS: + case REMPARMDESC_FLAGS_FLOAT: + case REMPARMDESC_FLAGS_STRUCT: + case REMPARMDESC_FLAGS_PFN: + return false; + } + return true; +} + + +/** + * Checks if the function has an ellipsis (...) argument. + * + * @returns true if it has an ellipsis, otherwise false. + * @param pDesc The function descriptor. + */ +static bool remHasFunctionEllipsis(PCREMFNDESC pDesc) +{ + unsigned i = pDesc->cParams; + while (i-- > 0) + if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_ELLIPSIS) + return true; + return false; +} + + +/** + * Checks if the function uses floating point (FP) arguments or return value. + * + * @returns true if it uses floating point, otherwise false. + * @param pDesc The function descriptor. + */ +static bool remIsFunctionUsingFP(PCREMFNDESC pDesc) +{ + if ((pDesc->fFlags & REMFNDESC_FLAGS_RET_TYPE_MASK) == REMFNDESC_FLAGS_RET_FLOAT) + return true; + unsigned i = pDesc->cParams; + while (i-- > 0) + if ((pDesc->paParams[i].fFlags & REMPARMDESC_FLAGS_TYPE_MASK) == REMPARMDESC_FLAGS_FLOAT) + return true; + return false; +} + + +/** @name The export and import fixups. + * @{ */ +# define REM_FIXUP_32_REAL_STUFF UINT32_C(0xdeadbeef) +# define REM_FIXUP_64_REAL_STUFF UINT64_C(0xdeadf00df00ddead) +# define REM_FIXUP_64_DESC UINT64_C(0xdead00010001dead) +# define REM_FIXUP_64_LOG_ENTRY UINT64_C(0xdead00020002dead) +# define REM_FIXUP_64_LOG_EXIT UINT64_C(0xdead00030003dead) +# define REM_FIXUP_64_WRAP_GCC_CB UINT64_C(0xdead00040004dead) +/** @} */ + + +/** + * Entry logger function. + * + * @param pDesc The description. + */ +DECLASM(void) remLogEntry(PCREMFNDESC pDesc) +{ + RTPrintf("calling %s\n", pDesc->pszName); +} + + +/** + * Exit logger function. + * + * @param pDesc The description. + * @param pvRet The return code. 
+ */ +DECLASM(void) remLogExit(PCREMFNDESC pDesc, void *pvRet) +{ + RTPrintf("returning %p from %s\n", pvRet, pDesc->pszName); +} + + +/** + * Creates a wrapper for the specified callback function at run time. + * + * @param pDesc The function descriptor. + * @param pValue Upon entry *pValue contains the address of the function to be wrapped. + * Upon return *pValue contains the address of the wrapper glue function. + * @param iParam The parameter index in the function descriptor (0 based). + * If UINT32_MAX pDesc is the descriptor for *pValue. + */ +DECLASM(void) remWrapGCCCallback(PCREMFNDESC pDesc, PRTUINTPTR pValue, uint32_t iParam) +{ + AssertPtr(pDesc); + AssertPtr(pValue); + + /* + * Simple? + */ + if (!*pValue) + return; + + /* + * Locate the right function descriptor. + */ + if (iParam != UINT32_MAX) + { + AssertRelease(iParam < pDesc->cParams); + pDesc = (PCREMFNDESC)pDesc->paParams[iParam].pvExtra; + AssertPtr(pDesc); + } + + /* + * When we get serious, here is where to insert the hash table lookup. + */ + + /* + * Create a new glue patch. + */ +# ifdef RT_OS_WINDOWS + int rc = remGenerateExportGlue(pValue, pDesc); +# else +# error "port me" +# endif + AssertReleaseRC(rc); + + /* + * Add it to the hash (later) + */ +} + + +/** + * Fixes export glue. + * + * @param pvGlue The glue code. + * @param cb The size of the glue code. + * @param pvExport The address of the export we're wrapping. + * @param pDesc The export descriptor. + */ +static void remGenerateExportGlueFixup(void *pvGlue, size_t cb, uintptr_t uExport, PCREMFNDESC pDesc) +{ + union + { + uint8_t *pu8; + int32_t *pi32; + uint32_t *pu32; + uint64_t *pu64; + void *pv; + } u; + u.pv = pvGlue; + + while (cb >= 4) + { + /** @todo add defines for the fixup constants... */ + if (*u.pu32 == REM_FIXUP_32_REAL_STUFF) + { + /* 32-bit rel jmp/call to real export. */ + *u.pi32 = uExport - (uintptr_t)(u.pi32 + 1); + Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == uExport); + u.pi32++; + cb -= 4; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF) + { + /* 64-bit address to the real export. */ + *u.pu64++ = uExport; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC) + { + /* 64-bit address to the descriptor. */ + *u.pu64++ = (uintptr_t)pDesc; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remWrapGCCCallback; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remLogEntry; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remLogExit; + cb -= 8; + continue; + } + + /* move on. */ + u.pu8++; + cb--; + } +} + + +/** + * Fixes import glue. + * + * @param pvGlue The glue code. + * @param cb The size of the glue code. + * @param pDesc The import descriptor. + */ +static void remGenerateImportGlueFixup(void *pvGlue, size_t cb, PCREMFNDESC pDesc) +{ + union + { + uint8_t *pu8; + int32_t *pi32; + uint32_t *pu32; + uint64_t *pu64; + void *pv; + } u; + u.pv = pvGlue; + + while (cb >= 4) + { + if (*u.pu32 == REM_FIXUP_32_REAL_STUFF) + { + /* 32-bit rel jmp/call to real function. 
*/ + *u.pi32 = (uintptr_t)pDesc->pv - (uintptr_t)(u.pi32 + 1); + Assert((uintptr_t)(u.pi32 + 1) + *u.pi32 == (uintptr_t)pDesc->pv); + u.pi32++; + cb -= 4; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_REAL_STUFF) + { + /* 64-bit address to the real function. */ + *u.pu64++ = (uintptr_t)pDesc->pv; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_DESC) + { + /* 64-bit address to the descriptor. */ + *u.pu64++ = (uintptr_t)pDesc; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_WRAP_GCC_CB) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remWrapGCCCallback; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_ENTRY) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remLogEntry; + cb -= 8; + continue; + } + if (cb >= 8 && *u.pu64 == REM_FIXUP_64_LOG_EXIT) + { + /* 64-bit address to the entry logger function. */ + *u.pu64++ = (uintptr_t)remLogExit; + cb -= 8; + continue; + } + + /* move on. */ + u.pu8++; + cb--; + } +} + +# endif /* USE_REM_CALLING_CONVENTION_GLUE */ + + +/** + * Generate wrapper glue code for an export. + * + * This is only used on win64 when loading a 64-bit linux module. So, on other + * platforms it will not do anything. + * + * @returns VBox status code. + * @param pValue IN: Where to get the address of the function to wrap. + * OUT: Where to store the glue address. + * @param pDesc The export descriptor. + */ +static int remGenerateExportGlue(PRTUINTPTR pValue, PCREMFNDESC pDesc) +{ +# ifdef USE_REM_CALLING_CONVENTION_GLUE + uintptr_t *ppfn = (uintptr_t *)pDesc->pv; + + uintptr_t pfn = 0; /* a little hack for the callback glue */ + if (!ppfn) + ppfn = &pfn; + + if (!*ppfn) + { + if (remIsFunctionAllInts(pDesc)) + { + static const struct { void *pvStart, *pvEnd; } s_aTemplates[] = + { + { (void *)&WrapMSC2GCC0Int, (void *)&WrapMSC2GCC0Int_EndProc }, + { (void *)&WrapMSC2GCC1Int, (void *)&WrapMSC2GCC1Int_EndProc }, + { (void *)&WrapMSC2GCC2Int, (void *)&WrapMSC2GCC2Int_EndProc }, + { (void *)&WrapMSC2GCC3Int, (void *)&WrapMSC2GCC3Int_EndProc }, + { (void *)&WrapMSC2GCC4Int, (void *)&WrapMSC2GCC4Int_EndProc }, + { (void *)&WrapMSC2GCC5Int, (void *)&WrapMSC2GCC5Int_EndProc }, + { (void *)&WrapMSC2GCC6Int, (void *)&WrapMSC2GCC6Int_EndProc }, + { (void *)&WrapMSC2GCC7Int, (void *)&WrapMSC2GCC7Int_EndProc }, + { (void *)&WrapMSC2GCC8Int, (void *)&WrapMSC2GCC8Int_EndProc }, + { (void *)&WrapMSC2GCC9Int, (void *)&WrapMSC2GCC9Int_EndProc }, + }; + const unsigned i = pDesc->cParams; + AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName)); + + /* duplicate the patch. */ + const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart; + uint8_t *pb = (uint8_t *)remAllocGlue(cb); + AssertReturn(pb, VERR_NO_MEMORY); + memcpy(pb, s_aTemplates[i].pvStart, cb); + + /* fix it up. */ + remGenerateExportGlueFixup(pb, cb, *pValue, pDesc); + *ppfn = (uintptr_t)pb; + } + else + { + /* custom hacks - it's simpler to make assembly templates than writing a more generic code generator... */ + static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] = + { + { "somefunction", (PFNRT)&WrapMSC2GCC9Int, (PFNRT)&WrapMSC2GCC9Int_EndProc }, + }; + unsigned i; + for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++) + if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName)) + break; + AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! 
%s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED); + + /* duplicate the patch. */ + const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart; + uint8_t *pb = (uint8_t *)remAllocGlue(cb); + AssertReturn(pb, VERR_NO_MEMORY); + memcpy(pb, s_aTemplates[i].pvStart, cb); + + /* fix it up. */ + remGenerateExportGlueFixup(pb, cb, *pValue, pDesc); + *ppfn = (uintptr_t)pb; + } + } + *pValue = *ppfn; + return VINF_SUCCESS; +# else /* !USE_REM_CALLING_CONVENTION_GLUE */ + return VINF_SUCCESS; +# endif /* !USE_REM_CALLING_CONVENTION_GLUE */ +} + + +/** + * Generate wrapper glue code for an import. + * + * This is only used on win64 when loading a 64-bit linux module. So, on other + * platforms it will simply return the address of the imported function + * without generating any glue code. + * + * @returns VBox status code. + * @param pValue Where to store the glue address. + * @param pDesc The export descriptor. + */ +static int remGenerateImportGlue(PRTUINTPTR pValue, PREMFNDESC pDesc) +{ +# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE) + if (!pDesc->pvWrapper) + { +# ifdef USE_REM_CALLING_CONVENTION_GLUE + if (remIsFunctionAllInts(pDesc)) + { + static const struct { void *pvStart, *pvEnd; } s_aTemplates[] = + { + { (void *)&WrapGCC2MSC0Int, (void *)&WrapGCC2MSC0Int_EndProc }, + { (void *)&WrapGCC2MSC1Int, (void *)&WrapGCC2MSC1Int_EndProc }, + { (void *)&WrapGCC2MSC2Int, (void *)&WrapGCC2MSC2Int_EndProc }, + { (void *)&WrapGCC2MSC3Int, (void *)&WrapGCC2MSC3Int_EndProc }, + { (void *)&WrapGCC2MSC4Int, (void *)&WrapGCC2MSC4Int_EndProc }, + { (void *)&WrapGCC2MSC5Int, (void *)&WrapGCC2MSC5Int_EndProc }, + { (void *)&WrapGCC2MSC6Int, (void *)&WrapGCC2MSC6Int_EndProc }, + { (void *)&WrapGCC2MSC7Int, (void *)&WrapGCC2MSC7Int_EndProc }, + { (void *)&WrapGCC2MSC8Int, (void *)&WrapGCC2MSC8Int_EndProc }, + { (void *)&WrapGCC2MSC9Int, (void *)&WrapGCC2MSC9Int_EndProc }, + { (void *)&WrapGCC2MSC10Int, (void *)&WrapGCC2MSC10Int_EndProc }, + { (void *)&WrapGCC2MSC11Int, (void *)&WrapGCC2MSC11Int_EndProc }, + { (void *)&WrapGCC2MSC12Int, (void *)&WrapGCC2MSC12Int_EndProc } + }; + const unsigned i = pDesc->cParams; + AssertReleaseMsg(i < RT_ELEMENTS(s_aTemplates), ("%d (%s)\n", i, pDesc->pszName)); + + /* duplicate the patch. */ + const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart; + pDesc->pvWrapper = remAllocGlue(cb); + AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY); + memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb); + + /* fix it up. */ + remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc); + } + else if ( remHasFunctionEllipsis(pDesc) + && !remIsFunctionUsingFP(pDesc)) + { + /* duplicate the patch. */ + const size_t cb = (uintptr_t)&WrapGCC2MSCVariadictInt_EndProc - (uintptr_t)&WrapGCC2MSCVariadictInt; + pDesc->pvWrapper = remAllocGlue(cb); + AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY); + memcpy(pDesc->pvWrapper, (void *)&WrapGCC2MSCVariadictInt, cb); + + /* fix it up. */ + remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc); + } + else + { + /* custom hacks - it's simpler to make assembly templates than writing a more generic code generator... 
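+               Imports that neither the integer nor the variadic templates can
+               handle get a hand written wrapper.  Currently that is only
+               SSMR3RegisterInternal, whose callback arguments are taken care
+               of by WrapGCC2MSC_SSMR3RegisterInternal in VBoxREMWrapperA.asm.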
*/ + static const struct { const char *pszName; PFNRT pvStart, pvEnd; } s_aTemplates[] = + { + { "SSMR3RegisterInternal", (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal, (PFNRT)&WrapGCC2MSC_SSMR3RegisterInternal_EndProc }, + }; + unsigned i; + for (i = 0; i < RT_ELEMENTS(s_aTemplates); i++) + if (!strcmp(pDesc->pszName, s_aTemplates[i].pszName)) + break; + AssertReleaseMsgReturn(i < RT_ELEMENTS(s_aTemplates), ("Not implemented! %s\n", pDesc->pszName), VERR_NOT_IMPLEMENTED); + + /* duplicate the patch. */ + const size_t cb = (uintptr_t)s_aTemplates[i].pvEnd - (uintptr_t)s_aTemplates[i].pvStart; + pDesc->pvWrapper = remAllocGlue(cb); + AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY); + memcpy(pDesc->pvWrapper, s_aTemplates[i].pvStart, cb); + + /* fix it up. */ + remGenerateImportGlueFixup((uint8_t *)pDesc->pvWrapper, cb, pDesc); + } +# else /* !USE_REM_CALLING_CONVENTION_GLUE */ + + /* + * Generate a jump patch. + */ + uint8_t *pb; +# ifdef RT_ARCH_AMD64 + pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(32); + AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY); + /**pb++ = 0xcc;*/ + *pb++ = 0xff; + *pb++ = 0x24; + *pb++ = 0x25; + *(uint32_t *)pb = (uintptr_t)pb + 5; + pb += 5; + *(uint64_t *)pb = (uint64_t)pDesc->pv; +# else + pDesc->pvWrapper = pb = (uint8_t *)remAllocGlue(8); + AssertReturn(pDesc->pvWrapper, VERR_NO_MEMORY); + *pb++ = 0xea; + *(uint32_t *)pb = (uint32_t)pDesc->pv; +# endif +# endif /* !USE_REM_CALLING_CONVENTION_GLUE */ + } + *pValue = (uintptr_t)pDesc->pvWrapper; +# else /* !USE_REM_CALLING_CONVENTION_GLUE */ + *pValue = (uintptr_t)pDesc->pv; +# endif /* !USE_REM_CALLING_CONVENTION_GLUE */ + return VINF_SUCCESS; +} + + +/** + * Resolve an external symbol during RTLdrGetBits(). + * + * @returns iprt status code. + * @param hLdrMod The loader module handle. + * @param pszModule Module name. + * @param pszSymbol Symbol name, NULL if uSymbol should be used. + * @param uSymbol Symbol ordinal, ~0 if pszSymbol should be used. + * @param pValue Where to store the symbol value (address). + * @param pvUser User argument. + */ +static DECLCALLBACK(int) remGetImport(RTLDRMOD hLdrMod, const char *pszModule, const char *pszSymbol, unsigned uSymbol, RTUINTPTR *pValue, void *pvUser) +{ + unsigned i; + for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++) + if (!strcmp(g_aVMMImports[i].pszName, pszSymbol)) + return remGenerateImportGlue(pValue, &g_aVMMImports[i]); + for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++) + if (!strcmp(g_aRTImports[i].pszName, pszSymbol)) + return remGenerateImportGlue(pValue, &g_aRTImports[i]); + for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++) + if (!strcmp(g_aCRTImports[i].pszName, pszSymbol)) + return remGenerateImportGlue(pValue, &g_aCRTImports[i]); + LogRel(("Missing REM Import: %s\n", pszSymbol)); +# if 1 + *pValue = 0; + AssertMsgFailed(("%s.%s\n", pszModule, pszSymbol)); + return VERR_SYMBOL_NOT_FOUND; +# else + return remGenerateImportGlue(pValue, &g_aCRTImports[0]); +# endif +} + +/** + * Loads the linux object, resolves all imports and exports. + * + * @returns VBox status code. + */ +static int remLoadLinuxObj(void) +{ + size_t offFilename; + char szPath[RTPATH_MAX]; + int rc = RTPathAppPrivateArch(szPath, sizeof(szPath) - 32); + AssertRCReturn(rc, rc); + offFilename = strlen(szPath); + +# ifdef VBOX_WITHOUT_REM_LDR_CYCLE + /* + * Resolve all the VBoxVMM references. 
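+     * (Without a link-time reference to VBoxVMM the pv member of each
+     * g_aVMMImports entry has to be filled in here via RTLdrGetSymbol
+     * before any import glue referring to it is generated.)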
+ */ + if (g_ModVMM != NIL_RTLDRMOD) + { + rc = SUPR3HardenedLdrLoadAppPriv("VBoxVMM", &g_ModVMM, RTLDRLOAD_FLAGS_LOCAL, NULL); + AssertRCReturn(rc, rc); + for (size_t i = 0; i < RT_ELEMENTS(g_aVMMImports); i++) + { + rc = RTLdrGetSymbol(g_ModVMM, g_aVMMImports[i].pszName, &g_aVMMImports[i].pv); + AssertLogRelMsgRCReturn(rc, ("RTLdrGetSymbol(VBoxVMM,%s,) -> %Rrc\n", g_aVMMImports[i].pszName, rc), rc); + } + } +# endif + + /* + * Load the VBoxREM2.rel object/DLL. + */ + strcpy(&szPath[offFilename], "/VBoxREM2.rel"); + rc = RTLdrOpen(szPath, 0, RTLDRARCH_HOST, &g_ModREM2); + if (RT_SUCCESS(rc)) + { + g_cbREM2 = RTLdrSize(g_ModREM2); + g_pvREM2 = RTMemExecAlloc(g_cbREM2); + if (g_pvREM2) + { + RTPathChangeToUnixSlashes(szPath, true); +# ifdef DEBUG /* How to load the VBoxREM2.rel symbols into the GNU debugger. */ + RTPrintf("VBoxREMWrapper: (gdb) add-symbol-file %s 0x%p\n", szPath, g_pvREM2); +# endif + LogRel(("REM: Loading %s at 0x%p (%d bytes)\n" + "REM: (gdb) add-symbol-file %s 0x%p\n", + szPath, g_pvREM2, RTLdrSize(g_ModREM2), szPath, g_pvREM2)); + rc = RTLdrGetBits(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, remGetImport, NULL); + if (RT_SUCCESS(rc)) + { + /* + * Resolve exports. + */ + unsigned i; + for (i = 0; i < RT_ELEMENTS(g_aExports); i++) + { + RTUINTPTR Value; + rc = RTLdrGetSymbolEx(g_ModREM2, g_pvREM2, (RTUINTPTR)g_pvREM2, UINT32_MAX, g_aExports[i].pszName, &Value); + AssertMsgRC(rc, ("%s rc=%Rrc\n", g_aExports[i].pszName, rc)); + if (RT_FAILURE(rc)) + break; + rc = remGenerateExportGlue(&Value, &g_aExports[i]); + if (RT_FAILURE(rc)) + break; + *(void **)g_aExports[i].pv = (void *)(uintptr_t)Value; + } + return rc; + } + + RTMemExecFree(g_pvREM2, g_cbREM2); + g_pvREM2 = NULL; + } + g_cbREM2 = 0; + RTLdrClose(g_ModREM2); + g_ModREM2 = NIL_RTLDRMOD; + } + LogRel(("REM: failed loading '%s', rc=%Rrc\n", szPath, rc)); + return rc; +} + + +/** + * Unloads the linux object, freeing up all resources (dlls and + * import glue) we allocated during remLoadLinuxObj(). + */ +static void remUnloadLinuxObj(void) +{ + unsigned i; + + /* close modules. */ + RTLdrClose(g_ModREM2); + g_ModREM2 = NIL_RTLDRMOD; + RTMemExecFree(g_pvREM2, g_cbREM2); + g_pvREM2 = NULL; + g_cbREM2 = 0; + + /* clear the pointers. */ + for (i = 0; i < RT_ELEMENTS(g_aExports); i++) + *(void **)g_aExports[i].pv = NULL; +# if defined(USE_REM_CALLING_CONVENTION_GLUE) || defined(USE_REM_IMPORT_JUMP_GLUE) + for (i = 0; i < RT_ELEMENTS(g_aVMMImports); i++) + g_aVMMImports[i].pvWrapper = NULL; + for (i = 0; i < RT_ELEMENTS(g_aRTImports); i++) + g_aRTImports[i].pvWrapper = NULL; + for (i = 0; i < RT_ELEMENTS(g_aCRTImports); i++) + g_aCRTImports[i].pvWrapper = NULL; + + /* free wrapper memory. */ + while (g_pExecMemHead) + { + PREMEXECMEM pCur = g_pExecMemHead; + g_pExecMemHead = pCur->pNext; + memset(pCur, 0xcc, pCur->cb); + RTMemExecFree(pCur, pCur->cb); + } +# endif +} + +# else /* VBOX_USE_BITNESS_SELECTOR */ + +/** + * Checks if 64-bit support is enabled. + * + * @returns true / false. + * @param pVM Pointer to the shared VM structure. 
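+ *
+ * @remarks Simply queries the CFGM value REM/64bitEnabled (default false),
+ *          going through manually resolved CFGMR3* entry points when
+ *          VBOX_WITHOUT_REM_LDR_CYCLE is defined.  A rough sketch of how such
+ *          a key might be set up by VM construction code (ordinary CFGM
+ *          insert calls, nothing this file does itself):
+ *          @code
+ *              PCFGMNODE pRemCfg;
+ *              CFGMR3InsertNode(CFGMR3GetRoot(pVM), "REM", &pRemCfg);
+ *              CFGMR3InsertInteger(pRemCfg, "64bitEnabled", 1);
+ *          @endcode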
+ */ +static bool remIs64bitEnabled(PVM pVM) +{ + bool f; + int rc; + +# ifdef VBOX_WITHOUT_REM_LDR_CYCLE + if (g_ModVMM == NIL_RTLDRMOD) + { + rc = SUPR3HardenedLdrLoadAppPriv("VBoxVMM", &g_ModVMM, RTLDRLOAD_FLAGS_LOCAL, NULL); + AssertRCReturn(rc, false); + } + + DECLCALLBACKMEMBER(PCFGMNODE, pfnCFGMR3GetRoot)(PVM); + rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3GetRoot", (void **)&pfnCFGMR3GetRoot); + AssertRCReturn(rc, false); + + DECLCALLBACKMEMBER(PCFGMNODE, pfnCFGMR3GetChild)(PCFGMNODE, const char *); + rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3GetChild", (void **)&pfnCFGMR3GetChild); + AssertRCReturn(rc, false); + + DECLCALLBACKMEMBER(int, pfnCFGMR3QueryBoolDef)(PCFGMNODE, const char *, bool *, bool); + rc = RTLdrGetSymbol(g_ModVMM, "CFGMR3QueryBoolDef", (void **)&pfnCFGMR3QueryBoolDef); + AssertRCReturn(rc, false); + + rc = pfnCFGMR3QueryBoolDef(pfnCFGMR3GetChild(pfnCFGMR3GetRoot(pVM), "REM"), "64bitEnabled", &f, false); +# else + rc = CFGMR3QueryBoolDef(CFGMR3GetChild(CFGMR3GetRoot(pVM), "REM"), "64bitEnabled", &f, false); +# endif + AssertRCReturn(rc, false); + return f; +} + + +/** + * Loads real REM object, resolves all exports (imports are done by native loader). + * + * @returns VBox status code. + */ +static int remLoadProperObj(PVM pVM) +{ + /* + * Load the VBoxREM32/64 object/DLL. + */ + const char *pszModule = remIs64bitEnabled(pVM) ? "VBoxREM64" : "VBoxREM32"; + int rc = SUPR3HardenedLdrLoadAppPriv(pszModule, &g_ModREM2, RTLDRLOAD_FLAGS_LOCAL, NULL); + if (RT_SUCCESS(rc)) + { + LogRel(("REM: %s\n", pszModule)); + + /* + * Resolve exports. + */ + unsigned i; + for (i = 0; i < RT_ELEMENTS(g_aExports); i++) + { + void *pvValue; + rc = RTLdrGetSymbol(g_ModREM2, g_aExports[i].pszName, &pvValue); + AssertLogRelMsgRCBreak(rc, ("%s rc=%Rrc\n", g_aExports[i].pszName, rc)); + *(void **)g_aExports[i].pv = pvValue; + } + } + + return rc; +} + + +/** + * Unloads the real REM object. + */ +static void remUnloadProperObj(void) +{ + /* close module. 
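+       (Just drop the reference to VBoxREM32/VBoxREM64 taken in
+       remLoadProperObj; unlike remUnloadLinuxObj there is no glue memory or
+       import state to clean up on this path.)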
*/ + RTLdrClose(g_ModREM2); + g_ModREM2 = NIL_RTLDRMOD; +} + +# endif /* VBOX_USE_BITNESS_SELECTOR */ +#endif /* USE_REM_STUBS */ + +REMR3DECL(int) REMR3Init(PVM pVM) +{ +#ifdef USE_REM_STUBS + return VINF_SUCCESS; + +#elif defined(VBOX_USE_BITNESS_SELECTOR) + if (!pfnREMR3Init) + { + int rc = remLoadProperObj(pVM); + if (RT_FAILURE(rc)) + return rc; + } + return pfnREMR3Init(pVM); + +#else + if (!pfnREMR3Init) + { + int rc = remLoadLinuxObj(); + if (RT_FAILURE(rc)) + return rc; + } + return pfnREMR3Init(pVM); +#endif +} + +REMR3DECL(int) REMR3InitFinalize(PVM pVM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3InitFinalize)); + return pfnREMR3InitFinalize(pVM); +#endif +} + +REMR3DECL(int) REMR3Term(PVM pVM) +{ +#ifdef USE_REM_STUBS + return VINF_SUCCESS; + +#elif defined(VBOX_USE_BITNESS_SELECTOR) + int rc; + Assert(VALID_PTR(pfnREMR3Term)); + rc = pfnREMR3Term(pVM); + remUnloadProperObj(); + return rc; + +#else + int rc; + Assert(VALID_PTR(pfnREMR3Term)); + rc = pfnREMR3Term(pVM); + remUnloadLinuxObj(); + return rc; +#endif +} + +REMR3DECL(void) REMR3Reset(PVM pVM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3Reset)); + pfnREMR3Reset(pVM); +#endif +} + +REMR3DECL(int) REMR3Step(PVM pVM, PVMCPU pVCpu) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3Step)); + return pfnREMR3Step(pVM, pVCpu); +#endif +} + +REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address) +{ +#ifdef USE_REM_STUBS + return VERR_REM_NO_MORE_BP_SLOTS; +#else + Assert(VALID_PTR(pfnREMR3BreakpointSet)); + return pfnREMR3BreakpointSet(pVM, Address); +#endif +} + +REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3BreakpointClear)); + return pfnREMR3BreakpointClear(pVM, Address); +#endif +} + +REMR3DECL(int) REMR3EmulateInstruction(PVM pVM, PVMCPU pVCpu) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3EmulateInstruction)); + return pfnREMR3EmulateInstruction(pVM, pVCpu); +#endif +} + +REMR3DECL(int) REMR3Run(PVM pVM, PVMCPU pVCpu) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3Run)); + return pfnREMR3Run(pVM, pVCpu); +#endif +} + +REMR3DECL(int) REMR3State(PVM pVM, PVMCPU pVCpu) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3State)); + return pfnREMR3State(pVM, pVCpu); +#endif +} + +REMR3DECL(int) REMR3StateBack(PVM pVM, PVMCPU pVCpu) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3StateBack)); + return pfnREMR3StateBack(pVM, pVCpu); +#endif +} + +REMR3DECL(void) REMR3StateUpdate(PVM pVM, PVMCPU pVCpu) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3StateUpdate)); + pfnREMR3StateUpdate(pVM, pVCpu); +#endif +} + +REMR3DECL(void) REMR3A20Set(PVM pVM, PVMCPU pVCpu, bool fEnable) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3A20Set)); + pfnREMR3A20Set(pVM, pVCpu, fEnable); +#endif +} + +REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3ReplayHandlerNotifications)); + pfnREMR3ReplayHandlerNotifications(pVM); +#endif +} + +REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, PVMCPU pVCpu, RTGCPTR pvCodePage) +{ +#ifdef USE_REM_STUBS + return VINF_SUCCESS; +#else + Assert(VALID_PTR(pfnREMR3NotifyCodePageChanged)); + return pfnREMR3NotifyCodePageChanged(pVM, pVCpu, pvCodePage); +#endif +} + +REMR3DECL(void) 
REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, unsigned fFlags) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyPhysRamRegister)); + pfnREMR3NotifyPhysRamRegister(pVM, GCPhys, cb, fFlags); +#endif +} + +REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyPhysRomRegister)); + pfnREMR3NotifyPhysRomRegister(pVM, GCPhys, cb, pvCopy, fShadow); +#endif +} + +REMR3DECL(void) REMR3NotifyPhysRamDeregister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyPhysRamDeregister)); + pfnREMR3NotifyPhysRamDeregister(pVM, GCPhys, cb); +#endif +} + +REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalRegister)); + pfnREMR3NotifyHandlerPhysicalRegister(pVM, enmKind, GCPhys, cb, fHasHCHandler); +#endif +} + +REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalDeregister)); + pfnREMR3NotifyHandlerPhysicalDeregister(pVM, enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM); +#endif +} + +REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyHandlerPhysicalModify)); + pfnREMR3NotifyHandlerPhysicalModify(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM); +#endif +} + +REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys) +{ +#ifdef USE_REM_STUBS + return false; +#else + Assert(VALID_PTR(pfnREMR3IsPageAccessHandled)); + return pfnREMR3IsPageAccessHandled(pVM, GCPhys); +#endif +} + +REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable) +{ +#ifdef USE_REM_STUBS + return VERR_NOT_IMPLEMENTED; +#else + Assert(VALID_PTR(pfnREMR3DisasEnableStepping)); + return pfnREMR3DisasEnableStepping(pVM, fEnable); +#endif +} + +REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM, PVMCPU pVCpu) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyInterruptSet)); + pfnREMR3NotifyInterruptSet(pVM, pVCpu); +#endif +} + +REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM, PVMCPU pVCpu) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyInterruptClear)); + pfnREMR3NotifyInterruptClear(pVM, pVCpu); +#endif +} + +REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM, PVMCPU pVCpuDst) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyTimerPending)); + pfnREMR3NotifyTimerPending(pVM, pVCpuDst); +#endif +} + +REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyDmaPending)); + pfnREMR3NotifyDmaPending(pVM); +#endif +} + +REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM) +{ +#ifndef USE_REM_STUBS + Assert(VALID_PTR(pfnREMR3NotifyQueuePending)); + pfnREMR3NotifyQueuePending(pVM); +#endif +} + +REMR3DECL(void) REMR3NotifyFF(PVM pVM) +{ +#ifndef USE_REM_STUBS + /* the timer can call this early on, so don't be picky. 
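+       I.e. pfnREMR3NotifyFF may still be NULL because the REM module has not
+       been loaded yet, so unlike the other wrappers this one checks the
+       pointer instead of asserting on it.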
*/ + if (pfnREMR3NotifyFF) + pfnREMR3NotifyFF(pVM); +#endif +} diff --git a/src/recompiler/VBoxREMWrapperA.asm b/src/recompiler/VBoxREMWrapperA.asm new file mode 100644 index 00000000..a947031f --- /dev/null +++ b/src/recompiler/VBoxREMWrapperA.asm @@ -0,0 +1,912 @@ +; $Id: VBoxREMWrapperA.asm $ +;; @file +; VBoxREM Wrapper, Assembly routines and wrapper Templates. +; + +; +; Copyright (C) 2006-2019 Oracle Corporation +; +; This file is part of VirtualBox Open Source Edition (OSE), as +; available from http://www.virtualbox.org. This file is free software; +; you can redistribute it and/or modify it under the terms of the GNU +; General Public License (GPL) as published by the Free Software +; Foundation, in version 2 as it comes in the "COPYING" file of the +; VirtualBox OSE distribution. VirtualBox OSE is distributed in the +; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. +; + + + + +;******************************************************************************* +;* Header Files * +;******************************************************************************* +%include "iprt/asmdefs.mac" + +%define REM_FIXUP_32_REAL_STUFF 0deadbeefh +%define REM_FIXUP_64_REAL_STUFF 0deadf00df00ddeadh +%define REM_FIXUP_64_DESC 0dead00010001deadh +%define REM_FIXUP_64_LOG_ENTRY 0dead00020002deadh +%define REM_FIXUP_64_LOG_EXIT 0dead00030003deadh +%define REM_FIXUP_64_WRAP_GCC_CB 0dead00040004deadh + +;%define ENTRY_LOGGING 1 +;%define EXIT_LOGGING 1 + + +%ifdef RT_ARCH_AMD64 + ;; + ; 64-bit pushad + %macro MY_PUSHAQ 0 + push rax + push rbx + push rcx + push rdx + push rsi + push rdi + push rbp + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + %endmacro + + ;; + ; 64-bit popad + %macro MY_POPAQ 0 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop rbp + pop rdi + pop rsi + pop rdx + pop rcx + pop rbx + pop rax + %endmacro + + ;; + ; Entry logging + %ifdef ENTRY_LOGGING + %macro LOG_ENTRY 0 + MY_PUSHAQ + push rbp + mov rbp, rsp + and rsp, ~0fh + sub rsp, 20h ; shadow space + + %ifdef RT_OS_WINDOWS + mov rcx, REM_FIXUP_64_DESC + %else + mov rdi, REM_FIXUP_64_DESC + %endif + mov rax, REM_FIXUP_64_LOG_ENTRY + call rax + + leave + MY_POPAQ + %endmacro + %else + %define LOG_ENTRY + %endif + + ;; + ; Exit logging + %ifdef EXIT_LOGGING + %macro LOG_EXIT 0 + MY_PUSHAQ + push rbp + mov rbp, rsp + and rsp, ~0fh + sub rsp, 20h ; shadow space + + %ifdef RT_OS_WINDOWS + mov rdx, rax + mov rcx, REM_FIXUP_64_DESC + %else + mov rsi, eax + mov rdi, REM_FIXUP_64_DESC + %endif + mov rax, REM_FIXUP_64_LOG_EXIT + call rax + + leave + MY_POPAQ + %endmacro + %else + %define LOG_EXIT + %endif + +%else + %define LOG_ENTRY + %define LOG_EXIT +%endif + + +BEGINCODE + +%ifdef RT_OS_WINDOWS + %ifdef RT_ARCH_AMD64 + + +BEGINPROC WrapGCC2MSC0Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC0Int + + +BEGINPROC WrapGCC2MSC1Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC1Int + + +BEGINPROC WrapGCC2MSC2Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, 
REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC2Int + + +BEGINPROC WrapGCC2MSC3Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC3Int + + +BEGINPROC WrapGCC2MSC4Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC4Int + + +BEGINPROC WrapGCC2MSC5Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 30h + + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC5Int + + +BEGINPROC WrapGCC2MSC6Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 30h + + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC6Int + + +BEGINPROC WrapGCC2MSC7Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 40h + + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC7Int + + +BEGINPROC WrapGCC2MSC8Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 40h + + mov r10, [rbp + 18h] + mov [rsp + 38h], r10 + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC8Int + + +BEGINPROC WrapGCC2MSC9Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 50h + + mov rax, [rbp + 20h] + mov [rsp + 40h], rax + mov r10, [rbp + 18h] + mov [rsp + 38h], r10 + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC9Int + + +BEGINPROC WrapGCC2MSC10Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 50h + + mov r11, [rbp + 28h] + mov [rsp + 48h], r11 + mov rax, [rbp + 20h] + mov [rsp + 40h], rax + mov r10, [rbp + 18h] + mov [rsp + 38h], r10 + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC10Int + + +BEGINPROC WrapGCC2MSC11Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 60h + + mov r10, [rbp + 30h] + mov [rsp + 50h], r10 + mov r11, [rbp + 28h] + mov [rsp + 48h], r11 + mov rax, [rbp + 20h] + mov [rsp + 40h], rax + mov 
r10, [rbp + 18h] + mov [rsp + 38h], r10 + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC11Int + + +BEGINPROC WrapGCC2MSC12Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 60h + + mov rax, [rbp + 28h] + mov [rsp + 48h], rax + mov r10, [rbp + 30h] + mov [rsp + 50h], r10 + mov r11, [rbp + 28h] + mov [rsp + 48h], r11 + mov rax, [rbp + 20h] + mov [rsp + 40h], rax + mov r10, [rbp + 18h] + mov [rsp + 38h], r10 + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov r8, rdx + mov rdx, rsi + mov rcx, rdi +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC12Int + + + +BEGINPROC WrapGCC2MSCVariadictInt + LOG_ENTRY +%ifdef DEBUG + ; check that there are NO floting point arguments in XMM registers! + or rax, rax + jz .ok + int3 +.ok: +%endif + sub rsp, 28h + mov r11, [rsp + 28h] ; r11 = return address. + mov [rsp + 28h], r9 + mov [rsp + 20h], r8 + mov r9, rcx + mov [rsp + 18h], r9 ; (*) + mov r8, rdx + mov [rsp + 14h], r8 ; (*) + mov rdx, rsi + mov [rsp + 8h], rdx ; (*) + mov rcx, rdi + mov [rsp], rcx ; (*) + mov rsi, r11 ; rsi is preserved by the callee. +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + add rsp, 30h + LOG_EXIT + jmp rsi + ; (*) unconditionally spill the registers, just in case '...' implies weird stuff on MSC. Check this out! +ENDPROC WrapGCC2MSCVariadictInt + + +;; +; Custom template for SSMR3RegisterInternal. +; +; (This is based on the WrapGCC2MSC11Int template.) 
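+;
+; SSMR3RegisterInternal needs a hand written template: it takes 14 parameters,
+; and the nine callback pointers among them (parameters 5..13) must each be
+; passed through remWrapGCCCallback (the REM_FIXUP_64_WRAP_GCC_CB calls with
+; iParam 5..13 further down) so that the callbacks handed to SSM end up
+; callable with the MSC convention.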
+; +; @cproto +; +; SSMR3DECL(int) SSMR3RegisterInternal(PVM pVM, const char *pszName, uint32_t u32Instance, uint32_t u32Version, size_t cbGuess, +; PFNSSMINTLIVEPREP pfnLivePrep, PFNSSMINTLIVEEXEC pfnLiveExec, PFNSSMINTLIVEVOTE pfnLiveVote, +; PFNSSMINTSAVEPREP pfnSavePrep, PFNSSMINTSAVEEXEC pfnSaveExec, PFNSSMINTSAVEDONE pfnSaveDone, +; PFNSSMINTLOADPREP pfnLoadPrep, PFNSSMINTLOADEXEC pfnLoadExec, PFNSSMINTLOADDONE pfnLoadDone); +; +; @param pVM rdi 0 +; @param pszName rsi 1 +; @param u32Instance rdx 2 +; @param u32Version rcx 3 +; @param cbGuess r8 4 +; @param pfnLivePrep r9 5 +; @param pfnLiveExec rbp + 10h 6 +; @param pfnLiveVote rbp + 18h 7 +; @param pfnSavePrep rbp + 20h 8 +; @param pfnSaveExec rbp + 28h 9 +; @param pfnSaveDone rbp + 30h 10 +; @param pfnLoadPrep rbp + 38h 11 +; @param pfnLoadExec rbp + 40h 12 +; @param pfnLoadDone rbp + 48h 13 +; +BEGINPROC WrapGCC2MSC_SSMR3RegisterInternal + LOG_ENTRY + push rbp + mov rbp, rsp + + sub rsp, 80h + + mov r10, [rbp + 48h] + mov [rsp + 68h], r10 ; pfnLiveDone + mov r11, [rbp + 40h] + mov [rsp + 60h], r11 ; pfnLiveExec + mov rax, [rbp + 38h] + mov [rsp + 58h], rax ; pfnLivePrep + mov r10, [rbp + 30h] + mov [rsp + 50h], r10 ; pfnLoadDone + mov r11, [rbp + 28h] + mov [rsp + 48h], r11 ; pfnLoadExec + mov rax, [rbp + 20h] + mov [rsp + 40h], rax ; pfnLoadPrep + mov r10, [rbp + 18h] + mov [rsp + 38h], r10 ; pfnSaveDone + mov r11, [rbp + 10h] + mov [rsp + 30h], r11 ; pfnSaveExec + mov [rsp + 28h], r9 ; pfnSavePrep + mov [rsp + 20h], r8 + mov [rsp + 18h], rcx ; -> r9 + mov [rsp + 10h], rdx ; -> r8 + mov [rsp + 08h], rsi ; -> rdx + mov [rsp], rdi ; -> rcx + + ; Now convert the function pointers. Have to setup a new shadow + ; space here since the SSMR3RegisterInternal one is already in use. + sub rsp, 20h + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 28h + 20h] ; pValue + mov r8d, 5 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 30h + 20h] ; pValue + mov r8d, 6 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 38h + 20h] ; pValue + mov r8d, 7 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 40h + 20h] ; pValue + mov r8d, 8 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 48h + 20h] ; pValue + mov r8d, 9 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 50h + 20h] ; pValue + mov r8d, 10 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 58h + 20h] ; pValue + mov r8d, 11 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 60h + 20h] ; pValue + mov r8d, 12 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + mov rcx, REM_FIXUP_64_DESC ; pDesc + lea rdx, [rsp + 68h + 20h] ; pValue + mov r8d, 13 ; iParam + mov rax, REM_FIXUP_64_WRAP_GCC_CB + call rax + + add rsp, 20h + + ; finally do the call. 
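+    ; The first four GCC arguments were spilled to [rsp], [rsp+08h],
+    ; [rsp+10h] and [rsp+18h] above; reload them into the MSC argument
+    ; registers now that the callback wrapping calls are done with rcx/rdx/r8.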
+ mov r9, [rsp + 18h] + mov r8, [rsp + 10h] + mov rdx, [rsp + 08h] + mov rcx, [rsp] +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + leave + LOG_EXIT + ret +ENDPROC WrapGCC2MSC_SSMR3RegisterInternal + + +; +; The other way around: +; + + +BEGINPROC WrapMSC2GCC0Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 10h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC0Int + + +BEGINPROC WrapMSC2GCC1Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC1Int + + +BEGINPROC WrapMSC2GCC2Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx +%ifdef USE_DIRECT_CALLS + call $+5+REM_FIXUP_32_REAL_STUFF +%else + mov rax, REM_FIXUP_64_REAL_STUFF + call rax +%endif + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC2Int + + +BEGINPROC WrapMSC2GCC3Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC3Int + + +BEGINPROC WrapMSC2GCC4Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC4Int + + +BEGINPROC WrapMSC2GCC5Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rbp + 30h] + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC5Int + + +BEGINPROC WrapMSC2GCC6Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 20h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rbp + 30h] + mov r9, [rbp + 38h] + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC6Int + + +BEGINPROC WrapMSC2GCC7Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 30h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rbp + 30h] + mov r9, [rbp + 38h] + mov r10, [rbp + 40h] + mov [rsp], r10 + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC7Int + + +BEGINPROC WrapMSC2GCC8Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 30h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rbp + 30h] + mov r9, [rbp + 38h] + mov r10, [rbp + 40h] + mov [rsp], r10 + mov r11, [rbp + 48h] + mov [rsp + 8], r11 + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 
10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC8Int + + +BEGINPROC WrapMSC2GCC9Int + LOG_ENTRY + push rbp + mov rbp, rsp + sub rsp, 40h + mov [rbp - 10h], rsi + mov [rbp - 18h], rdi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + mov r8, [rbp + 30h] + mov r9, [rbp + 38h] + mov r10, [rbp + 40h] + mov [rsp], r10 + mov r11, [rbp + 48h] + mov [rsp + 8], r11 + mov rax, [rbp + 50h] + mov [rsp + 10h], rax + call $+5+REM_FIXUP_32_REAL_STUFF + + mov rdi, [rbp - 18h] + mov rsi, [rbp - 10h] + leave + LOG_EXIT + ret +ENDPROC WrapMSC2GCC9Int + + %endif ; RT_ARCH_AMD64 +%endif ; RT_OS_WINDOWS + diff --git a/src/recompiler/VBoxRecompiler.c b/src/recompiler/VBoxRecompiler.c new file mode 100644 index 00000000..fd19df22 --- /dev/null +++ b/src/recompiler/VBoxRecompiler.c @@ -0,0 +1,5481 @@ +/* $Id: VBoxRecompiler.c $ */ +/** @file + * VBox Recompiler - QEMU. + */ + +/* + * Copyright (C) 2006-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/** @page pg_rem REM - Recompiled Execution Manager. + * + * The recompiled exeuction manager (REM) serves the final fallback for guest + * execution, after HM / raw-mode and IEM have given up. + * + * The REM is qemu with a whole bunch of VBox specific customization for + * interfacing with PATM, CSAM, PGM and other components. + * + * @sa @ref grp_rem + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_REM +#include <stdio.h> /* FILE */ +#include "osdep.h" +#include "config.h" +#include "cpu.h" +#include "exec-all.h" +#include "ioport.h" + +#include <VBox/vmm/rem.h> +#include <VBox/vmm/vmapi.h> +#include <VBox/vmm/tm.h> +#include <VBox/vmm/ssm.h> +#include <VBox/vmm/em.h> +#include <VBox/vmm/iem.h> +#include <VBox/vmm/trpm.h> +#include <VBox/vmm/iom.h> +#include <VBox/vmm/mm.h> +#include <VBox/vmm/pgm.h> +#include <VBox/vmm/pdm.h> +#include <VBox/vmm/dbgf.h> +#include <VBox/dbg.h> +#include <VBox/vmm/apic.h> +#include <VBox/vmm/hm.h> +#include <VBox/vmm/patm.h> +#include <VBox/vmm/csam.h> +#include "REMInternal.h" +#include <VBox/vmm/vm.h> +#include <VBox/vmm/uvm.h> +#include <VBox/param.h> +#include <VBox/err.h> + +#include <VBox/log.h> +#include <iprt/alloca.h> +#include <iprt/semaphore.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/thread.h> +#include <iprt/string.h> + +/* Don't wanna include everything. 
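+   Just declare the handful of qemu internals used below by hand rather than
+   dragging in the headers that define them.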
*/ +extern void cpu_exec_init_all(uintptr_t tb_size); +extern void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3); +extern void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0); +extern void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4); +extern void tlb_flush_page(CPUX86State *env, target_ulong addr); +extern void tlb_flush(CPUX86State *env, int flush_global); +extern void sync_seg(CPUX86State *env1, int seg_reg, int selector); +extern void sync_ldtr(CPUX86State *env1, int selector); + +#ifdef VBOX_STRICT +ram_addr_t get_phys_page_offset(target_ulong addr); +#endif + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ + +/** Copy 80-bit fpu register at pSrc to pDst. + * This is probably faster than *calling* memcpy. + */ +#define REM_COPY_FPU_REG(pDst, pSrc) \ + do { *(PX86FPUMMX)(pDst) = *(const X86FPUMMX *)(pSrc); } while (0) + +/** How remR3RunLoggingStep operates. */ +#define REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + + +/** Selector flag shift between qemu and VBox. + * VBox shifts the qemu bits to the right. */ +#define SEL_FLAGS_SHIFT (8) +/** Mask applied to the shifted qemu selector flags to get the attributes VBox + * (VT-x) needs. */ +#define SEL_FLAGS_SMASK UINT32_C(0x1F0FF) + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM); +static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass); +static DECLCALLBACK(int) remR3LoadDone(PVM pVM, PSSMHANDLE pSSM); +static void remR3StateUpdate(PVM pVM, PVMCPU pVCpu); +static int remR3InitPhysRamSizeAndDirtyMap(PVM pVM, bool fGuarded); + +static uint32_t remR3MMIOReadU8(void *pvEnv, target_phys_addr_t GCPhys); +static uint32_t remR3MMIOReadU16(void *pvEnv, target_phys_addr_t GCPhys); +static uint32_t remR3MMIOReadU32(void *pvEnv, target_phys_addr_t GCPhys); +static void remR3MMIOWriteU8(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32); +static void remR3MMIOWriteU16(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32); +static void remR3MMIOWriteU32(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32); + +static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys); +static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys); +static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys); +static void remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32); +static void remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32); +static void remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32); + +static void remR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM); +static void remR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler); +static void remR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, 
bool fRestoreAsRAM); + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ + +/** @todo Move stats to REM::s some rainy day we have nothing do to. */ +#ifdef VBOX_WITH_STATISTICS +static STAMPROFILEADV gStatExecuteSingleInstr; +static STAMPROFILEADV gStatCompilationQEmu; +static STAMPROFILEADV gStatRunCodeQEmu; +static STAMPROFILEADV gStatTotalTimeQEmu; +static STAMPROFILEADV gStatTimers; +static STAMPROFILEADV gStatTBLookup; +static STAMPROFILEADV gStatIRQ; +static STAMPROFILEADV gStatRawCheck; +static STAMPROFILEADV gStatMemRead; +static STAMPROFILEADV gStatMemWrite; +static STAMPROFILE gStatGCPhys2HCVirt; +static STAMCOUNTER gStatCpuGetTSC; +static STAMCOUNTER gStatRefuseTFInhibit; +static STAMCOUNTER gStatRefuseVM86; +static STAMCOUNTER gStatRefusePaging; +static STAMCOUNTER gStatRefusePAE; +static STAMCOUNTER gStatRefuseIOPLNot0; +static STAMCOUNTER gStatRefuseIF0; +static STAMCOUNTER gStatRefuseCode16; +static STAMCOUNTER gStatRefuseWP0; +static STAMCOUNTER gStatRefuseRing1or2; +static STAMCOUNTER gStatRefuseCanExecute; +static STAMCOUNTER gaStatRefuseStale[6]; +static STAMCOUNTER gStatREMGDTChange; +static STAMCOUNTER gStatREMIDTChange; +static STAMCOUNTER gStatREMLDTRChange; +static STAMCOUNTER gStatREMTRChange; +static STAMCOUNTER gStatSelOutOfSync[6]; +static STAMCOUNTER gStatSelOutOfSyncStateBack[6]; +static STAMCOUNTER gStatFlushTBs; +#endif +/* in exec.c */ +extern uint32_t tlb_flush_count; +extern uint32_t tb_flush_count; +extern uint32_t tb_phys_invalidate_count; + +/* + * Global stuff. + */ + +/** MMIO read callbacks. */ +CPUReadMemoryFunc *g_apfnMMIORead[3] = +{ + remR3MMIOReadU8, + remR3MMIOReadU16, + remR3MMIOReadU32 +}; + +/** MMIO write callbacks. */ +CPUWriteMemoryFunc *g_apfnMMIOWrite[3] = +{ + remR3MMIOWriteU8, + remR3MMIOWriteU16, + remR3MMIOWriteU32 +}; + +/** Handler read callbacks. */ +CPUReadMemoryFunc *g_apfnHandlerRead[3] = +{ + remR3HandlerReadU8, + remR3HandlerReadU16, + remR3HandlerReadU32 +}; + +/** Handler write callbacks. */ +CPUWriteMemoryFunc *g_apfnHandlerWrite[3] = +{ + remR3HandlerWriteU8, + remR3HandlerWriteU16, + remR3HandlerWriteU32 +}; + + +#ifdef VBOX_WITH_DEBUGGER +/* + * Debugger commands. + */ +static FNDBGCCMD remR3CmdDisasEnableStepping;; + +/** '.remstep' arguments. */ +static const DBGCVARDESC g_aArgRemStep[] = +{ + /* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */ + { 0, ~0U, DBGCVAR_CAT_NUMBER, 0, "on/off", "Boolean value/mnemonic indicating the new state." }, +}; + +/** Command descriptors. */ +static const DBGCCMD g_aCmds[] = +{ + { + .pszCmd ="remstep", + .cArgsMin = 0, + .cArgsMax = 1, + .paArgDescs = &g_aArgRemStep[0], + .cArgDescs = RT_ELEMENTS(g_aArgRemStep), + .fFlags = 0, + .pfnHandler = remR3CmdDisasEnableStepping, + .pszSyntax = "[on/off]", + .pszDescription = "Enable or disable the single stepping with logged disassembly. " + "If no arguments show the current state." + } +}; +#endif + +/** Prologue code, must be in lower 4G to simplify jumps to/from generated code. + * @todo huh??? That cannot be the case on the mac... So, this + * point is probably not valid any longer. 
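+ *       (The buffer itself is a 1K RTMemExecAlloc allocation done in
+ *       REMR3Init below.)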
*/ +uint8_t *code_gen_prologue; + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +void remAbort(int rc, const char *pszTip); +extern int testmath(void); + +/* Put them here to avoid unused variable warning. */ +AssertCompile(RT_SIZEOFMEMB(VM, rem.padding) >= RT_SIZEOFMEMB(VM, rem.s)); +#if !defined(IPRT_NO_CRT) && (defined(RT_OS_LINUX) || defined(RT_OS_DARWIN) || defined(RT_OS_WINDOWS)) +//AssertCompileMemberSize(REM, Env, REM_ENV_SIZE); +/* Why did this have to be identical?? */ +AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE); +#else +AssertCompile(RT_SIZEOFMEMB(REM, Env) <= REM_ENV_SIZE); +#endif + + +/** + * Initializes the REM. + * + * @returns VBox status code. + * @param pVM The VM to operate on. + */ +REMR3DECL(int) REMR3Init(PVM pVM) +{ + PREMHANDLERNOTIFICATION pCur; + uint32_t u32Dummy; + int rc; + unsigned i; + +#ifdef VBOX_ENABLE_VBOXREM64 + LogRel(("Using 64-bit aware REM\n")); +#endif + + /* + * Assert sanity. + */ + AssertReleaseMsg(sizeof(pVM->rem.padding) >= sizeof(pVM->rem.s), ("%#x >= %#x; sizeof(Env)=%#x\n", sizeof(pVM->rem.padding), sizeof(pVM->rem.s), sizeof(pVM->rem.s.Env))); + AssertReleaseMsg(sizeof(pVM->rem.s.Env) <= REM_ENV_SIZE, ("%#x == %#x\n", sizeof(pVM->rem.s.Env), REM_ENV_SIZE)); + AssertReleaseMsg(!(RT_UOFFSETOF(VM, rem) & 31), ("off=%#zx\n", RT_UOFFSETOF(VM, rem))); +#if 0 /* just an annoyance at the moment. */ +#if defined(DEBUG) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD) /// @todo fix the solaris and freebsd math stuff. + Assert(!testmath()); +#endif +#endif + + /* + * Init some internal data members. + */ + pVM->rem.s.offVM = RT_UOFFSETOF(VM, rem.s); + pVM->rem.s.Env.pVM = pVM; +#ifdef CPU_RAW_MODE_INIT + pVM->rem.s.state |= CPU_RAW_MODE_INIT; +#endif + + /* + * Initialize the REM critical section. + * + * Note: This is not a 100% safe solution as updating the internal memory state while another VCPU + * is executing code could be dangerous. Taking the REM lock is not an option due to the danger of + * deadlocks. (mostly pgm vs rem locking) + */ + rc = PDMR3CritSectInit(pVM, &pVM->rem.s.CritSectRegister, RT_SRC_POS, "REM-Register"); + AssertRCReturn(rc, rc); + + /* ctx. */ + pVM->rem.s.pCtx = NULL; /* set when executing code. */ + AssertMsg(MMR3PhysGetRamSize(pVM) == 0, ("Init order has changed! REM depends on notification about ALL physical memory registrations\n")); + + /* ignore all notifications */ + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + code_gen_prologue = RTMemExecAlloc(_1K); + AssertLogRelReturn(code_gen_prologue, VERR_NO_MEMORY); + + cpu_exec_init_all(0); + + /* + * Init the recompiler. + */ + if (!cpu_x86_init(&pVM->rem.s.Env, "vbox")) + { + AssertMsgFailed(("cpu_x86_init failed - impossible!\n")); + return VERR_GENERAL_FAILURE; + } + PVMCPU pVCpu = VMMGetCpu(pVM); + CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features); + CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext3_features, &pVM->rem.s.Env.cpuid_ext2_features); + + EMRemLock(pVM); + cpu_reset(&pVM->rem.s.Env); + EMRemUnlock(pVM); + + /* allocate code buffer for single instruction emulation. 
*/ + pVM->rem.s.Env.cbCodeBuffer = 4096; + pVM->rem.s.Env.pvCodeBuffer = RTMemExecAlloc(pVM->rem.s.Env.cbCodeBuffer); + AssertMsgReturn(pVM->rem.s.Env.pvCodeBuffer, ("Failed to allocate code buffer!\n"), VERR_NO_MEMORY); + + /* Finally, set the cpu_single_env global. */ + cpu_single_env = &pVM->rem.s.Env; + + /* Nothing is pending by default */ + pVM->rem.s.uStateLoadPendingInterrupt = REM_NO_PENDING_IRQ; + + /* + * Register ram types. + */ + pVM->rem.s.iMMIOMemType = cpu_register_io_memory(g_apfnMMIORead, g_apfnMMIOWrite, &pVM->rem.s.Env); + AssertReleaseMsg(pVM->rem.s.iMMIOMemType >= 0, ("pVM->rem.s.iMMIOMemType=%d\n", pVM->rem.s.iMMIOMemType)); + pVM->rem.s.iHandlerMemType = cpu_register_io_memory(g_apfnHandlerRead, g_apfnHandlerWrite, pVM); + AssertReleaseMsg(pVM->rem.s.iHandlerMemType >= 0, ("pVM->rem.s.iHandlerMemType=%d\n", pVM->rem.s.iHandlerMemType)); + Log2(("REM: iMMIOMemType=%d iHandlerMemType=%d\n", pVM->rem.s.iMMIOMemType, pVM->rem.s.iHandlerMemType)); + + /* stop ignoring. */ + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + + /* + * Register the saved state data unit. + */ + rc = SSMR3RegisterInternal(pVM, "rem", 1, REM_SAVED_STATE_VERSION, sizeof(uint32_t) * 10, + NULL, NULL, NULL, + NULL, remR3Save, NULL, + NULL, remR3Load, remR3LoadDone); + if (RT_FAILURE(rc)) + return rc; + +#ifdef VBOX_WITH_DEBUGGER + /* + * Debugger commands. + */ + static bool fRegisteredCmds = false; + if (!fRegisteredCmds) + { + int rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds)); + if (RT_SUCCESS(rc)) + fRegisteredCmds = true; + } +#endif + +#ifdef VBOX_WITH_STATISTICS + /* + * Statistics. + */ + STAM_REG(pVM, &gStatExecuteSingleInstr, STAMTYPE_PROFILE, "/PROF/REM/SingleInstr",STAMUNIT_TICKS_PER_CALL, "Profiling single instruction emulation."); + STAM_REG(pVM, &gStatCompilationQEmu, STAMTYPE_PROFILE, "/PROF/REM/Compile", STAMUNIT_TICKS_PER_CALL, "Profiling QEmu compilation."); + STAM_REG(pVM, &gStatRunCodeQEmu, STAMTYPE_PROFILE, "/PROF/REM/Runcode", STAMUNIT_TICKS_PER_CALL, "Profiling QEmu code execution."); + STAM_REG(pVM, &gStatTotalTimeQEmu, STAMTYPE_PROFILE, "/PROF/REM/Emulate", STAMUNIT_TICKS_PER_CALL, "Profiling code emulation."); + STAM_REG(pVM, &gStatTimers, STAMTYPE_PROFILE, "/PROF/REM/Timers", STAMUNIT_TICKS_PER_CALL, "Profiling timer queue processing."); + STAM_REG(pVM, &gStatTBLookup, STAMTYPE_PROFILE, "/PROF/REM/TBLookup", STAMUNIT_TICKS_PER_CALL, "Profiling translation block lookup."); + STAM_REG(pVM, &gStatIRQ, STAMTYPE_PROFILE, "/PROF/REM/IRQ", STAMUNIT_TICKS_PER_CALL, "Profiling IRQ delivery."); + STAM_REG(pVM, &gStatRawCheck, STAMTYPE_PROFILE, "/PROF/REM/RawCheck", STAMUNIT_TICKS_PER_CALL, "Profiling remR3CanExecuteRaw calls."); + STAM_REG(pVM, &gStatMemRead, STAMTYPE_PROFILE, "/PROF/REM/MemRead", STAMUNIT_TICKS_PER_CALL, "Profiling memory access."); + STAM_REG(pVM, &gStatMemWrite, STAMTYPE_PROFILE, "/PROF/REM/MemWrite", STAMUNIT_TICKS_PER_CALL, "Profiling memory access."); + STAM_REG(pVM, &gStatGCPhys2HCVirt, STAMTYPE_PROFILE, "/PROF/REM/GCPhys2HCVirt", STAMUNIT_TICKS_PER_CALL, "Profiling memory conversion (PGMR3PhysTlbGCPhys2Ptr)."); + + STAM_REG(pVM, &gStatCpuGetTSC, STAMTYPE_COUNTER, "/REM/CpuGetTSC", STAMUNIT_OCCURENCES, "cpu_get_tsc calls"); + + STAM_REG(pVM, &gStatRefuseTFInhibit, STAMTYPE_COUNTER, "/REM/Refuse/TFInibit", STAMUNIT_OCCURENCES, "Raw mode refused because of TF or irq inhibit"); + STAM_REG(pVM, &gStatRefuseVM86, STAMTYPE_COUNTER, "/REM/Refuse/VM86", STAMUNIT_OCCURENCES, "Raw mode refused because of VM86"); + STAM_REG(pVM, &gStatRefusePaging, 
STAMTYPE_COUNTER, "/REM/Refuse/Paging", STAMUNIT_OCCURENCES, "Raw mode refused because of disabled paging/pm"); + STAM_REG(pVM, &gStatRefusePAE, STAMTYPE_COUNTER, "/REM/Refuse/PAE", STAMUNIT_OCCURENCES, "Raw mode refused because of PAE"); + STAM_REG(pVM, &gStatRefuseIOPLNot0, STAMTYPE_COUNTER, "/REM/Refuse/IOPLNot0", STAMUNIT_OCCURENCES, "Raw mode refused because of IOPL != 0"); + STAM_REG(pVM, &gStatRefuseIF0, STAMTYPE_COUNTER, "/REM/Refuse/IF0", STAMUNIT_OCCURENCES, "Raw mode refused because of IF=0"); + STAM_REG(pVM, &gStatRefuseCode16, STAMTYPE_COUNTER, "/REM/Refuse/Code16", STAMUNIT_OCCURENCES, "Raw mode refused because of 16 bit code"); + STAM_REG(pVM, &gStatRefuseWP0, STAMTYPE_COUNTER, "/REM/Refuse/WP0", STAMUNIT_OCCURENCES, "Raw mode refused because of WP=0"); + STAM_REG(pVM, &gStatRefuseRing1or2, STAMTYPE_COUNTER, "/REM/Refuse/Ring1or2", STAMUNIT_OCCURENCES, "Raw mode refused because of ring 1/2 execution"); + STAM_REG(pVM, &gStatRefuseCanExecute, STAMTYPE_COUNTER, "/REM/Refuse/CanExecuteRaw", STAMUNIT_OCCURENCES, "Raw mode refused because of cCanExecuteRaw"); + STAM_REG(pVM, &gaStatRefuseStale[R_ES], STAMTYPE_COUNTER, "/REM/Refuse/StaleES", STAMUNIT_OCCURENCES, "Raw mode refused because of stale ES"); + STAM_REG(pVM, &gaStatRefuseStale[R_CS], STAMTYPE_COUNTER, "/REM/Refuse/StaleCS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale CS"); + STAM_REG(pVM, &gaStatRefuseStale[R_SS], STAMTYPE_COUNTER, "/REM/Refuse/StaleSS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale SS"); + STAM_REG(pVM, &gaStatRefuseStale[R_DS], STAMTYPE_COUNTER, "/REM/Refuse/StaleDS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale DS"); + STAM_REG(pVM, &gaStatRefuseStale[R_FS], STAMTYPE_COUNTER, "/REM/Refuse/StaleFS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale FS"); + STAM_REG(pVM, &gaStatRefuseStale[R_GS], STAMTYPE_COUNTER, "/REM/Refuse/StaleGS", STAMUNIT_OCCURENCES, "Raw mode refused because of stale GS"); + STAM_REG(pVM, &gStatFlushTBs, STAMTYPE_COUNTER, "/REM/FlushTB", STAMUNIT_OCCURENCES, "Number of TB flushes"); + + STAM_REG(pVM, &gStatREMGDTChange, STAMTYPE_COUNTER, "/REM/Change/GDTBase", STAMUNIT_OCCURENCES, "GDT base changes"); + STAM_REG(pVM, &gStatREMLDTRChange, STAMTYPE_COUNTER, "/REM/Change/LDTR", STAMUNIT_OCCURENCES, "LDTR changes"); + STAM_REG(pVM, &gStatREMIDTChange, STAMTYPE_COUNTER, "/REM/Change/IDTBase", STAMUNIT_OCCURENCES, "IDT base changes"); + STAM_REG(pVM, &gStatREMTRChange, STAMTYPE_COUNTER, "/REM/Change/TR", STAMUNIT_OCCURENCES, "TR selector changes"); + + STAM_REG(pVM, &gStatSelOutOfSync[0], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/ES", STAMUNIT_OCCURENCES, "ES out of sync"); + STAM_REG(pVM, &gStatSelOutOfSync[1], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/CS", STAMUNIT_OCCURENCES, "CS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSync[2], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/SS", STAMUNIT_OCCURENCES, "SS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSync[3], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/DS", STAMUNIT_OCCURENCES, "DS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSync[4], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/FS", STAMUNIT_OCCURENCES, "FS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSync[5], STAMTYPE_COUNTER, "/REM/State/SelOutOfSync/GS", STAMUNIT_OCCURENCES, "GS out of sync"); + + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[0], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/ES", STAMUNIT_OCCURENCES, "ES out of sync"); + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[1], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/CS", 
STAMUNIT_OCCURENCES, "CS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[2], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/SS", STAMUNIT_OCCURENCES, "SS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[3], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/DS", STAMUNIT_OCCURENCES, "DS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[4], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/FS", STAMUNIT_OCCURENCES, "FS out of sync"); + STAM_REG(pVM, &gStatSelOutOfSyncStateBack[5], STAMTYPE_COUNTER, "/REM/StateBack/SelOutOfSync/GS", STAMUNIT_OCCURENCES, "GS out of sync"); + + STAM_REG(pVM, &pVM->rem.s.Env.StatTbFlush, STAMTYPE_PROFILE, "/REM/TbFlush", STAMUNIT_TICKS_PER_CALL, "profiling tb_flush()."); +#endif /* VBOX_WITH_STATISTICS */ + AssertCompileMemberAlignment(CPUX86State, StatTbFlush, 4); + AssertCompileMemberAlignment(CPUX86State, StatTbFlush, 8); + + STAM_REL_REG(pVM, &tb_flush_count, STAMTYPE_U32_RESET, "/REM/TbFlushCount", STAMUNIT_OCCURENCES, "tb_flush() calls"); + STAM_REL_REG(pVM, &tb_phys_invalidate_count, STAMTYPE_U32_RESET, "/REM/TbPhysInvldCount", STAMUNIT_OCCURENCES, "tb_phys_invalidate() calls"); + STAM_REL_REG(pVM, &tlb_flush_count, STAMTYPE_U32_RESET, "/REM/TlbFlushCount", STAMUNIT_OCCURENCES, "tlb_flush() calls"); + + +#ifdef DEBUG_ALL_LOGGING + loglevel = ~0; +#endif + + /* + * Init the handler notification lists. + */ + pVM->rem.s.idxPendingList = UINT32_MAX; + pVM->rem.s.idxFreeList = 0; + + for (i = 0 ; i < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications); i++) + { + pCur = &pVM->rem.s.aHandlerNotifications[i]; + pCur->idxNext = i + 1; + pCur->idxSelf = i; + } + pCur->idxNext = UINT32_MAX; /* the last record. */ + + return rc; +} + + +/** + * Finalizes the REM initialization. + * + * This is called after all components, devices and drivers has + * been initialized. Its main purpose it to finish the RAM related + * initialization. + * + * @returns VBox status code. + * + * @param pVM The VM handle. + */ +REMR3DECL(int) REMR3InitFinalize(PVM pVM) +{ + int rc; + + /* + * Ram size & dirty bit map. + */ + Assert(!pVM->rem.s.fGCPhysLastRamFixed); + pVM->rem.s.fGCPhysLastRamFixed = true; +#ifdef RT_STRICT + rc = remR3InitPhysRamSizeAndDirtyMap(pVM, true /* fGuarded */); +#else + rc = remR3InitPhysRamSizeAndDirtyMap(pVM, false /* fGuarded */); +#endif + return rc; +} + +/** + * Initializes ram_list.phys_dirty and ram_list.phys_dirty_size. + * + * @returns VBox status code. + * @param pVM The VM handle. + * @param fGuarded Whether to guard the map. + */ +static int remR3InitPhysRamSizeAndDirtyMap(PVM pVM, bool fGuarded) +{ + int rc = VINF_SUCCESS; + RTGCPHYS cb; + + AssertLogRelReturn(QLIST_EMPTY(&ram_list.blocks), VERR_INTERNAL_ERROR_2); + + cb = pVM->rem.s.GCPhysLastRam + 1; + AssertLogRelMsgReturn(cb > pVM->rem.s.GCPhysLastRam, + ("GCPhysLastRam=%RGp - out of range\n", pVM->rem.s.GCPhysLastRam), + VERR_OUT_OF_RANGE); + + ram_list.phys_dirty_size = cb >> PAGE_SHIFT; + AssertMsg(((RTGCPHYS)ram_list.phys_dirty_size << PAGE_SHIFT) == cb, ("%RGp\n", cb)); + + if (!fGuarded) + { + ram_list.phys_dirty = MMR3HeapAlloc(pVM, MM_TAG_REM, ram_list.phys_dirty_size); + AssertLogRelMsgReturn(ram_list.phys_dirty, ("Failed to allocate %u bytes of dirty page map bytes\n", ram_list.phys_dirty_size), VERR_NO_MEMORY); + } + else + { + /* + * Fill it up the nearest 4GB RAM and leave at least _64KB of guard after it. 
+ */ + uint32_t cbBitmapAligned = RT_ALIGN_32(ram_list.phys_dirty_size, PAGE_SIZE); + uint32_t cbBitmapFull = RT_ALIGN_32(ram_list.phys_dirty_size, (_4G >> PAGE_SHIFT)); + if (cbBitmapFull == cbBitmapAligned) + cbBitmapFull += _4G >> PAGE_SHIFT; + else if (cbBitmapFull - cbBitmapAligned < _64K) + cbBitmapFull += _64K; + + ram_list.phys_dirty = RTMemPageAlloc(cbBitmapFull); + AssertLogRelMsgReturn(ram_list.phys_dirty, ("Failed to allocate %u bytes of dirty page map bytes\n", cbBitmapFull), VERR_NO_MEMORY); + + rc = RTMemProtect(ram_list.phys_dirty + cbBitmapAligned, cbBitmapFull - cbBitmapAligned, RTMEM_PROT_NONE); + if (RT_FAILURE(rc)) + { + RTMemPageFree(ram_list.phys_dirty, cbBitmapFull); + AssertLogRelRCReturn(rc, rc); + } + + ram_list.phys_dirty += cbBitmapAligned - ram_list.phys_dirty_size; + } + + /* initialize it. */ + memset(ram_list.phys_dirty, 0xff, ram_list.phys_dirty_size); + return rc; +} + + +/** + * Terminates the REM. + * + * Termination means cleaning up and freeing all resources, + * the VM it self is at this point powered off or suspended. + * + * @returns VBox status code. + * @param pVM The VM to operate on. + */ +REMR3DECL(int) REMR3Term(PVM pVM) +{ + /* + * Statistics. + */ + STAMR3Deregister(pVM->pUVM, "/PROF/REM/*"); + STAMR3Deregister(pVM->pUVM, "/REM/*"); + + return VINF_SUCCESS; +} + + +/** + * The VM is being reset. + * + * For the REM component this means to call the cpu_reset() and + * reinitialize some state variables. + * + * @param pVM VM handle. + */ +REMR3DECL(void) REMR3Reset(PVM pVM) +{ + EMRemLock(pVM); /* Only pro forma, we're in a rendezvous. */ + + /* + * Reset the REM cpu. + */ + Assert(pVM->rem.s.cIgnoreAll == 0); + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + cpu_reset(&pVM->rem.s.Env); + pVM->rem.s.cInvalidatedPages = 0; + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + Assert(pVM->rem.s.cIgnoreAll == 0); + + /* Clear raw ring 0 init state */ + pVM->rem.s.Env.state &= ~CPU_RAW_RING0; + + /* Flush the TBs the next time we execute code here. */ + pVM->rem.s.fFlushTBs = true; + + EMRemUnlock(pVM); +} + + +/** + * Execute state save operation. + * + * @returns VBox status code. + * @param pVM VM Handle. + * @param pSSM SSM operation handle. + */ +static DECLCALLBACK(int) remR3Save(PVM pVM, PSSMHANDLE pSSM) +{ + PREM pRem = &pVM->rem.s; + + /* + * Save the required CPU Env bits. + * (Not much because we're never in REM when doing the save.) + */ + LogFlow(("remR3Save:\n")); + Assert(!pRem->fInREM); + SSMR3PutU32(pSSM, pRem->Env.hflags); + SSMR3PutU32(pSSM, ~0); /* separator */ + + /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */ + SSMR3PutU32(pSSM, !!(pRem->Env.state & CPU_RAW_RING0)); + SSMR3PutU32(pSSM, REM_NO_PENDING_IRQ); + + return SSMR3PutU32(pSSM, ~0); /* terminator */ +} + + +/** + * Execute state load operation. + * + * @returns VBox status code. + * @param pVM VM Handle. + * @param pSSM SSM operation handle. + * @param uVersion Data layout version. + * @param uPass The data pass. + */ +static DECLCALLBACK(int) remR3Load(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + uint32_t u32Dummy; + uint32_t fRawRing0 = false; + uint32_t u32Sep; + uint32_t i; + int rc; + PREM pRem; + + LogFlow(("remR3Load:\n")); + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Validate version. 
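 * (Layout note: for the current saved-state version the unit written by
 *  remR3Save() above is just five 32-bit values: Env.hflags, a ~0 separator,
 *  a raw-ring-0 indicator, the pending-interrupt slot (always
 *  REM_NO_PENDING_IRQ on save) and a ~0 terminator; the extra fields read
 *  further down only exist in the old 1.6 layout.)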
+ */ + if ( uVersion != REM_SAVED_STATE_VERSION + && uVersion != REM_SAVED_STATE_VERSION_VER1_6) + { + AssertMsgFailed(("remR3Load: Invalid version uVersion=%d!\n", uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Do a reset to be on the safe side... + */ + REMR3Reset(pVM); + + /* + * Ignore all ignorable notifications. + * (Not doing this will cause serious trouble.) + */ + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + /* + * Load the required CPU Env bits. + * (Not much because we're never in REM when doing the save.) + */ + pRem = &pVM->rem.s; + Assert(!pRem->fInREM); + SSMR3GetU32(pSSM, &pRem->Env.hflags); + if (uVersion == REM_SAVED_STATE_VERSION_VER1_6) + { + /* Redundant REM CPU state has to be loaded, but can be ignored. */ + CPUX86State_Ver16 temp; + SSMR3GetMem(pSSM, &temp, RT_UOFFSETOF(CPUX86State_Ver16, jmp_env)); + } + + rc = SSMR3GetU32(pSSM, &u32Sep); /* separator */ + if (RT_FAILURE(rc)) + return rc; + if (u32Sep != ~0U) + { + AssertMsgFailed(("u32Sep=%#x\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + + /* Remember if we've entered raw mode (vital for ring 1 checks in e.g. iret emulation). */ + SSMR3GetUInt(pSSM, &fRawRing0); + if (fRawRing0) + pRem->Env.state |= CPU_RAW_RING0; + + if (uVersion == REM_SAVED_STATE_VERSION_VER1_6) + { + /* + * Load the REM stuff. + */ + /** @todo r=bird: We should just drop all these items, restoring doesn't make + * sense. */ + rc = SSMR3GetU32(pSSM, (uint32_t *)&pRem->cInvalidatedPages); + if (RT_FAILURE(rc)) + return rc; + if (pRem->cInvalidatedPages > RT_ELEMENTS(pRem->aGCPtrInvalidatedPages)) + { + AssertMsgFailed(("cInvalidatedPages=%#x\n", pRem->cInvalidatedPages)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + for (i = 0; i < pRem->cInvalidatedPages; i++) + SSMR3GetGCPtr(pSSM, &pRem->aGCPtrInvalidatedPages[i]); + } + + rc = SSMR3GetUInt(pSSM, &pVM->rem.s.uStateLoadPendingInterrupt); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn( pVM->rem.s.uStateLoadPendingInterrupt == REM_NO_PENDING_IRQ + || pVM->rem.s.uStateLoadPendingInterrupt < 256, + ("uStateLoadPendingInterrupt=%#x\n", pVM->rem.s.uStateLoadPendingInterrupt), + VERR_SSM_UNEXPECTED_DATA); + + /* check the terminator. */ + rc = SSMR3GetU32(pSSM, &u32Sep); + if (RT_FAILURE(rc)) + return rc; + if (u32Sep != ~0U) + { + AssertMsgFailed(("u32Sep=%#x (term)\n", u32Sep)); + return VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + } + + /* + * Get the CPUID features. + */ + PVMCPU pVCpu = VMMGetCpu(pVM); + CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features); + CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features); + + /* + * Stop ignoring ignorable notifications. + */ + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + + /* + * Sync the whole CPU state when executing code in the recompiler. + */ + for (i = 0; i < pVM->cCpus; i++) + { + PVMCPU pVCpu = &pVM->aCpus[i]; + CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_ALL); + } + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMINTLOADDONE, + * For pushing misdesigned pending-interrupt mess to TRPM where it belongs. 
} + */ +static DECLCALLBACK(int) remR3LoadDone(PVM pVM, PSSMHANDLE pSSM) +{ + if (pVM->rem.s.uStateLoadPendingInterrupt != REM_NO_PENDING_IRQ) + { + int rc = TRPMAssertTrap(&pVM->aCpus[0], pVM->rem.s.uStateLoadPendingInterrupt, TRPM_HARDWARE_INT); + AssertLogRelMsgReturn(rc, ("uStateLoadPendingInterrupt=%#x rc=%Rrc\n", pVM->rem.s.uStateLoadPendingInterrupt, rc), rc); + pVM->rem.s.uStateLoadPendingInterrupt = REM_NO_PENDING_IRQ; + } + return VINF_SUCCESS; +} + + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM_RUN + +/** + * Single steps an instruction in recompiled mode. + * + * Before calling this function the REM state needs to be in sync with + * the VM. Call REMR3State() to perform the sync. It's only necessary + * (and permitted) to sync at the first call to REMR3Step()/REMR3Run() + * and after calling REMR3StateBack(). + * + * @returns VBox status code. + * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + */ +REMR3DECL(int) REMR3Step(PVM pVM, PVMCPU pVCpu) +{ + int rc, interrupt_request; + RTGCPTR GCPtrPC; + bool fBp; + + /* + * Lock the REM - we don't wanna have anyone interrupting us + * while stepping - and enabled single stepping. We also ignore + * pending interrupts and suchlike. + */ + interrupt_request = pVM->rem.s.Env.interrupt_request; + Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER))); + pVM->rem.s.Env.interrupt_request = 0; + cpu_single_step(&pVM->rem.s.Env, 1); + + /* + * If we're standing at a breakpoint, that have to be disabled before we start stepping. + */ + GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base; + fBp = !cpu_breakpoint_remove(&pVM->rem.s.Env, GCPtrPC, BP_GDB); + + /* + * Execute and handle the return code. + * We execute without enabling the cpu tick, so on success we'll + * just flip it on and off to make sure it moves + */ + rc = cpu_exec(&pVM->rem.s.Env); + if (rc == EXCP_DEBUG) + { + TMR3NotifyResume(pVM, pVCpu); + TMR3NotifySuspend(pVM, pVCpu); + rc = VINF_EM_DBG_STEPPED; + } + else + { + switch (rc) + { + case EXCP_INTERRUPT: rc = VINF_SUCCESS; break; + case EXCP_HLT: + case EXCP_HALTED: rc = VINF_EM_HALT; break; + case EXCP_RC: + rc = pVM->rem.s.rc; + pVM->rem.s.rc = VERR_INTERNAL_ERROR; + break; + case EXCP_EXECUTE_RAW: + case EXCP_EXECUTE_HM: + /** @todo is it correct? No! */ + rc = VINF_SUCCESS; + break; + default: + AssertReleaseMsgFailed(("This really shouldn't happen, rc=%d!\n", rc)); + rc = VERR_INTERNAL_ERROR; + break; + } + } + + /* + * Restore the stuff we changed to prevent interruption. + * Unlock the REM. + */ + if (fBp) + { + int rc2 = cpu_breakpoint_insert(&pVM->rem.s.Env, GCPtrPC, BP_GDB, NULL); + Assert(rc2 == 0); NOREF(rc2); + } + cpu_single_step(&pVM->rem.s.Env, 0); + pVM->rem.s.Env.interrupt_request = interrupt_request; + + return rc; +} + + +/** + * Set a breakpoint using the REM facilities. + * + * @returns VBox status code. + * @param pVM The VM handle. + * @param Address The breakpoint address. + * @thread The emulation thread. 
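 *
 * Usage sketch for the single-stepping and breakpoint API above and below;
 * this is an illustration only, assuming a valid pVM/pVCpu on the emulation
 * thread and a hypothetical guest address GCPtrBp, not the actual EM/DBGF
 * code:
 *
 * @code
 *      int rc = REMR3State(pVM, pVCpu);            // sync REM with the VM first
 *      if (RT_SUCCESS(rc))
 *      {
 *          rc = REMR3Step(pVM, pVCpu);             // VINF_EM_DBG_STEPPED on a clean step
 *          int rc2 = REMR3StateBack(pVM, pVCpu);   // write the REM state back to the VM
 *          AssertRC(rc2);
 *      }
 *
 *      rc = REMR3BreakpointSet(pVM, GCPtrBp);      // VERR_REM_NO_MORE_BP_SLOTS when full
 *      // ... execute / step as needed ...
 *      rc = REMR3BreakpointClear(pVM, GCPtrBp);    // VERR_REM_BP_NOT_FOUND if not set
 * @endcode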
+ */ +REMR3DECL(int) REMR3BreakpointSet(PVM pVM, RTGCUINTPTR Address) +{ + VM_ASSERT_EMT(pVM); + if (!cpu_breakpoint_insert(&pVM->rem.s.Env, Address, BP_GDB, NULL)) + { + LogFlow(("REMR3BreakpointSet: Address=%RGv\n", Address)); + return VINF_SUCCESS; + } + LogFlow(("REMR3BreakpointSet: Address=%RGv - failed!\n", Address)); + return VERR_REM_NO_MORE_BP_SLOTS; +} + + +/** + * Clears a breakpoint set by REMR3BreakpointSet(). + * + * @returns VBox status code. + * @param pVM The VM handle. + * @param Address The breakpoint address. + * @thread The emulation thread. + */ +REMR3DECL(int) REMR3BreakpointClear(PVM pVM, RTGCUINTPTR Address) +{ + VM_ASSERT_EMT(pVM); + if (!cpu_breakpoint_remove(&pVM->rem.s.Env, Address, BP_GDB)) + { + LogFlow(("REMR3BreakpointClear: Address=%RGv\n", Address)); + return VINF_SUCCESS; + } + LogFlow(("REMR3BreakpointClear: Address=%RGv - not found!\n", Address)); + return VERR_REM_BP_NOT_FOUND; +} + + +/** + * Emulate an instruction. + * + * This function executes one instruction without letting anyone + * interrupt it. This is intended for being called while being in + * raw mode and thus will take care of all the state syncing between + * REM and the rest. + * + * @returns VBox status code. + * @param pVM VM handle. + * @param pVCpu VMCPU Handle. + */ +REMR3DECL(int) REMR3EmulateInstruction(PVM pVM, PVMCPU pVCpu) +{ + bool fFlushTBs; + + int rc, rc2; + Log2(("REMR3EmulateInstruction: (cs:eip=%04x:%08x)\n", CPUMGetGuestCS(pVCpu), CPUMGetGuestEIP(pVCpu))); + + /* Make sure this flag is set; we might never execute remR3CanExecuteRaw in the AMD-V case. + * CPU_RAW_HM makes sure we never execute interrupt handlers in the recompiler. + */ + if (!VM_IS_RAW_MODE_ENABLED(pVM)) + pVM->rem.s.Env.state |= CPU_RAW_HM; + + /* Skip the TB flush as that's rather expensive and not necessary for single instruction emulation. */ + fFlushTBs = pVM->rem.s.fFlushTBs; + pVM->rem.s.fFlushTBs = false; + + /* + * Sync the state and enable single instruction / single stepping. + */ + rc = REMR3State(pVM, pVCpu); + pVM->rem.s.fFlushTBs = fFlushTBs; + if (RT_SUCCESS(rc)) + { + int interrupt_request = pVM->rem.s.Env.interrupt_request; + Assert(!( interrupt_request + & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD + | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER + | CPU_INTERRUPT_EXTERNAL_DMA))); +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + cpu_single_step(&pVM->rem.s.Env, 0); +#endif + Assert(!pVM->rem.s.Env.singlestep_enabled); + + /* + * Now we set the execute single instruction flag and enter the cpu_exec loop. + */ + TMNotifyStartOfExecution(pVCpu); + pVM->rem.s.Env.interrupt_request = CPU_INTERRUPT_SINGLE_INSTR; + rc = cpu_exec(&pVM->rem.s.Env); + TMNotifyEndOfExecution(pVCpu); + switch (rc) + { + /* + * Executed without anything out of the way happening. + */ + case EXCP_SINGLE_INSTR: + rc = VINF_EM_RESCHEDULE; + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_SINGLE_INSTR\n")); + break; + + /* + * If we take a trap or start servicing a pending interrupt, we might end up here. + * (Timer thread or some other thread wishing EMT's attention.) + */ + case EXCP_INTERRUPT: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_INTERRUPT\n")); + rc = VINF_EM_RESCHEDULE; + break; + + /* + * Single step, we assume! + * If there was a breakpoint there we're fucked now. 
+ */ + case EXCP_DEBUG: + if (pVM->rem.s.Env.watchpoint_hit) + { + /** @todo deal with watchpoints */ + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc)); + rc = VINF_EM_DBG_BREAKPOINT; + } + else + { + CPUBreakpoint *pBP; + RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base; + QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry) + if (pBP->pc == GCPtrPC) + break; + rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED; + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC)); + } + break; + + /* + * hlt instruction. + */ + case EXCP_HLT: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HLT\n")); + rc = VINF_EM_HALT; + break; + + /* + * The VM has halted. + */ + case EXCP_HALTED: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_HALTED\n")); + rc = VINF_EM_HALT; + break; + + /* + * Switch to RAW-mode. + */ + case EXCP_EXECUTE_RAW: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_RAW\n")); + rc = VINF_EM_RESCHEDULE_RAW; + break; + + /* + * Switch to hardware accelerated RAW-mode. + */ + case EXCP_EXECUTE_HM: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_EXECUTE_HM\n")); + rc = VINF_EM_RESCHEDULE_HM; + break; + + /* + * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error). + */ + case EXCP_RC: + Log2(("REMR3EmulateInstruction: cpu_exec -> EXCP_RC\n")); + rc = pVM->rem.s.rc; + pVM->rem.s.rc = VERR_INTERNAL_ERROR; + break; + + /* + * Figure out the rest when they arrive.... + */ + default: + AssertMsgFailed(("rc=%d\n", rc)); + Log2(("REMR3EmulateInstruction: cpu_exec -> %d\n", rc)); + rc = VINF_EM_RESCHEDULE; + break; + } + + /* + * Switch back the state. + */ + pVM->rem.s.Env.interrupt_request = interrupt_request; + rc2 = REMR3StateBack(pVM, pVCpu); + AssertRC(rc2); + } + + Log2(("REMR3EmulateInstruction: returns %Rrc (cs:eip=%04x:%RGv)\n", + rc, pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip)); + return rc; +} + + +/** + * Used by REMR3Run to handle the case where CPU_EMULATE_SINGLE_STEP is set. + * + * @returns VBox status code. + * + * @param pVM The VM handle. + * @param pVCpu The Virtual CPU handle. + */ +static int remR3RunLoggingStep(PVM pVM, PVMCPU pVCpu) +{ + int rc; + + Assert(pVM->rem.s.fInREM); +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + cpu_single_step(&pVM->rem.s.Env, 1); +#else + Assert(!pVM->rem.s.Env.singlestep_enabled); +#endif + + /* + * Now we set the execute single instruction flag and enter the cpu_exec loop. + */ + for (;;) + { + char szBuf[256]; + + /* + * Log the current registers state and instruction. + */ + remR3StateUpdate(pVM, pVCpu); + DBGFR3Info(pVM->pUVM, "cpumguest", NULL, NULL); + szBuf[0] = '\0'; + rc = DBGFR3DisasInstrEx(pVM->pUVM, + pVCpu->idCpu, + 0, /* Sel */ 0, /* GCPtr */ + DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, + sizeof(szBuf), + NULL); + if (RT_FAILURE(rc)) + RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrEx failed with rc=%Rrc\n", rc); + RTLogPrintf("CPU%d: %s\n", pVCpu->idCpu, szBuf); + + /* + * Execute the instruction. + */ + TMNotifyStartOfExecution(pVCpu); + + if ( pVM->rem.s.Env.exception_index < 0 + || pVM->rem.s.Env.exception_index > 256) + pVM->rem.s.Env.exception_index = -1; /** @todo We need to do similar stuff elsewhere, I think. 
*/ + +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + pVM->rem.s.Env.interrupt_request = 0; +#else + pVM->rem.s.Env.interrupt_request = CPU_INTERRUPT_SINGLE_INSTR; +#endif + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)) + pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD; + RTLogPrintf("remR3RunLoggingStep: interrupt_request=%#x halted=%d exception_index=%#x\n", + pVM->rem.s.Env.interrupt_request, + pVM->rem.s.Env.halted, + pVM->rem.s.Env.exception_index + ); + + rc = cpu_exec(&pVM->rem.s.Env); + + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> %#x interrupt_request=%#x halted=%d exception_index=%#x\n", rc, + pVM->rem.s.Env.interrupt_request, + pVM->rem.s.Env.halted, + pVM->rem.s.Env.exception_index + ); + + TMNotifyEndOfExecution(pVCpu); + + switch (rc) + { +#ifndef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + /* + * The normal exit. + */ + case EXCP_SINGLE_INSTR: + if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK) + && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK)) + continue; + RTLogPrintf("remR3RunLoggingStep: rc=VINF_SUCCESS w/ FFs (%#x/%#RX64)\n", + pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions); + rc = VINF_SUCCESS; + break; + +#else + /* + * The normal exit, check for breakpoints at PC just to be sure. + */ +#endif + case EXCP_DEBUG: + if (pVM->rem.s.Env.watchpoint_hit) + { + /** @todo deal with watchpoints */ + Log2(("remR3RunLoggingStep: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc)); + rc = VINF_EM_DBG_BREAKPOINT; + } + else + { + CPUBreakpoint *pBP; + RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base; + QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry) + if (pBP->pc == GCPtrPC) + break; + rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED; + Log2(("remR3RunLoggingStep: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC)); + } +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + if (rc == VINF_EM_DBG_STEPPED) + { + if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_REM_MASK) + && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_REM_MASK)) + continue; + + RTLogPrintf("remR3RunLoggingStep: rc=VINF_SUCCESS w/ FFs (%#x/%#RX64)\n", + pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions); + rc = VINF_SUCCESS; + } +#endif + break; + + /* + * If we take a trap or start servicing a pending interrupt, we might end up here. + * (Timer thread or some other thread wishing EMT's attention.) + */ + case EXCP_INTERRUPT: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_INTERRUPT rc=VINF_SUCCESS\n"); + rc = VINF_SUCCESS; + break; + + /* + * hlt instruction. + */ + case EXCP_HLT: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_HLT rc=VINF_EM_HALT\n"); + rc = VINF_EM_HALT; + break; + + /* + * The VM has halted. + */ + case EXCP_HALTED: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_HALTED rc=VINF_EM_HALT\n"); + rc = VINF_EM_HALT; + break; + + /* + * Switch to RAW-mode. + */ + case EXCP_EXECUTE_RAW: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_EXECUTE_RAW rc=VINF_EM_RESCHEDULE_RAW\n"); + rc = VINF_EM_RESCHEDULE_RAW; + break; + + /* + * Switch to hardware accelerated RAW-mode. + */ + case EXCP_EXECUTE_HM: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_EXECUTE_HM rc=VINF_EM_RESCHEDULE_HM\n"); + rc = VINF_EM_RESCHEDULE_HM; + break; + + /* + * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error). 
+ */ + case EXCP_RC: + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> EXCP_RC rc=%Rrc\n", pVM->rem.s.rc); + rc = pVM->rem.s.rc; + pVM->rem.s.rc = VERR_INTERNAL_ERROR; + break; + + /* + * Figure out the rest when they arrive.... + */ + default: + AssertMsgFailed(("rc=%d\n", rc)); + RTLogPrintf("remR3RunLoggingStep: cpu_exec -> %d rc=VINF_EM_RESCHEDULE\n", rc); + rc = VINF_EM_RESCHEDULE; + break; + } + break; + } + +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING +// cpu_single_step(&pVM->rem.s.Env, 0); +#else + pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_SINGLE_INSTR | CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT); +#endif + return rc; +} + + +/** + * Runs code in recompiled mode. + * + * Before calling this function the REM state needs to be in sync with + * the VM. Call REMR3State() to perform the sync. It's only necessary + * (and permitted) to sync at the first call to REMR3Step()/REMR3Run() + * and after calling REMR3StateBack(). + * + * @returns VBox status code. + * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + */ +REMR3DECL(int) REMR3Run(PVM pVM, PVMCPU pVCpu) +{ + int rc; + + if (RT_UNLIKELY(pVM->rem.s.Env.state & CPU_EMULATE_SINGLE_STEP)) + return remR3RunLoggingStep(pVM, pVCpu); + + Assert(pVM->rem.s.fInREM); + Log2(("REMR3Run: (cs:eip=%04x:%RGv)\n", pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip)); + + TMNotifyStartOfExecution(pVCpu); + rc = cpu_exec(&pVM->rem.s.Env); + TMNotifyEndOfExecution(pVCpu); + switch (rc) + { + /* + * This happens when the execution was interrupted + * by an external event, like pending timers. + */ + case EXCP_INTERRUPT: + Log2(("REMR3Run: cpu_exec -> EXCP_INTERRUPT\n")); + rc = VINF_SUCCESS; + break; + + /* + * hlt instruction. + */ + case EXCP_HLT: + Log2(("REMR3Run: cpu_exec -> EXCP_HLT\n")); + rc = VINF_EM_HALT; + break; + + /* + * The VM has halted. + */ + case EXCP_HALTED: + Log2(("REMR3Run: cpu_exec -> EXCP_HALTED\n")); + rc = VINF_EM_HALT; + break; + + /* + * Breakpoint/single step. + */ + case EXCP_DEBUG: + if (pVM->rem.s.Env.watchpoint_hit) + { + /** @todo deal with watchpoints */ + Log2(("REMR3Run: cpu_exec -> EXCP_DEBUG rc=%Rrc !watchpoint_hit!\n", rc)); + rc = VINF_EM_DBG_BREAKPOINT; + } + else + { + CPUBreakpoint *pBP; + RTGCPTR GCPtrPC = pVM->rem.s.Env.eip + pVM->rem.s.Env.segs[R_CS].base; + QTAILQ_FOREACH(pBP, &pVM->rem.s.Env.breakpoints, entry) + if (pBP->pc == GCPtrPC) + break; + rc = pBP ? VINF_EM_DBG_BREAKPOINT : VINF_EM_DBG_STEPPED; + Log2(("REMR3Run: cpu_exec -> EXCP_DEBUG rc=%Rrc pBP=%p GCPtrPC=%RGv\n", rc, pBP, GCPtrPC)); + } + break; + + /* + * Switch to RAW-mode. + */ + case EXCP_EXECUTE_RAW: + Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_RAW pc=%RGv\n", pVM->rem.s.Env.eip)); + rc = VINF_EM_RESCHEDULE_RAW; + break; + + /* + * Switch to hardware accelerated RAW-mode. + */ + case EXCP_EXECUTE_HM: + Log2(("REMR3Run: cpu_exec -> EXCP_EXECUTE_HM\n")); + rc = VINF_EM_RESCHEDULE_HM; + break; + + /* + * An EM RC was raised (VMR3Reset/Suspend/PowerOff/some-fatal-error). + */ + case EXCP_RC: + Log2(("REMR3Run: cpu_exec -> EXCP_RC rc=%Rrc\n", pVM->rem.s.rc)); + rc = pVM->rem.s.rc; + pVM->rem.s.rc = VERR_INTERNAL_ERROR; + break; + + /* + * Figure out the rest when they arrive.... + */ + default: + AssertMsgFailed(("rc=%d\n", rc)); + Log2(("REMR3Run: cpu_exec -> %d\n", rc)); + rc = VINF_SUCCESS; + break; + } + + Log2(("REMR3Run: returns %Rrc (cs:eip=%04x:%RGv)\n", rc, pVM->rem.s.Env.segs[R_CS].selector, (RTGCPTR)pVM->rem.s.Env.eip)); + return rc; +} + + +/** + * Check if the cpu state is suitable for Raw execution. 
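 *
 * For a VM that is not using raw-mode, this builds a partial CPUMCTX from the
 * recompiler state and asks HMCanExecuteGuest() whether the hardware can take
 * over (returning EXCP_EXECUTE_HM). Otherwise the usual raw-mode restrictions
 * are applied: protected mode with paging, no V86 mode, no TF or interrupt
 * inhibition, no single stepping, breakpoints or watchpoints, no stale hidden
 * selectors, and either ring-3 code with IF set or 32-bit ring-0 (or ring-1,
 * when raw ring-1 is enabled) code with CR0.WP set and IOPL 0.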
+ * + * @returns true if RAW/HWACC mode is ok, false if we should stay in REM. + * + * @param env The CPU env struct. + * @param eip The EIP to check this for (might differ from env->eip). + * @param fFlags hflags OR'ed with IOPL, TF and VM from eflags. + * @param piException Stores EXCP_EXECUTE_RAW/HWACC in case raw mode is supported in this context + * + * @remark This function must be kept in perfect sync with the scheduler in EM.cpp! + */ +bool remR3CanExecuteRaw(CPUX86State *env, RTGCPTR eip, unsigned fFlags, int *piException) +{ + /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */ + /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */ + /* !!! THIS MUST BE IN SYNC WITH emR3Reschedule !!! */ + uint32_t u32CR0; + + /* Update counter. */ + env->pVM->rem.s.cCanExecuteRaw++; + + /* Never when single stepping+logging guest code. */ + if (env->state & CPU_EMULATE_SINGLE_STEP) + return false; + + if (!VM_IS_RAW_MODE_ENABLED(env->pVM)) + { +#ifdef RT_OS_WINDOWS + PCPUMCTX pCtx = alloca(sizeof(*pCtx)); +#else + CPUMCTX Ctx; + PCPUMCTX pCtx = &Ctx; +#endif + /** @todo NEM: scheduling. */ + + env->state |= CPU_RAW_HM; + + /* + * The simple check first... + */ + if (!EMIsHwVirtExecutionEnabled(env->pVM)) + return false; + + /* + * Create partial context for HMCanExecuteGuest. + */ + pCtx->cr0 = env->cr[0]; + pCtx->cr3 = env->cr[3]; + pCtx->cr4 = env->cr[4]; + + pCtx->tr.Sel = env->tr.selector; + pCtx->tr.ValidSel = env->tr.selector; + pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->tr.u64Base = env->tr.base; + pCtx->tr.u32Limit = env->tr.limit; + pCtx->tr.Attr.u = (env->tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->ldtr.Sel = env->ldt.selector; + pCtx->ldtr.ValidSel = env->ldt.selector; + pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ldtr.u64Base = env->ldt.base; + pCtx->ldtr.u32Limit = env->ldt.limit; + pCtx->ldtr.Attr.u = (env->ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->idtr.cbIdt = env->idt.limit; + pCtx->idtr.pIdt = env->idt.base; + + pCtx->gdtr.cbGdt = env->gdt.limit; + pCtx->gdtr.pGdt = env->gdt.base; + + pCtx->rsp = env->regs[R_ESP]; + pCtx->rip = env->eip; + + pCtx->eflags.u32 = env->eflags; + + pCtx->cs.Sel = env->segs[R_CS].selector; + pCtx->cs.ValidSel = env->segs[R_CS].selector; + pCtx->cs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->cs.u64Base = env->segs[R_CS].base; + pCtx->cs.u32Limit = env->segs[R_CS].limit; + pCtx->cs.Attr.u = (env->segs[R_CS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->ds.Sel = env->segs[R_DS].selector; + pCtx->ds.ValidSel = env->segs[R_DS].selector; + pCtx->ds.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ds.u64Base = env->segs[R_DS].base; + pCtx->ds.u32Limit = env->segs[R_DS].limit; + pCtx->ds.Attr.u = (env->segs[R_DS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->es.Sel = env->segs[R_ES].selector; + pCtx->es.ValidSel = env->segs[R_ES].selector; + pCtx->es.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->es.u64Base = env->segs[R_ES].base; + pCtx->es.u32Limit = env->segs[R_ES].limit; + pCtx->es.Attr.u = (env->segs[R_ES].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->fs.Sel = env->segs[R_FS].selector; + pCtx->fs.ValidSel = env->segs[R_FS].selector; + pCtx->fs.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->fs.u64Base = env->segs[R_FS].base; + pCtx->fs.u32Limit = env->segs[R_FS].limit; + pCtx->fs.Attr.u = (env->segs[R_FS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->gs.Sel = env->segs[R_GS].selector; + pCtx->gs.ValidSel = env->segs[R_GS].selector; + pCtx->gs.fFlags = CPUMSELREG_FLAGS_VALID; + 
pCtx->gs.u64Base = env->segs[R_GS].base; + pCtx->gs.u32Limit = env->segs[R_GS].limit; + pCtx->gs.Attr.u = (env->segs[R_GS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->ss.Sel = env->segs[R_SS].selector; + pCtx->ss.ValidSel = env->segs[R_SS].selector; + pCtx->ss.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ss.u64Base = env->segs[R_SS].base; + pCtx->ss.u32Limit = env->segs[R_SS].limit; + pCtx->ss.Attr.u = (env->segs[R_SS].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + + pCtx->msrEFER = env->efer; + + /* + * Hardware accelerated mode: + * Typically only 32-bits protected mode, with paging enabled, code is allowed here. + */ + PVMCPU pVCpu = &env->pVM->aCpus[0]; + if (HMCanExecuteGuest(pVCpu, pCtx)) + { + *piException = EXCP_EXECUTE_HM; + return true; + } + return false; + } + + /* + * Here we only support 16 & 32 bits protected mode ring 3 code that has no IO privileges + * or 32 bits protected mode ring 0 code + * + * The tests are ordered by the likelihood of being true during normal execution. + */ + if (fFlags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) + { + STAM_COUNTER_INC(&gStatRefuseTFInhibit); + Log2(("raw mode refused: fFlags=%#x\n", fFlags)); + return false; + } + +#ifndef VBOX_RAW_V86 + if (fFlags & VM_MASK) { + STAM_COUNTER_INC(&gStatRefuseVM86); + Log2(("raw mode refused: VM_MASK\n")); + return false; + } +#endif + + if (env->state & CPU_EMULATE_SINGLE_INSTR) + { +#ifndef DEBUG_bird + Log2(("raw mode refused: CPU_EMULATE_SINGLE_INSTR\n")); +#endif + return false; + } + + if (env->singlestep_enabled) + { + //Log2(("raw mode refused: Single step\n")); + return false; + } + + if (!QTAILQ_EMPTY(&env->breakpoints)) + { + //Log2(("raw mode refused: Breakpoints\n")); + return false; + } + + if (!QTAILQ_EMPTY(&env->watchpoints)) + { + //Log2(("raw mode refused: Watchpoints\n")); + return false; + } + + u32CR0 = env->cr[0]; + if ((u32CR0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE)) + { + STAM_COUNTER_INC(&gStatRefusePaging); + //Log2(("raw mode refused: %s%s%s\n", (u32CR0 & X86_CR0_PG) ? "" : " !PG", (u32CR0 & X86_CR0_PE) ? "" : " !PE", (u32CR0 & X86_CR0_AM) ? "" : " !AM")); + return false; + } + + if (env->cr[4] & CR4_PAE_MASK) + { + if (!(env->cpuid_features & X86_CPUID_FEATURE_EDX_PAE)) + { + STAM_COUNTER_INC(&gStatRefusePAE); + return false; + } + } + + if (((fFlags >> HF_CPL_SHIFT) & 3) == 3) + { + if (!EMIsRawRing3Enabled(env->pVM)) + return false; + + if (!(env->eflags & IF_MASK)) + { + STAM_COUNTER_INC(&gStatRefuseIF0); + Log2(("raw mode refused: IF (RawR3)\n")); + return false; + } + + if (!(u32CR0 & CR0_WP_MASK) && EMIsRawRing0Enabled(env->pVM)) + { + STAM_COUNTER_INC(&gStatRefuseWP0); + Log2(("raw mode refused: CR0.WP + RawR0\n")); + return false; + } + } + else + { + if (!EMIsRawRing0Enabled(env->pVM)) + return false; + + // Let's start with pure 32 bits ring 0 code first + if ((fFlags & (HF_SS32_MASK | HF_CS32_MASK)) != (HF_SS32_MASK | HF_CS32_MASK)) + { + STAM_COUNTER_INC(&gStatRefuseCode16); + Log2(("raw r0 mode refused: HF_[S|C]S32_MASK fFlags=%#x\n", fFlags)); + return false; + } + + if (EMIsRawRing1Enabled(env->pVM)) + { + /* Only ring 0 and 1 supervisor code. */ + if (((fFlags >> HF_CPL_SHIFT) & 3) == 2) /* ring 1 code is moved into ring 2, so we can't support ring-2 in that case. */ + { + Log2(("raw r0 mode refused: CPL %d\n", (fFlags >> HF_CPL_SHIFT) & 3)); + return false; + } + } + /* Only R0. 
*/ + else if (((fFlags >> HF_CPL_SHIFT) & 3) != 0) + { + STAM_COUNTER_INC(&gStatRefuseRing1or2); + Log2(("raw r0 mode refused: CPL %d\n", ((fFlags >> HF_CPL_SHIFT) & 3) )); + return false; + } + + if (!(u32CR0 & CR0_WP_MASK)) + { + STAM_COUNTER_INC(&gStatRefuseWP0); + Log2(("raw r0 mode refused: CR0.WP=0!\n")); + return false; + } + +#ifdef VBOX_WITH_RAW_MODE + if (PATMIsPatchGCAddr(env->pVM, eip)) + { + Log2(("raw r0 mode forced: patch code\n")); + *piException = EXCP_EXECUTE_RAW; + return true; + } +#endif + +#if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS) + if (!(env->eflags & IF_MASK)) + { + STAM_COUNTER_INC(&gStatRefuseIF0); + ////Log2(("R0: IF=0 VIF=%d %08X\n", eip, *env->pVMeflags)); + //Log2(("RR0: Interrupts turned off; fall back to emulation\n")); + return false; + } +#endif + +#ifndef VBOX_WITH_RAW_RING1 + if (((env->eflags >> IOPL_SHIFT) & 3) != 0) + { + Log2(("raw r0 mode refused: IOPL %d\n", ((env->eflags >> IOPL_SHIFT) & 3))); + return false; + } +#endif + env->state |= CPU_RAW_RING0; + } + + /* + * Don't reschedule the first time we're called, because there might be + * special reasons why we're here that is not covered by the above checks. + */ + if (env->pVM->rem.s.cCanExecuteRaw == 1) + { + Log2(("raw mode refused: first scheduling\n")); + STAM_COUNTER_INC(&gStatRefuseCanExecute); + return false; + } + + /* + * Stale hidden selectors means raw-mode is unsafe (being very careful). + */ + if (env->segs[R_CS].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale CS (%#x)\n", env->segs[R_CS].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_CS]); + return false; + } + if (env->segs[R_SS].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale SS (%#x)\n", env->segs[R_SS].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_SS]); + return false; + } + if (env->segs[R_DS].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale DS (%#x)\n", env->segs[R_DS].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_DS]); + return false; + } + if (env->segs[R_ES].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale ES (%#x)\n", env->segs[R_ES].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_ES]); + return false; + } + if (env->segs[R_FS].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale FS (%#x)\n", env->segs[R_FS].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_FS]); + return false; + } + if (env->segs[R_GS].fVBoxFlags & CPUMSELREG_FLAGS_STALE) + { + Log2(("raw mode refused: stale GS (%#x)\n", env->segs[R_GS].selector)); + STAM_COUNTER_INC(&gaStatRefuseStale[R_GS]); + return false; + } + +/* Assert(env->pVCpu && PGMPhysIsA20Enabled(env->pVCpu));*/ + *piException = EXCP_EXECUTE_RAW; + return true; +} + + +#ifdef VBOX_WITH_RAW_MODE +/** + * Fetches a code byte. + * + * @returns Success indicator (bool) for ease of use. + * @param env The CPU environment structure. + * @param GCPtrInstr Where to fetch code. + * @param pu8Byte Where to store the byte on success + */ +bool remR3GetOpcode(CPUX86State *env, RTGCPTR GCPtrInstr, uint8_t *pu8Byte) +{ + int rc = PATMR3QueryOpcode(env->pVM, GCPtrInstr, pu8Byte); + if (RT_SUCCESS(rc)) + return true; + return false; +} +#endif /* VBOX_WITH_RAW_MODE */ + + +/** + * Flush (or invalidate if you like) page table/dir entry. + * + * (invlpg instruction; tlb_flush_page) + * + * @param env Pointer to cpu environment. + * @param GCPtr The virtual address which page table/dir entry should be invalidated. 
+ */ +void remR3FlushPage(CPUX86State *env, RTGCPTR GCPtr) +{ + PVM pVM = env->pVM; + PCPUMCTX pCtx; + int rc; + + Assert(EMRemIsLockOwner(env->pVM)); + + /* + * When we're replaying invlpg instructions or restoring a saved + * state we disable this path. + */ + if (pVM->rem.s.fIgnoreInvlPg || pVM->rem.s.cIgnoreAll) + return; + LogFlow(("remR3FlushPage: GCPtr=%RGv\n", GCPtr)); + Assert(pVM->rem.s.fInREM || pVM->rem.s.fInStateSync); + + //RAWEx_ProfileStop(env, STATS_QEMU_TOTAL); + + /* + * Update the control registers before calling PGMFlushPage. + */ + pCtx = (PCPUMCTX)pVM->rem.s.pCtx; + Assert(pCtx); + pCtx->cr0 = env->cr[0]; + pCtx->cr3 = env->cr[3]; +#ifdef VBOX_WITH_RAW_MODE + if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + pCtx->cr4 = env->cr[4]; + + /* + * Let PGM do the rest. + */ + Assert(env->pVCpu); + rc = PGMInvalidatePage(env->pVCpu, GCPtr); + if (RT_FAILURE(rc)) + { + AssertMsgFailed(("remR3FlushPage %RGv failed with %d!!\n", GCPtr, rc)); + VMCPU_FF_SET(env->pVCpu, VMCPU_FF_PGM_SYNC_CR3); + } + //RAWEx_ProfileStart(env, STATS_QEMU_TOTAL); +} + + +#ifndef REM_PHYS_ADDR_IN_TLB +/** Wrapper for PGMR3PhysTlbGCPhys2Ptr. */ +void *remR3TlbGCPhys2Ptr(CPUX86State *env1, target_ulong physAddr, int fWritable) +{ + void *pv; + int rc; + + + /* Address must be aligned enough to fiddle with lower bits */ + Assert((physAddr & 0x3) == 0); + /*AssertMsg((env1->a20_mask & physAddr) == physAddr, ("%llx\n", (uint64_t)physAddr));*/ + + STAM_PROFILE_START(&gStatGCPhys2HCVirt, a); + rc = PGMR3PhysTlbGCPhys2Ptr(env1->pVM, physAddr, true /*fWritable*/, &pv); + STAM_PROFILE_STOP(&gStatGCPhys2HCVirt, a); + Assert( rc == VINF_SUCCESS + || rc == VINF_PGM_PHYS_TLB_CATCH_WRITE + || rc == VERR_PGM_PHYS_TLB_CATCH_ALL + || rc == VERR_PGM_PHYS_TLB_UNASSIGNED); + if (RT_FAILURE(rc)) + return (void *)1; + if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE) + return (void *)((uintptr_t)pv | 2); + return pv; +} +#endif /* REM_PHYS_ADDR_IN_TLB */ + + +/** + * Called from tlb_protect_code in order to write monitor a code page. + * + * @param env Pointer to the CPU environment. + * @param GCPtr Code page to monitor + */ +void remR3ProtectCode(CPUX86State *env, RTGCPTR GCPtr) +{ +#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC + Assert(env->pVM->rem.s.fInREM); + if ( (env->cr[0] & X86_CR0_PG) /* paging must be enabled */ + && !(env->state & CPU_EMULATE_SINGLE_INSTR) /* ignore during single instruction execution */ + && (((env->hflags >> HF_CPL_SHIFT) & 3) == 0) /* supervisor mode only */ + && !(env->eflags & VM_MASK) /* no V86 mode */ + && VM_IS_RAW_MODE_ENABLED(env->pVM)) + CSAMR3MonitorPage(env->pVM, GCPtr, CSAM_TAG_REM); +#endif +} + + +/** + * Called from tlb_unprotect_code in order to clear write monitoring for a code page. + * + * @param env Pointer to the CPU environment. 
+ * @param GCPtr Code page to monitor + */ +void remR3UnprotectCode(CPUX86State *env, RTGCPTR GCPtr) +{ + Assert(env->pVM->rem.s.fInREM); +#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC + if ( (env->cr[0] & X86_CR0_PG) /* paging must be enabled */ + && !(env->state & CPU_EMULATE_SINGLE_INSTR) /* ignore during single instruction execution */ + && (((env->hflags >> HF_CPL_SHIFT) & 3) == 0) /* supervisor mode only */ + && !(env->eflags & VM_MASK) /* no V86 mode */ + && VM_IS_RAW_MODE_ENABLED(env->pVM)) + CSAMR3UnmonitorPage(env->pVM, GCPtr, CSAM_TAG_REM); +#endif +} + + +/** + * Called when the CPU is initialized, any of the CRx registers are changed or + * when the A20 line is modified. + * + * @param env Pointer to the CPU environment. + * @param fGlobal Set if the flush is global. + */ +void remR3FlushTLB(CPUX86State *env, bool fGlobal) +{ + PVM pVM = env->pVM; + PCPUMCTX pCtx; + Assert(EMRemIsLockOwner(pVM)); + + /* + * When we're replaying invlpg instructions or restoring a saved + * state we disable this path. + */ + if (pVM->rem.s.fIgnoreCR3Load || pVM->rem.s.cIgnoreAll) + return; + Assert(pVM->rem.s.fInREM); + + /* + * The caller doesn't check cr4, so we have to do that for ourselves. + */ + if (!fGlobal && !(env->cr[4] & X86_CR4_PGE)) + fGlobal = true; + Log(("remR3FlushTLB: CR0=%08RX64 CR3=%08RX64 CR4=%08RX64 %s\n", (uint64_t)env->cr[0], (uint64_t)env->cr[3], (uint64_t)env->cr[4], fGlobal ? " global" : "")); + + /* + * Update the control registers before calling PGMR3FlushTLB. + */ + pCtx = (PCPUMCTX)pVM->rem.s.pCtx; + Assert(pCtx); + pCtx->cr0 = env->cr[0]; + pCtx->cr3 = env->cr[3]; +#ifdef VBOX_WITH_RAW_MODE + if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + pCtx->cr4 = env->cr[4]; + + /* + * Let PGM do the rest. + */ + Assert(env->pVCpu); + PGMFlushTLB(env->pVCpu, env->cr[3], fGlobal); +} + + +/** + * Called when any of the cr0, cr4 or efer registers is updated. + * + * @param env Pointer to the CPU environment. + */ +void remR3ChangeCpuMode(CPUX86State *env) +{ + PVM pVM = env->pVM; + uint64_t efer; + PCPUMCTX pCtx; + int rc; + + /* + * When we're replaying loads or restoring a saved + * state this path is disabled. + */ + if (pVM->rem.s.fIgnoreCpuMode || pVM->rem.s.cIgnoreAll) + return; + Assert(pVM->rem.s.fInREM); + + pCtx = (PCPUMCTX)pVM->rem.s.pCtx; + Assert(pCtx); + + /* + * Notify PGM about WP0 being enabled (like CPUSetGuestCR0 does). + */ + if (((env->cr[0] ^ pCtx->cr0) & X86_CR0_WP) && (env->cr[0] & X86_CR0_WP)) + PGMCr0WpEnabled(env->pVCpu); + + /* + * Update the control registers before calling PGMChangeMode() + * as it may need to map whatever cr3 is pointing to. + */ + pCtx->cr0 = env->cr[0]; + pCtx->cr3 = env->cr[3]; +#ifdef VBOX_WITH_RAW_MODE + if (((env->cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(env->pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + pCtx->cr4 = env->cr[4]; +#ifdef TARGET_X86_64 + efer = env->efer; + pCtx->msrEFER = efer; +#else + efer = 0; +#endif + Assert(env->pVCpu); + rc = PGMChangeMode(env->pVCpu, env->cr[0], env->cr[4], efer); + if (rc != VINF_SUCCESS) + { + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("PGMChangeMode(, %RX64, %RX64, %RX64) -> %Rrc -> remR3RaiseRC\n", env->cr[0], env->cr[4], efer, rc)); + remR3RaiseRC(env->pVM, rc); + } + else + cpu_abort(env, "PGMChangeMode(, %RX64, %RX64, %RX64) -> %Rrc\n", env->cr[0], env->cr[4], efer, rc); + } +} + + +/** + * Called from compiled code to run dma. 
+ * + * @param env Pointer to the CPU environment. + */ +void remR3DmaRun(CPUX86State *env) +{ + remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE); + PDMR3DmaRun(env->pVM); + remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE); +} + + +/** + * Called from compiled code to schedule pending timers in VMM + * + * @param env Pointer to the CPU environment. + */ +void remR3TimersRun(CPUX86State *env) +{ + LogFlow(("remR3TimersRun:\n")); + LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("remR3TimersRun\n")); + remR3ProfileStop(STATS_QEMU_RUN_EMULATED_CODE); + remR3ProfileStart(STATS_QEMU_RUN_TIMERS); + TMR3TimerQueuesDo(env->pVM); + remR3ProfileStop(STATS_QEMU_RUN_TIMERS); + remR3ProfileStart(STATS_QEMU_RUN_EMULATED_CODE); +} + + +/** + * Record trap occurrence + * + * @returns VBox status code + * @param env Pointer to the CPU environment. + * @param uTrap Trap nr + * @param uErrorCode Error code + * @param pvNextEIP Next EIP + */ +int remR3NotifyTrap(CPUX86State *env, uint32_t uTrap, uint32_t uErrorCode, RTGCPTR pvNextEIP) +{ + PVM pVM = env->pVM; +#ifdef VBOX_WITH_STATISTICS + static STAMCOUNTER s_aStatTrap[255]; + static bool s_aRegisters[RT_ELEMENTS(s_aStatTrap)]; +#endif + +#ifdef VBOX_WITH_STATISTICS + if (uTrap < 255) + { + if (!s_aRegisters[uTrap]) + { + char szStatName[64]; + s_aRegisters[uTrap] = true; + RTStrPrintf(szStatName, sizeof(szStatName), "/REM/Trap/0x%02X", uTrap); + STAM_REG(env->pVM, &s_aStatTrap[uTrap], STAMTYPE_COUNTER, szStatName, STAMUNIT_OCCURENCES, "Trap stats."); + } + STAM_COUNTER_INC(&s_aStatTrap[uTrap]); + } +#endif + Log(("remR3NotifyTrap: uTrap=%x error=%x next_eip=%RGv eip=%RGv cr2=%RGv\n", uTrap, uErrorCode, pvNextEIP, (RTGCPTR)env->eip, (RTGCPTR)env->cr[2])); + if( uTrap < 0x20 + && (env->cr[0] & X86_CR0_PE) + && !(env->eflags & X86_EFL_VM)) + { +#ifdef DEBUG + remR3DisasInstr(env, 1, "remR3NotifyTrap: "); +#endif + if(pVM->rem.s.uPendingException == uTrap && ++pVM->rem.s.cPendingExceptions > 512) + { + LogRel(("VERR_REM_TOO_MANY_TRAPS -> uTrap=%x error=%x next_eip=%RGv eip=%RGv cr2=%RGv\n", uTrap, uErrorCode, pvNextEIP, (RTGCPTR)env->eip, (RTGCPTR)env->cr[2])); + remR3RaiseRC(env->pVM, VERR_REM_TOO_MANY_TRAPS); + return VERR_REM_TOO_MANY_TRAPS; + } + if(pVM->rem.s.uPendingException != uTrap || pVM->rem.s.uPendingExcptEIP != env->eip || pVM->rem.s.uPendingExcptCR2 != env->cr[2]) + { + Log(("remR3NotifyTrap: uTrap=%#x set as pending\n", uTrap)); + pVM->rem.s.cPendingExceptions = 1; + } + pVM->rem.s.uPendingException = uTrap; + pVM->rem.s.uPendingExcptEIP = env->eip; + pVM->rem.s.uPendingExcptCR2 = env->cr[2]; + } + else + { + pVM->rem.s.cPendingExceptions = 0; + pVM->rem.s.uPendingException = uTrap; + pVM->rem.s.uPendingExcptEIP = env->eip; + pVM->rem.s.uPendingExcptCR2 = env->cr[2]; + } + return VINF_SUCCESS; +} + + +/* + * Clear current active trap + * + * @param pVM VM Handle. + */ +void remR3TrapClear(PVM pVM) +{ + pVM->rem.s.cPendingExceptions = 0; + pVM->rem.s.uPendingException = 0; + pVM->rem.s.uPendingExcptEIP = 0; + pVM->rem.s.uPendingExcptCR2 = 0; +} + + +/* + * Record previous call instruction addresses + * + * @param env Pointer to the CPU environment. + */ +void remR3RecordCall(CPUX86State *env) +{ +#ifdef VBOX_WITH_RAW_MODE + CSAMR3RecordCallAddress(env->pVM, env->eip); +#endif +} + + +/** + * Syncs the internal REM state with the VM. + * + * This must be called before REMR3Run() is invoked whenever when the REM + * state is not up to date. Calling it several times in a row is not + * permitted. + * + * @returns VBox status code. 
+ * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + * + * @remark The caller has to check for important FFs before calling REMR3Run. REMR3State will + * no do this since the majority of the callers don't want any unnecessary of events + * pending that would immediately interrupt execution. + */ +REMR3DECL(int) REMR3State(PVM pVM, PVMCPU pVCpu) +{ + register const CPUMCTX *pCtx; + register unsigned fFlags; + unsigned i; + TRPMEVENT enmType; + uint8_t u8TrapNo; + uint32_t uCpl; + int rc; + + STAM_PROFILE_START(&pVM->rem.s.StatsState, a); + Log2(("REMR3State:\n")); + + pVM->rem.s.Env.pVCpu = pVCpu; + pCtx = pVM->rem.s.pCtx = CPUMQueryGuestCtxPtr(pVCpu); + + Assert(pCtx); + if ( CPUMIsGuestInSvmNestedHwVirtMode(pCtx) + || CPUMIsGuestInVmxNonRootMode(pCtx)) + { + AssertMsgFailed(("Bad scheduling - can't exec. nested-guest in REM!\n")); + return VERR_EM_CANNOT_EXEC_GUEST; + } + + Assert(!pVM->rem.s.fInREM); + pVM->rem.s.fInStateSync = true; + + /* + * If we have to flush TBs, do that immediately. + */ + if (pVM->rem.s.fFlushTBs) + { + STAM_COUNTER_INC(&gStatFlushTBs); + tb_flush(&pVM->rem.s.Env); + pVM->rem.s.fFlushTBs = false; + } + + /* + * Copy the registers which require no special handling. + */ +#ifdef TARGET_X86_64 + /* Note that the high dwords of 64 bits registers are undefined in 32 bits mode and are undefined after a mode change. */ + Assert(R_EAX == 0); + pVM->rem.s.Env.regs[R_EAX] = pCtx->rax; + Assert(R_ECX == 1); + pVM->rem.s.Env.regs[R_ECX] = pCtx->rcx; + Assert(R_EDX == 2); + pVM->rem.s.Env.regs[R_EDX] = pCtx->rdx; + Assert(R_EBX == 3); + pVM->rem.s.Env.regs[R_EBX] = pCtx->rbx; + Assert(R_ESP == 4); + pVM->rem.s.Env.regs[R_ESP] = pCtx->rsp; + Assert(R_EBP == 5); + pVM->rem.s.Env.regs[R_EBP] = pCtx->rbp; + Assert(R_ESI == 6); + pVM->rem.s.Env.regs[R_ESI] = pCtx->rsi; + Assert(R_EDI == 7); + pVM->rem.s.Env.regs[R_EDI] = pCtx->rdi; + pVM->rem.s.Env.regs[8] = pCtx->r8; + pVM->rem.s.Env.regs[9] = pCtx->r9; + pVM->rem.s.Env.regs[10] = pCtx->r10; + pVM->rem.s.Env.regs[11] = pCtx->r11; + pVM->rem.s.Env.regs[12] = pCtx->r12; + pVM->rem.s.Env.regs[13] = pCtx->r13; + pVM->rem.s.Env.regs[14] = pCtx->r14; + pVM->rem.s.Env.regs[15] = pCtx->r15; + + pVM->rem.s.Env.eip = pCtx->rip; + + pVM->rem.s.Env.eflags = pCtx->rflags.u64; +#else + Assert(R_EAX == 0); + pVM->rem.s.Env.regs[R_EAX] = pCtx->eax; + Assert(R_ECX == 1); + pVM->rem.s.Env.regs[R_ECX] = pCtx->ecx; + Assert(R_EDX == 2); + pVM->rem.s.Env.regs[R_EDX] = pCtx->edx; + Assert(R_EBX == 3); + pVM->rem.s.Env.regs[R_EBX] = pCtx->ebx; + Assert(R_ESP == 4); + pVM->rem.s.Env.regs[R_ESP] = pCtx->esp; + Assert(R_EBP == 5); + pVM->rem.s.Env.regs[R_EBP] = pCtx->ebp; + Assert(R_ESI == 6); + pVM->rem.s.Env.regs[R_ESI] = pCtx->esi; + Assert(R_EDI == 7); + pVM->rem.s.Env.regs[R_EDI] = pCtx->edi; + pVM->rem.s.Env.eip = pCtx->eip; + + pVM->rem.s.Env.eflags = pCtx->eflags.u32; +#endif + + pVM->rem.s.Env.cr[2] = pCtx->cr2; + + /** @todo we could probably benefit from using a CPUM_CHANGED_DRx flag too! */ + for (i=0;i<8;i++) + pVM->rem.s.Env.dr[i] = pCtx->dr[i]; + +#ifdef HF_HALTED_MASK /** @todo remove me when we're up to date again. */ + /* + * Clear the halted hidden flag (the interrupt waking up the CPU can + * have been dispatched in raw mode). + */ + pVM->rem.s.Env.hflags &= ~HF_HALTED_MASK; +#endif + + /* + * Replay invlpg? Only if we're not flushing the TLB. 
+ */ + fFlags = CPUMR3RemEnter(pVCpu, &uCpl); + LogFlow(("CPUMR3RemEnter %x %x\n", fFlags, uCpl)); + if (pVM->rem.s.cInvalidatedPages) + { + if (!(fFlags & CPUM_CHANGED_GLOBAL_TLB_FLUSH)) + { + RTUINT i; + + pVM->rem.s.fIgnoreCR3Load = true; + pVM->rem.s.fIgnoreInvlPg = true; + for (i = 0; i < pVM->rem.s.cInvalidatedPages; i++) + { + Log2(("REMR3State: invlpg %RGv\n", pVM->rem.s.aGCPtrInvalidatedPages[i])); + tlb_flush_page(&pVM->rem.s.Env, pVM->rem.s.aGCPtrInvalidatedPages[i]); + } + pVM->rem.s.fIgnoreInvlPg = false; + pVM->rem.s.fIgnoreCR3Load = false; + } + pVM->rem.s.cInvalidatedPages = 0; + } + + /* Replay notification changes. */ + REMR3ReplayHandlerNotifications(pVM); + + /* Update MSRs; before CRx registers! */ + pVM->rem.s.Env.efer = pCtx->msrEFER; + pVM->rem.s.Env.star = pCtx->msrSTAR; + pVM->rem.s.Env.pat = pCtx->msrPAT; +#ifdef TARGET_X86_64 + pVM->rem.s.Env.lstar = pCtx->msrLSTAR; + pVM->rem.s.Env.cstar = pCtx->msrCSTAR; + pVM->rem.s.Env.fmask = pCtx->msrSFMASK; + pVM->rem.s.Env.kernelgsbase = pCtx->msrKERNELGSBASE; + + /* Update the internal long mode activate flag according to the new EFER value. */ + if (pCtx->msrEFER & MSR_K6_EFER_LMA) + pVM->rem.s.Env.hflags |= HF_LMA_MASK; + else + pVM->rem.s.Env.hflags &= ~(HF_LMA_MASK | HF_CS64_MASK); +#endif + + /* Update the inhibit IRQ mask. */ + pVM->rem.s.Env.hflags &= ~HF_INHIBIT_IRQ_MASK; + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) + { + RTGCPTR InhibitPC = EMGetInhibitInterruptsPC(pVCpu); + if (InhibitPC == pCtx->rip) + pVM->rem.s.Env.hflags |= HF_INHIBIT_IRQ_MASK; + else + { + Log(("Clearing VMCPU_FF_INHIBIT_INTERRUPTS at %RGv - successor %RGv (REM#1)\n", (RTGCPTR)pCtx->rip, InhibitPC)); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + } + + /* Update the inhibit NMI mask. */ + pVM->rem.s.Env.hflags2 &= ~HF2_NMI_MASK; + if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) + pVM->rem.s.Env.hflags2 |= HF2_NMI_MASK; + + /* + * Sync the A20 gate. + */ + bool fA20State = PGMPhysIsA20Enabled(pVCpu); + if (fA20State != RT_BOOL(pVM->rem.s.Env.a20_mask & RT_BIT(20))) + { + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + cpu_x86_set_a20(&pVM->rem.s.Env, fA20State); + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + } + + /* + * Registers which are rarely changed and require special handling / order when changed. + */ + if (fFlags & ( CPUM_CHANGED_GLOBAL_TLB_FLUSH + | CPUM_CHANGED_CR4 + | CPUM_CHANGED_CR0 + | CPUM_CHANGED_CR3 + | CPUM_CHANGED_GDTR + | CPUM_CHANGED_IDTR + | CPUM_CHANGED_SYSENTER_MSR + | CPUM_CHANGED_LDTR + | CPUM_CHANGED_CPUID + | CPUM_CHANGED_FPU_REM + ) + ) + { + if (fFlags & CPUM_CHANGED_GLOBAL_TLB_FLUSH) + { + pVM->rem.s.fIgnoreCR3Load = true; + tlb_flush(&pVM->rem.s.Env, true); + pVM->rem.s.fIgnoreCR3Load = false; + } + + /* CR4 before CR0! 
*/ + if (fFlags & CPUM_CHANGED_CR4) + { + pVM->rem.s.fIgnoreCR3Load = true; + pVM->rem.s.fIgnoreCpuMode = true; + cpu_x86_update_cr4(&pVM->rem.s.Env, pCtx->cr4); + pVM->rem.s.fIgnoreCpuMode = false; + pVM->rem.s.fIgnoreCR3Load = false; + } + + if (fFlags & CPUM_CHANGED_CR0) + { + pVM->rem.s.fIgnoreCR3Load = true; + pVM->rem.s.fIgnoreCpuMode = true; + cpu_x86_update_cr0(&pVM->rem.s.Env, pCtx->cr0); + pVM->rem.s.fIgnoreCpuMode = false; + pVM->rem.s.fIgnoreCR3Load = false; + } + + if (fFlags & CPUM_CHANGED_CR3) + { + pVM->rem.s.fIgnoreCR3Load = true; + cpu_x86_update_cr3(&pVM->rem.s.Env, pCtx->cr3); + pVM->rem.s.fIgnoreCR3Load = false; + } + + if (fFlags & CPUM_CHANGED_GDTR) + { + pVM->rem.s.Env.gdt.base = pCtx->gdtr.pGdt; + pVM->rem.s.Env.gdt.limit = pCtx->gdtr.cbGdt; + } + + if (fFlags & CPUM_CHANGED_IDTR) + { + pVM->rem.s.Env.idt.base = pCtx->idtr.pIdt; + pVM->rem.s.Env.idt.limit = pCtx->idtr.cbIdt; + } + + if (fFlags & CPUM_CHANGED_SYSENTER_MSR) + { + pVM->rem.s.Env.sysenter_cs = pCtx->SysEnter.cs; + pVM->rem.s.Env.sysenter_eip = pCtx->SysEnter.eip; + pVM->rem.s.Env.sysenter_esp = pCtx->SysEnter.esp; + } + + if (fFlags & CPUM_CHANGED_LDTR) + { + if (pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID) + { + pVM->rem.s.Env.ldt.selector = pCtx->ldtr.Sel; + pVM->rem.s.Env.ldt.newselector = 0; + pVM->rem.s.Env.ldt.fVBoxFlags = pCtx->ldtr.fFlags; + pVM->rem.s.Env.ldt.base = pCtx->ldtr.u64Base; + pVM->rem.s.Env.ldt.limit = pCtx->ldtr.u32Limit; + pVM->rem.s.Env.ldt.flags = (pCtx->ldtr.Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT; + } + else + { + AssertFailed(); /* Shouldn't happen, see cpumR3LoadExec. */ + sync_ldtr(&pVM->rem.s.Env, pCtx->ldtr.Sel); + } + } + + if (fFlags & CPUM_CHANGED_CPUID) + { + uint32_t u32Dummy; + + /* + * Get the CPUID features. + */ + CPUMGetGuestCpuId(pVCpu, 1, 0, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext_features, &pVM->rem.s.Env.cpuid_features); + CPUMGetGuestCpuId(pVCpu, 0x80000001, 0, &u32Dummy, &u32Dummy, &u32Dummy, &pVM->rem.s.Env.cpuid_ext2_features); + } + + /* Sync FPU state after CR4, CPUID and EFER (!). */ + if (fFlags & CPUM_CHANGED_FPU_REM) + save_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->pXStateR3->x87); /* 'save' is an excellent name. */ + } + + /* + * Sync TR unconditionally to make life simpler. + */ + pVM->rem.s.Env.tr.selector = pCtx->tr.Sel; + pVM->rem.s.Env.tr.newselector = 0; + pVM->rem.s.Env.tr.fVBoxFlags = pCtx->tr.fFlags; + pVM->rem.s.Env.tr.base = pCtx->tr.u64Base; + pVM->rem.s.Env.tr.limit = pCtx->tr.u32Limit; + pVM->rem.s.Env.tr.flags = (pCtx->tr.Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT; + + /* + * Update selector registers. + * + * This must be done *after* we've synced gdt, ldt and crX registers + * since we're reading the GDT/LDT om sync_seg. This will happen with + * saved state which takes a quick dip into rawmode for instance. + * + * CPL/Stack; Note first check this one as the CPL might have changed. + * The wrong CPL can cause QEmu to raise an exception in sync_seg!! + */ + cpu_x86_set_cpl(&pVM->rem.s.Env, uCpl); + /* Note! QEmu saves the 2nd dword of the descriptor; we should convert the attribute word back! 
*/ +#define SYNC_IN_SREG(a_pEnv, a_SReg, a_pRemSReg, a_pVBoxSReg) \ + do \ + { \ + if (CPUMSELREG_ARE_HIDDEN_PARTS_VALID(pVCpu, a_pVBoxSReg)) \ + { \ + cpu_x86_load_seg_cache(a_pEnv, R_##a_SReg, \ + (a_pVBoxSReg)->Sel, \ + (a_pVBoxSReg)->u64Base, \ + (a_pVBoxSReg)->u32Limit, \ + ((a_pVBoxSReg)->Attr.u & SEL_FLAGS_SMASK) << SEL_FLAGS_SHIFT); \ + (a_pRemSReg)->fVBoxFlags = (a_pVBoxSReg)->fFlags; \ + } \ + /* This only-reload-if-changed stuff is the old approach, we should ditch it. */ \ + else if ((a_pRemSReg)->selector != (a_pVBoxSReg)->Sel) \ + { \ + Log2(("REMR3State: " #a_SReg " changed from %04x to %04x!\n", \ + (a_pRemSReg)->selector, (a_pVBoxSReg)->Sel)); \ + sync_seg(a_pEnv, R_##a_SReg, (a_pVBoxSReg)->Sel); \ + if ((a_pRemSReg)->newselector) \ + STAM_COUNTER_INC(&gStatSelOutOfSync[R_##a_SReg]); \ + } \ + else \ + (a_pRemSReg)->newselector = 0; \ + } while (0) + + SYNC_IN_SREG(&pVM->rem.s.Env, CS, &pVM->rem.s.Env.segs[R_CS], &pCtx->cs); + SYNC_IN_SREG(&pVM->rem.s.Env, SS, &pVM->rem.s.Env.segs[R_SS], &pCtx->ss); + SYNC_IN_SREG(&pVM->rem.s.Env, DS, &pVM->rem.s.Env.segs[R_DS], &pCtx->ds); + SYNC_IN_SREG(&pVM->rem.s.Env, ES, &pVM->rem.s.Env.segs[R_ES], &pCtx->es); + SYNC_IN_SREG(&pVM->rem.s.Env, FS, &pVM->rem.s.Env.segs[R_FS], &pCtx->fs); + SYNC_IN_SREG(&pVM->rem.s.Env, GS, &pVM->rem.s.Env.segs[R_GS], &pCtx->gs); + /** @todo need to find a way to communicate potential GDT/LDT changes and thread switches. The selector might + * be the same but not the base/limit. */ + + /* + * Check for traps. + */ + pVM->rem.s.Env.exception_index = -1; /** @todo this won't work :/ */ + rc = TRPMQueryTrap(pVCpu, &u8TrapNo, &enmType); + if (RT_SUCCESS(rc)) + { +#ifdef DEBUG + if (u8TrapNo == 0x80) + { + remR3DumpLnxSyscall(pVCpu); + remR3DumpOBsdSyscall(pVCpu); + } +#endif + + pVM->rem.s.Env.exception_index = u8TrapNo; + if (enmType != TRPM_SOFTWARE_INT) + { + pVM->rem.s.Env.exception_is_int = enmType == TRPM_HARDWARE_INT + ? EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ : 0; /* HACK ALERT! */ + pVM->rem.s.Env.exception_next_eip = pVM->rem.s.Env.eip; + } + else + { + /* + * The there are two 1 byte opcodes and one 2 byte opcode for software interrupts. + * We ASSUME that there are no prefixes and sets the default to 2 byte, and checks + * for int03 and into. + */ + pVM->rem.s.Env.exception_is_int = 1; + pVM->rem.s.Env.exception_next_eip = pCtx->rip + 2; + /* int 3 may be generated by one-byte 0xcc */ + if (u8TrapNo == 3) + { + if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xcc) + pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1; + } + /* int 4 may be generated by one-byte 0xce */ + else if (u8TrapNo == 4) + { + if (read_byte(&pVM->rem.s.Env, pVM->rem.s.Env.segs[R_CS].base + pCtx->rip) == 0xce) + pVM->rem.s.Env.exception_next_eip = pCtx->rip + 1; + } + } + + /* get error code and cr2 if needed. */ + if (enmType == TRPM_TRAP) + { + switch (u8TrapNo) + { + case X86_XCPT_PF: + pVM->rem.s.Env.cr[2] = TRPMGetFaultAddress(pVCpu); + /* fallthru */ + case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP: + pVM->rem.s.Env.error_code = TRPMGetErrorCode(pVCpu); + break; + + case X86_XCPT_AC: case X86_XCPT_DF: + default: + pVM->rem.s.Env.error_code = 0; + break; + } + } + else + pVM->rem.s.Env.error_code = 0; + + /* + * We can now reset the active trap since the recompiler is gonna have a go at it. 
+ */ + rc = TRPMResetTrap(pVCpu); + AssertRC(rc); + Log2(("REMR3State: trap=%02x errcd=%RGv cr2=%RGv nexteip=%RGv%s\n", pVM->rem.s.Env.exception_index, (RTGCPTR)pVM->rem.s.Env.error_code, + (RTGCPTR)pVM->rem.s.Env.cr[2], (RTGCPTR)pVM->rem.s.Env.exception_next_eip, pVM->rem.s.Env.exception_is_int ? " software" : "")); + } + + /* + * Clear old interrupt request flags; Check for pending hardware interrupts. + * (See @remark for why we don't check for other FFs.) + */ + pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER); + if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC)) + APICUpdatePendingInterrupts(pVCpu); + if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)) + pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD; + + /* + * We're now in REM mode. + */ + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_REM); + pVM->rem.s.fInREM = true; + pVM->rem.s.fInStateSync = false; + pVM->rem.s.cCanExecuteRaw = 0; + STAM_PROFILE_STOP(&pVM->rem.s.StatsState, a); + Log2(("REMR3State: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * Syncs back changes in the REM state to the VM state. + * + * This must be called after invoking REMR3Run(). + * Calling it several times in a row is not permitted. + * + * @returns VBox status code. + * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + */ +REMR3DECL(int) REMR3StateBack(PVM pVM, PVMCPU pVCpu) +{ + register PCPUMCTX pCtx = pVM->rem.s.pCtx; + Assert(pCtx); + unsigned i; + + STAM_PROFILE_START(&pVM->rem.s.StatsStateBack, a); + Log2(("REMR3StateBack:\n")); + Assert(pVM->rem.s.fInREM); + + /* + * Copy back the registers. + * This is done in the order they are declared in the CPUMCTX structure. + */ + + /** @todo FOP */ + /** @todo FPUIP */ + /** @todo CS */ + /** @todo FPUDP */ + /** @todo DS */ + + /** @todo check if FPU/XMM was actually used in the recompiler */ + restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)&pCtx->pXStateR3->x87); +//// dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW)); + +#ifdef TARGET_X86_64 + /* Note that the high dwords of 64 bits registers are undefined in 32 bits mode and are undefined after a mode change. 
*/ + pCtx->rdi = pVM->rem.s.Env.regs[R_EDI]; + pCtx->rsi = pVM->rem.s.Env.regs[R_ESI]; + pCtx->rbp = pVM->rem.s.Env.regs[R_EBP]; + pCtx->rax = pVM->rem.s.Env.regs[R_EAX]; + pCtx->rbx = pVM->rem.s.Env.regs[R_EBX]; + pCtx->rdx = pVM->rem.s.Env.regs[R_EDX]; + pCtx->rcx = pVM->rem.s.Env.regs[R_ECX]; + pCtx->r8 = pVM->rem.s.Env.regs[8]; + pCtx->r9 = pVM->rem.s.Env.regs[9]; + pCtx->r10 = pVM->rem.s.Env.regs[10]; + pCtx->r11 = pVM->rem.s.Env.regs[11]; + pCtx->r12 = pVM->rem.s.Env.regs[12]; + pCtx->r13 = pVM->rem.s.Env.regs[13]; + pCtx->r14 = pVM->rem.s.Env.regs[14]; + pCtx->r15 = pVM->rem.s.Env.regs[15]; + + pCtx->rsp = pVM->rem.s.Env.regs[R_ESP]; + +#else + pCtx->edi = pVM->rem.s.Env.regs[R_EDI]; + pCtx->esi = pVM->rem.s.Env.regs[R_ESI]; + pCtx->ebp = pVM->rem.s.Env.regs[R_EBP]; + pCtx->eax = pVM->rem.s.Env.regs[R_EAX]; + pCtx->ebx = pVM->rem.s.Env.regs[R_EBX]; + pCtx->edx = pVM->rem.s.Env.regs[R_EDX]; + pCtx->ecx = pVM->rem.s.Env.regs[R_ECX]; + + pCtx->esp = pVM->rem.s.Env.regs[R_ESP]; +#endif + +#define SYNC_BACK_SREG(a_sreg, a_SREG) \ + do \ + { \ + pCtx->a_sreg.Sel = pVM->rem.s.Env.segs[R_##a_SREG].selector; \ + if (!pVM->rem.s.Env.segs[R_SS].newselector) \ + { \ + pCtx->a_sreg.ValidSel = pVM->rem.s.Env.segs[R_##a_SREG].selector; \ + pCtx->a_sreg.fFlags = CPUMSELREG_FLAGS_VALID; \ + pCtx->a_sreg.u64Base = pVM->rem.s.Env.segs[R_##a_SREG].base; \ + pCtx->a_sreg.u32Limit = pVM->rem.s.Env.segs[R_##a_SREG].limit; \ + /* Note! QEmu saves the 2nd dword of the descriptor; we (VT-x/AMD-V) keep only the attributes! */ \ + pCtx->a_sreg.Attr.u = (pVM->rem.s.Env.segs[R_##a_SREG].flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; \ + } \ + else \ + { \ + pCtx->a_sreg.fFlags = 0; \ + STAM_COUNTER_INC(&gStatSelOutOfSyncStateBack[R_##a_SREG]); \ + } \ + } while (0) + + SYNC_BACK_SREG(es, ES); + SYNC_BACK_SREG(cs, CS); + SYNC_BACK_SREG(ss, SS); + SYNC_BACK_SREG(ds, DS); + SYNC_BACK_SREG(fs, FS); + SYNC_BACK_SREG(gs, GS); + +#ifdef TARGET_X86_64 + pCtx->rip = pVM->rem.s.Env.eip; + pCtx->rflags.u64 = pVM->rem.s.Env.eflags; +#else + pCtx->eip = pVM->rem.s.Env.eip; + pCtx->eflags.u32 = pVM->rem.s.Env.eflags; +#endif + + pCtx->cr0 = pVM->rem.s.Env.cr[0]; + pCtx->cr2 = pVM->rem.s.Env.cr[2]; + pCtx->cr3 = pVM->rem.s.Env.cr[3]; +#ifdef VBOX_WITH_RAW_MODE + if (((pVM->rem.s.Env.cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + pCtx->cr4 = pVM->rem.s.Env.cr[4]; + + for (i = 0; i < 8; i++) + pCtx->dr[i] = pVM->rem.s.Env.dr[i]; + + pCtx->gdtr.cbGdt = pVM->rem.s.Env.gdt.limit; + if (pCtx->gdtr.pGdt != pVM->rem.s.Env.gdt.base) + { + pCtx->gdtr.pGdt = pVM->rem.s.Env.gdt.base; + STAM_COUNTER_INC(&gStatREMGDTChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); +#endif + } + + pCtx->idtr.cbIdt = pVM->rem.s.Env.idt.limit; + if (pCtx->idtr.pIdt != pVM->rem.s.Env.idt.base) + { + pCtx->idtr.pIdt = pVM->rem.s.Env.idt.base; + STAM_COUNTER_INC(&gStatREMIDTChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); +#endif + } + + if ( pCtx->ldtr.Sel != pVM->rem.s.Env.ldt.selector + || pCtx->ldtr.ValidSel != pVM->rem.s.Env.ldt.selector + || pCtx->ldtr.u64Base != pVM->rem.s.Env.ldt.base + || pCtx->ldtr.u32Limit != pVM->rem.s.Env.ldt.limit + || pCtx->ldtr.Attr.u != ((pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK) + || !(pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID) + ) + { + pCtx->ldtr.Sel = pVM->rem.s.Env.ldt.selector; + 
pCtx->ldtr.ValidSel = pVM->rem.s.Env.ldt.selector; + pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ldtr.u64Base = pVM->rem.s.Env.ldt.base; + pCtx->ldtr.u32Limit = pVM->rem.s.Env.ldt.limit; + pCtx->ldtr.Attr.u = (pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + STAM_COUNTER_INC(&gStatREMLDTRChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); +#endif + } + + if ( pCtx->tr.Sel != pVM->rem.s.Env.tr.selector + || pCtx->tr.ValidSel != pVM->rem.s.Env.tr.selector + || pCtx->tr.u64Base != pVM->rem.s.Env.tr.base + || pCtx->tr.u32Limit != pVM->rem.s.Env.tr.limit + || pCtx->tr.Attr.u != ((pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK) + || !(pCtx->tr.fFlags & CPUMSELREG_FLAGS_VALID) + ) + { + Log(("REM: TR changed! %#x{%#llx,%#x,%#x} -> %#x{%llx,%#x,%#x}\n", + pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u, + pVM->rem.s.Env.tr.selector, (uint64_t)pVM->rem.s.Env.tr.base, pVM->rem.s.Env.tr.limit, + pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT)); + pCtx->tr.Sel = pVM->rem.s.Env.tr.selector; + pCtx->tr.ValidSel = pVM->rem.s.Env.tr.selector; + pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->tr.u64Base = pVM->rem.s.Env.tr.base; + pCtx->tr.u32Limit = pVM->rem.s.Env.tr.limit; + pCtx->tr.Attr.u = (pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + Assert(pCtx->tr.Attr.u & ~DESC_INTEL_UNUSABLE); + STAM_COUNTER_INC(&gStatREMTRChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + } + + /* Sysenter MSR */ + pCtx->SysEnter.cs = pVM->rem.s.Env.sysenter_cs; + pCtx->SysEnter.eip = pVM->rem.s.Env.sysenter_eip; + pCtx->SysEnter.esp = pVM->rem.s.Env.sysenter_esp; + + /* System MSRs. */ + pCtx->msrEFER = pVM->rem.s.Env.efer; + pCtx->msrSTAR = pVM->rem.s.Env.star; + pCtx->msrPAT = pVM->rem.s.Env.pat; +#ifdef TARGET_X86_64 + pCtx->msrLSTAR = pVM->rem.s.Env.lstar; + pCtx->msrCSTAR = pVM->rem.s.Env.cstar; + pCtx->msrSFMASK = pVM->rem.s.Env.fmask; + pCtx->msrKERNELGSBASE = pVM->rem.s.Env.kernelgsbase; +#endif + + /* Inhibit interrupt flag. */ + if (pVM->rem.s.Env.hflags & HF_INHIBIT_IRQ_MASK) + { + Log(("Settings VMCPU_FF_INHIBIT_INTERRUPTS at %RGv (REM)\n", (RTGCPTR)pCtx->rip)); + EMSetInhibitInterruptsPC(pVCpu, pCtx->rip); + VMCPU_FF_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) + { + Log(("Clearing VMCPU_FF_INHIBIT_INTERRUPTS at %RGv - successor %RGv (REM#2)\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu))); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); + } + + /* Inhibit NMI flag. */ + if (pVM->rem.s.Env.hflags2 & HF2_NMI_MASK) + { + Log(("Settings VMCPU_FF_BLOCK_NMIS at %RGv (REM)\n", (RTGCPTR)pCtx->rip)); + VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS); + } + else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) + { + Log(("Clearing VMCPU_FF_BLOCK_NMIS at %RGv (REM)\n", (RTGCPTR)pCtx->rip)); + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS); + } + + remR3TrapClear(pVM); + + /* + * Check for traps. + */ + if ( pVM->rem.s.Env.exception_index >= 0 + && pVM->rem.s.Env.exception_index < 256) + { + /* This cannot be a hardware-interrupt because exception_index < EXCP_INTERRUPT. */ + int rc; + + Log(("REMR3StateBack: Pending trap %x %d\n", pVM->rem.s.Env.exception_index, pVM->rem.s.Env.exception_is_int)); + TRPMEVENT enmType = pVM->rem.s.Env.exception_is_int == 0 ? TRPM_TRAP + : pVM->rem.s.Env.exception_is_int == EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ ? 
TRPM_HARDWARE_INT + : TRPM_SOFTWARE_INT; + rc = TRPMAssertTrap(pVCpu, pVM->rem.s.Env.exception_index, enmType); + AssertRC(rc); + if (enmType == TRPM_TRAP) + { + switch (pVM->rem.s.Env.exception_index) + { + case X86_XCPT_PF: + TRPMSetFaultAddress(pVCpu, pCtx->cr2); + /* fallthru */ + case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP: + case X86_XCPT_AC: case X86_XCPT_DF: /* 0 */ + TRPMSetErrorCode(pVCpu, pVM->rem.s.Env.error_code); + break; + } + } + } + + /* + * We're not longer in REM mode. + */ + CPUMR3RemLeave(pVCpu, + !VM_IS_RAW_MODE_ENABLED(pVM) + || ( pVM->rem.s.Env.segs[R_SS].newselector + | pVM->rem.s.Env.segs[R_GS].newselector + | pVM->rem.s.Env.segs[R_FS].newselector + | pVM->rem.s.Env.segs[R_ES].newselector + | pVM->rem.s.Env.segs[R_DS].newselector + | pVM->rem.s.Env.segs[R_CS].newselector) == 0 + ); + VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_REM); + pVM->rem.s.fInREM = false; + pVM->rem.s.pCtx = NULL; + pVM->rem.s.Env.pVCpu = NULL; + STAM_PROFILE_STOP(&pVM->rem.s.StatsStateBack, a); + Log2(("REMR3StateBack: returns VINF_SUCCESS\n")); + return VINF_SUCCESS; +} + + +/** + * This is called by the disassembler when it wants to update the cpu state + * before for instance doing a register dump. + */ +static void remR3StateUpdate(PVM pVM, PVMCPU pVCpu) +{ + register PCPUMCTX pCtx = pVM->rem.s.pCtx; + unsigned i; + + Assert(pVM->rem.s.fInREM); + + /* + * Copy back the registers. + * This is done in the order they are declared in the CPUMCTX structure. + */ + + PX86FXSTATE pFpuCtx = &pCtx->pXStateR3->x87; + /** @todo FOP */ + /** @todo FPUIP */ + /** @todo CS */ + /** @todo FPUDP */ + /** @todo DS */ + /** @todo Fix MXCSR support in QEMU so we don't overwrite MXCSR with 0 when we shouldn't! */ + pFpuCtx->MXCSR = 0; + pFpuCtx->MXCSR_MASK = 0; + + /** @todo check if FPU/XMM was actually used in the recompiler */ + restore_raw_fp_state(&pVM->rem.s.Env, (uint8_t *)pFpuCtx); +//// dprintf2(("FPU state CW=%04X TT=%04X SW=%04X (%04X)\n", env->fpuc, env->fpstt, env->fpus, pVMCtx->fpu.FSW)); + +#ifdef TARGET_X86_64 + pCtx->rdi = pVM->rem.s.Env.regs[R_EDI]; + pCtx->rsi = pVM->rem.s.Env.regs[R_ESI]; + pCtx->rbp = pVM->rem.s.Env.regs[R_EBP]; + pCtx->rax = pVM->rem.s.Env.regs[R_EAX]; + pCtx->rbx = pVM->rem.s.Env.regs[R_EBX]; + pCtx->rdx = pVM->rem.s.Env.regs[R_EDX]; + pCtx->rcx = pVM->rem.s.Env.regs[R_ECX]; + pCtx->r8 = pVM->rem.s.Env.regs[8]; + pCtx->r9 = pVM->rem.s.Env.regs[9]; + pCtx->r10 = pVM->rem.s.Env.regs[10]; + pCtx->r11 = pVM->rem.s.Env.regs[11]; + pCtx->r12 = pVM->rem.s.Env.regs[12]; + pCtx->r13 = pVM->rem.s.Env.regs[13]; + pCtx->r14 = pVM->rem.s.Env.regs[14]; + pCtx->r15 = pVM->rem.s.Env.regs[15]; + + pCtx->rsp = pVM->rem.s.Env.regs[R_ESP]; +#else + pCtx->edi = pVM->rem.s.Env.regs[R_EDI]; + pCtx->esi = pVM->rem.s.Env.regs[R_ESI]; + pCtx->ebp = pVM->rem.s.Env.regs[R_EBP]; + pCtx->eax = pVM->rem.s.Env.regs[R_EAX]; + pCtx->ebx = pVM->rem.s.Env.regs[R_EBX]; + pCtx->edx = pVM->rem.s.Env.regs[R_EDX]; + pCtx->ecx = pVM->rem.s.Env.regs[R_ECX]; + + pCtx->esp = pVM->rem.s.Env.regs[R_ESP]; +#endif + + SYNC_BACK_SREG(es, ES); + SYNC_BACK_SREG(cs, CS); + SYNC_BACK_SREG(ss, SS); + SYNC_BACK_SREG(ds, DS); + SYNC_BACK_SREG(fs, FS); + SYNC_BACK_SREG(gs, GS); + +#ifdef TARGET_X86_64 + pCtx->rip = pVM->rem.s.Env.eip; + pCtx->rflags.u64 = pVM->rem.s.Env.eflags; +#else + pCtx->eip = pVM->rem.s.Env.eip; + pCtx->eflags.u32 = pVM->rem.s.Env.eflags; +#endif + + pCtx->cr0 = pVM->rem.s.Env.cr[0]; + pCtx->cr2 = pVM->rem.s.Env.cr[2]; + pCtx->cr3 = 
pVM->rem.s.Env.cr[3]; +#ifdef VBOX_WITH_RAW_MODE + if (((pVM->rem.s.Env.cr[4] ^ pCtx->cr4) & X86_CR4_VME) && VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + pCtx->cr4 = pVM->rem.s.Env.cr[4]; + + for (i = 0; i < 8; i++) + pCtx->dr[i] = pVM->rem.s.Env.dr[i]; + + pCtx->gdtr.cbGdt = pVM->rem.s.Env.gdt.limit; + if (pCtx->gdtr.pGdt != (RTGCPTR)pVM->rem.s.Env.gdt.base) + { + pCtx->gdtr.pGdt = (RTGCPTR)pVM->rem.s.Env.gdt.base; + STAM_COUNTER_INC(&gStatREMGDTChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_GDT); +#endif + } + + pCtx->idtr.cbIdt = pVM->rem.s.Env.idt.limit; + if (pCtx->idtr.pIdt != (RTGCPTR)pVM->rem.s.Env.idt.base) + { + pCtx->idtr.pIdt = (RTGCPTR)pVM->rem.s.Env.idt.base; + STAM_COUNTER_INC(&gStatREMIDTChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_TRPM_SYNC_IDT); +#endif + } + + if ( pCtx->ldtr.Sel != pVM->rem.s.Env.ldt.selector + || pCtx->ldtr.ValidSel != pVM->rem.s.Env.ldt.selector + || pCtx->ldtr.u64Base != pVM->rem.s.Env.ldt.base + || pCtx->ldtr.u32Limit != pVM->rem.s.Env.ldt.limit + || pCtx->ldtr.Attr.u != ((pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK) + || !(pCtx->ldtr.fFlags & CPUMSELREG_FLAGS_VALID) + ) + { + pCtx->ldtr.Sel = pVM->rem.s.Env.ldt.selector; + pCtx->ldtr.ValidSel = pVM->rem.s.Env.ldt.selector; + pCtx->ldtr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->ldtr.u64Base = pVM->rem.s.Env.ldt.base; + pCtx->ldtr.u32Limit = pVM->rem.s.Env.ldt.limit; + pCtx->ldtr.Attr.u = (pVM->rem.s.Env.ldt.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + STAM_COUNTER_INC(&gStatREMLDTRChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_LDT); +#endif + } + + if ( pCtx->tr.Sel != pVM->rem.s.Env.tr.selector + || pCtx->tr.ValidSel != pVM->rem.s.Env.tr.selector + || pCtx->tr.u64Base != pVM->rem.s.Env.tr.base + || pCtx->tr.u32Limit != pVM->rem.s.Env.tr.limit + || pCtx->tr.Attr.u != ((pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK) + || !(pCtx->tr.fFlags & CPUMSELREG_FLAGS_VALID) + ) + { + Log(("REM: TR changed! %#x{%#llx,%#x,%#x} -> %#x{%llx,%#x,%#x}\n", + pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u, + pVM->rem.s.Env.tr.selector, (uint64_t)pVM->rem.s.Env.tr.base, pVM->rem.s.Env.tr.limit, + pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT)); + pCtx->tr.Sel = pVM->rem.s.Env.tr.selector; + pCtx->tr.ValidSel = pVM->rem.s.Env.tr.selector; + pCtx->tr.fFlags = CPUMSELREG_FLAGS_VALID; + pCtx->tr.u64Base = pVM->rem.s.Env.tr.base; + pCtx->tr.u32Limit = pVM->rem.s.Env.tr.limit; + pCtx->tr.Attr.u = (pVM->rem.s.Env.tr.flags >> SEL_FLAGS_SHIFT) & SEL_FLAGS_SMASK; + Assert(pCtx->tr.Attr.u & ~DESC_INTEL_UNUSABLE); + STAM_COUNTER_INC(&gStatREMTRChange); +#ifdef VBOX_WITH_RAW_MODE + if (VM_IS_RAW_MODE_ENABLED(pVM)) + VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS); +#endif + } + + /* Sysenter MSR */ + pCtx->SysEnter.cs = pVM->rem.s.Env.sysenter_cs; + pCtx->SysEnter.eip = pVM->rem.s.Env.sysenter_eip; + pCtx->SysEnter.esp = pVM->rem.s.Env.sysenter_esp; + + /* System MSRs. 
*/ + pCtx->msrEFER = pVM->rem.s.Env.efer; + pCtx->msrSTAR = pVM->rem.s.Env.star; + pCtx->msrPAT = pVM->rem.s.Env.pat; +#ifdef TARGET_X86_64 + pCtx->msrLSTAR = pVM->rem.s.Env.lstar; + pCtx->msrCSTAR = pVM->rem.s.Env.cstar; + pCtx->msrSFMASK = pVM->rem.s.Env.fmask; + pCtx->msrKERNELGSBASE = pVM->rem.s.Env.kernelgsbase; +#endif + +} + + +/** + * Update the VMM state information if we're currently in REM. + * + * This method is used by the DBGF and PDMDevice when there is any uncertainty of whether + * we're currently executing in REM and the VMM state is invalid. This method will of + * course check that we're executing in REM before syncing any data over to the VMM. + * + * @param pVM The VM handle. + * @param pVCpu The VMCPU handle. + */ +REMR3DECL(void) REMR3StateUpdate(PVM pVM, PVMCPU pVCpu) +{ + if (pVM->rem.s.fInREM) + remR3StateUpdate(pVM, pVCpu); +} + + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM + + +/** + * Notify the recompiler about Address Gate 20 state change. + * + * This notification is required since A20 gate changes are + * initialized from a device driver and the VM might just as + * well be in REM mode as in RAW mode. + * + * @param pVM VM handle. + * @param pVCpu VMCPU handle. + * @param fEnable True if the gate should be enabled. + * False if the gate should be disabled. + */ +REMR3DECL(void) REMR3A20Set(PVM pVM, PVMCPU pVCpu, bool fEnable) +{ + LogFlow(("REMR3A20Set: fEnable=%d\n", fEnable)); + VM_ASSERT_EMT(pVM); + + /** @todo SMP and the A20 gate... */ + if (pVM->rem.s.Env.pVCpu == pVCpu) + { + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + cpu_x86_set_a20(&pVM->rem.s.Env, fEnable); + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + } +} + + +/** + * Replays the handler notification changes + * Called in response to VM_FF_REM_HANDLER_NOTIFY from the RAW execution loop. + * + * @param pVM VM handle. + */ +REMR3DECL(void) REMR3ReplayHandlerNotifications(PVM pVM) +{ + /* + * Replay the flushes. + */ + LogFlow(("REMR3ReplayHandlerNotifications:\n")); + VM_ASSERT_EMT(pVM); + + /** @todo this isn't ensuring correct replay order. */ + if (VM_FF_TEST_AND_CLEAR(pVM, VM_FF_REM_HANDLER_NOTIFY)) + { + uint32_t idxNext; + uint32_t idxRevHead; + uint32_t idxHead; +#ifdef VBOX_STRICT + int32_t c = 0; +#endif + + /* Lockless purging of pending notifications. */ + idxHead = ASMAtomicXchgU32(&pVM->rem.s.idxPendingList, UINT32_MAX); + if (idxHead == UINT32_MAX) + return; + Assert(idxHead < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications)); + + /* + * Reverse the list to process it in FIFO order. + */ + idxRevHead = UINT32_MAX; + do + { + /* Save the index of the next rec. */ + idxNext = pVM->rem.s.aHandlerNotifications[idxHead].idxNext; + Assert(idxNext < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications) || idxNext == UINT32_MAX); + /* Push the record onto the reversed list. */ + pVM->rem.s.aHandlerNotifications[idxHead].idxNext = idxRevHead; + idxRevHead = idxHead; + Assert(++c <= RT_ELEMENTS(pVM->rem.s.aHandlerNotifications)); + /* Advance. */ + idxHead = idxNext; + } while (idxHead != UINT32_MAX); + + /* + * Loop thru the list, reinserting the record into the free list as they are + * processed to avoid having other EMTs running out of entries while we're flushing. 
+ */ + idxHead = idxRevHead; + do + { + PREMHANDLERNOTIFICATION pCur = &pVM->rem.s.aHandlerNotifications[idxHead]; + uint32_t idxCur; + Assert(--c >= 0); + + switch (pCur->enmKind) + { + case REMHANDLERNOTIFICATIONKIND_PHYSICAL_REGISTER: + remR3NotifyHandlerPhysicalRegister(pVM, + pCur->u.PhysicalRegister.enmKind, + pCur->u.PhysicalRegister.GCPhys, + pCur->u.PhysicalRegister.cb, + pCur->u.PhysicalRegister.fHasHCHandler); + break; + + case REMHANDLERNOTIFICATIONKIND_PHYSICAL_DEREGISTER: + remR3NotifyHandlerPhysicalDeregister(pVM, + pCur->u.PhysicalDeregister.enmKind, + pCur->u.PhysicalDeregister.GCPhys, + pCur->u.PhysicalDeregister.cb, + pCur->u.PhysicalDeregister.fHasHCHandler, + pCur->u.PhysicalDeregister.fRestoreAsRAM); + break; + + case REMHANDLERNOTIFICATIONKIND_PHYSICAL_MODIFY: + remR3NotifyHandlerPhysicalModify(pVM, + pCur->u.PhysicalModify.enmKind, + pCur->u.PhysicalModify.GCPhysOld, + pCur->u.PhysicalModify.GCPhysNew, + pCur->u.PhysicalModify.cb, + pCur->u.PhysicalModify.fHasHCHandler, + pCur->u.PhysicalModify.fRestoreAsRAM); + break; + + default: + AssertReleaseMsgFailed(("enmKind=%d\n", pCur->enmKind)); + break; + } + + /* + * Advance idxHead. + */ + idxCur = idxHead; + idxHead = pCur->idxNext; + Assert(idxHead < RT_ELEMENTS(pVM->rem.s.aHandlerNotifications) || (idxHead == UINT32_MAX && c == 0)); + + /* + * Put the record back into the free list. + */ + do + { + idxNext = ASMAtomicUoReadU32(&pVM->rem.s.idxFreeList); + ASMAtomicWriteU32(&pCur->idxNext, idxNext); + ASMCompilerBarrier(); + } while (!ASMAtomicCmpXchgU32(&pVM->rem.s.idxFreeList, idxCur, idxNext)); + } while (idxHead != UINT32_MAX); + +#ifdef VBOX_STRICT + if (pVM->cCpus == 1) + { + unsigned c; + /* Check that all records are now on the free list. */ + for (c = 0, idxNext = pVM->rem.s.idxFreeList; idxNext != UINT32_MAX; + idxNext = pVM->rem.s.aHandlerNotifications[idxNext].idxNext) + c++; + AssertReleaseMsg(c == RT_ELEMENTS(pVM->rem.s.aHandlerNotifications), ("%#x != %#x, idxFreeList=%#x\n", c, RT_ELEMENTS(pVM->rem.s.aHandlerNotifications), pVM->rem.s.idxFreeList)); + } +#endif + } +} + + +/** + * Notify REM about changed code page. + * + * @returns VBox status code. + * @param pVM VM handle. + * @param pVCpu VMCPU handle. + * @param pvCodePage Code page address + */ +REMR3DECL(int) REMR3NotifyCodePageChanged(PVM pVM, PVMCPU pVCpu, RTGCPTR pvCodePage) +{ +#ifdef VBOX_REM_PROTECT_PAGES_FROM_SMC + int rc; + RTGCPHYS PhysGC; + uint64_t flags; + + VM_ASSERT_EMT(pVM); + + /* + * Get the physical page address. + */ + rc = PGMGstGetPage(pVM, pvCodePage, &flags, &PhysGC); + if (rc == VINF_SUCCESS) + { + /* + * Sync the required registers and flush the whole page. + * (Easier to do the whole page than notifying it about each physical + * byte that was changed. + */ + pVM->rem.s.Env.cr[0] = pVM->rem.s.pCtx->cr0; + pVM->rem.s.Env.cr[2] = pVM->rem.s.pCtx->cr2; + pVM->rem.s.Env.cr[3] = pVM->rem.s.pCtx->cr3; + pVM->rem.s.Env.cr[4] = pVM->rem.s.pCtx->cr4; + + tb_invalidate_phys_page_range(PhysGC, PhysGC + PAGE_SIZE - 1, 0); + } +#endif + return VINF_SUCCESS; +} + + +/** + * Notification about a successful MMR3PhysRegister() call. + * + * @param pVM VM handle. + * @param GCPhys The physical address the RAM. + * @param cb Size of the memory. + * @param fFlags Flags of the REM_NOTIFY_PHYS_RAM_FLAGS_* defines. 
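+ *
+ * @remark  A minimal usage sketch, assuming a hypothetical caller that registers
+ *          a page aligned 128 MB base RAM range (the address and size are
+ *          placeholders picked for illustration, not taken from this file):
+ * @code
+ *          REMR3NotifyPhysRamRegister(pVM, GCPhysStart, 128 * _1M,
+ *                                     REM_NOTIFY_PHYS_RAM_FLAGS_RAM);
+ * @endcode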
+ */ +REMR3DECL(void) REMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, unsigned fFlags) +{ + Log(("REMR3NotifyPhysRamRegister: GCPhys=%RGp cb=%RGp fFlags=%#x\n", GCPhys, cb, fFlags)); + VM_ASSERT_EMT(pVM); + + /* + * Validate input - we trust the caller. + */ + Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys); + Assert(cb); + Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb); + AssertMsg(fFlags == REM_NOTIFY_PHYS_RAM_FLAGS_RAM || fFlags == REM_NOTIFY_PHYS_RAM_FLAGS_MMIO2, ("%#x\n", fFlags)); + + /* + * Base ram? Update GCPhysLastRam. + */ + if (fFlags & REM_NOTIFY_PHYS_RAM_FLAGS_RAM) + { + if (GCPhys + (cb - 1) > pVM->rem.s.GCPhysLastRam) + { + AssertReleaseMsg(!pVM->rem.s.fGCPhysLastRamFixed, ("GCPhys=%RGp cb=%RGp\n", GCPhys, cb)); + pVM->rem.s.GCPhysLastRam = GCPhys + (cb - 1); + } + } + + /* + * Register the ram. + */ + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + cpu_register_physical_memory_offset(GCPhys, cb, GCPhys, GCPhys); + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); +} + + +/** + * Notification about a successful MMR3PhysRomRegister() call. + * + * @param pVM VM handle. + * @param GCPhys The physical address of the ROM. + * @param cb The size of the ROM. + * @param pvCopy Pointer to the ROM copy. + * @param fShadow Whether it's currently writable shadow ROM or normal readonly ROM. + * This function will be called when ever the protection of the + * shadow ROM changes (at reset and end of POST). + */ +REMR3DECL(void) REMR3NotifyPhysRomRegister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb, void *pvCopy, bool fShadow) +{ + Log(("REMR3NotifyPhysRomRegister: GCPhys=%RGp cb=%d fShadow=%RTbool\n", GCPhys, cb, fShadow)); + VM_ASSERT_EMT(pVM); + + /* + * Validate input - we trust the caller. + */ + Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys); + Assert(cb); + Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb); + + /* + * Register the rom. + */ + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + cpu_register_physical_memory_offset(GCPhys, cb, GCPhys | (fShadow ? 0 : IO_MEM_ROM), GCPhys); + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); +} + + +/** + * Notification about a successful memory deregistration or reservation. + * + * @param pVM VM Handle. + * @param GCPhys Start physical address. + * @param cb The size of the range. + */ +REMR3DECL(void) REMR3NotifyPhysRamDeregister(PVM pVM, RTGCPHYS GCPhys, RTUINT cb) +{ + Log(("REMR3NotifyPhysRamDeregister: GCPhys=%RGp cb=%d\n", GCPhys, cb)); + VM_ASSERT_EMT(pVM); + + /* + * Validate input - we trust the caller. + */ + Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys); + Assert(cb); + Assert(RT_ALIGN_Z(cb, PAGE_SIZE) == cb); + + /* + * Unassigning the memory. + */ + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys); + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); +} + + +/** + * Notification about a successful PGMR3HandlerPhysicalRegister() call. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhys Handler range address. + * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. 
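+ *
+ * @remark  Rough sketch of the effect: an MMIO range is remapped to the special
+ *          iMMIOMemType so accesses from recompiled code are dispatched through
+ *          the remR3MMIORead / remR3MMIOWrite helpers further down, while a
+ *          range with a HC handler gets iHandlerMemType and is routed through
+ *          the remR3HandlerRead / remR3HandlerWrite helpers instead.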
+ * + * @remark MMR3PhysRomRegister assumes that this function will not apply the + * Handler memory type to memory which has no HC handler. + */ +static void remR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, + bool fHasHCHandler) +{ + Log(("REMR3NotifyHandlerPhysicalRegister: enmKind=%d GCPhys=%RGp cb=%RGp fHasHCHandler=%d\n", + enmKind, GCPhys, cb, fHasHCHandler)); + + VM_ASSERT_EMT(pVM); + Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys); + Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb); + + + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + if (enmKind == PGMPHYSHANDLERKIND_MMIO) + cpu_register_physical_memory_offset(GCPhys, cb, pVM->rem.s.iMMIOMemType, GCPhys); + else if (fHasHCHandler) + cpu_register_physical_memory_offset(GCPhys, cb, pVM->rem.s.iHandlerMemType, GCPhys); + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); +} + +/** + * Notification about a successful PGMR3HandlerPhysicalRegister() call. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhys Handler range address. + * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. + * + * @remark MMR3PhysRomRegister assumes that this function will not apply the + * Handler memory type to memory which has no HC handler. + */ +REMR3DECL(void) REMR3NotifyHandlerPhysicalRegister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, + bool fHasHCHandler) +{ + REMR3ReplayHandlerNotifications(pVM); + + remR3NotifyHandlerPhysicalRegister(pVM, enmKind, GCPhys, cb, fHasHCHandler); +} + +/** + * Notification about a successful PGMR3HandlerPhysicalDeregister() operation. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhys Handler range address. + * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. + * @param fRestoreAsRAM Whether the to restore it as normal RAM or as unassigned memory. + */ +static void remR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, + bool fHasHCHandler, bool fRestoreAsRAM) +{ + Log(("REMR3NotifyHandlerPhysicalDeregister: enmKind=%d GCPhys=%RGp cb=%RGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool RAM=%08x\n", + enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM, MMR3PhysGetRamSize(pVM))); + VM_ASSERT_EMT(pVM); + + + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + /** @todo this isn't right, MMIO can (in theory) be restored as RAM. */ + if (enmKind == PGMPHYSHANDLERKIND_MMIO) + cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys); + else if (fHasHCHandler) + { + if (!fRestoreAsRAM) + { + Assert(GCPhys > MMR3PhysGetRamSize(pVM)); + cpu_register_physical_memory_offset(GCPhys, cb, IO_MEM_UNASSIGNED, GCPhys); + } + else + { + Assert(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys); + Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb); + cpu_register_physical_memory_offset(GCPhys, cb, GCPhys, GCPhys); + } + } + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); +} + +/** + * Notification about a successful PGMR3HandlerPhysicalDeregister() operation. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhys Handler range address. 
+ * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. + * @param fRestoreAsRAM Whether the to restore it as normal RAM or as unassigned memory. + */ +REMR3DECL(void) REMR3NotifyHandlerPhysicalDeregister(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM) +{ + REMR3ReplayHandlerNotifications(pVM); + remR3NotifyHandlerPhysicalDeregister(pVM, enmKind, GCPhys, cb, fHasHCHandler, fRestoreAsRAM); +} + + +/** + * Notification about a successful PGMR3HandlerPhysicalModify() call. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhysOld Old handler range address. + * @param GCPhysNew New handler range address. + * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. + * @param fRestoreAsRAM Whether the to restore it as normal RAM or as unassigned memory. + */ +static void remR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM) +{ + Log(("REMR3NotifyHandlerPhysicalModify: enmKind=%d GCPhysOld=%RGp GCPhysNew=%RGp cb=%RGp fHasHCHandler=%RTbool fRestoreAsRAM=%RTbool\n", + enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM)); + VM_ASSERT_EMT(pVM); + AssertReleaseMsg(enmKind != PGMPHYSHANDLERKIND_MMIO, ("enmKind=%d\n", enmKind)); + + if (fHasHCHandler) + { + ASMAtomicIncU32(&pVM->rem.s.cIgnoreAll); + + /* + * Reset the old page. + */ + PDMCritSectEnter(&pVM->rem.s.CritSectRegister, VERR_SEM_BUSY); + if (!fRestoreAsRAM) + cpu_register_physical_memory_offset(GCPhysOld, cb, IO_MEM_UNASSIGNED, GCPhysOld); + else + { + /* This is not perfect, but it'll do for PD monitoring... */ + Assert(cb == PAGE_SIZE); + Assert(RT_ALIGN_T(GCPhysOld, PAGE_SIZE, RTGCPHYS) == GCPhysOld); + cpu_register_physical_memory_offset(GCPhysOld, cb, GCPhysOld, GCPhysOld); + } + + /* + * Update the new page. + */ + Assert(RT_ALIGN_T(GCPhysNew, PAGE_SIZE, RTGCPHYS) == GCPhysNew); + Assert(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb); + cpu_register_physical_memory_offset(GCPhysNew, cb, pVM->rem.s.iHandlerMemType, GCPhysNew); + PDMCritSectLeave(&pVM->rem.s.CritSectRegister); + + ASMAtomicDecU32(&pVM->rem.s.cIgnoreAll); + } +} + +/** + * Notification about a successful PGMR3HandlerPhysicalModify() call. + * + * @param pVM VM Handle. + * @param enmKind Kind of access handler. + * @param GCPhysOld Old handler range address. + * @param GCPhysNew New handler range address. + * @param cb Size of the handler range. + * @param fHasHCHandler Set if the handler has a HC callback function. + * @param fRestoreAsRAM Whether the to restore it as normal RAM or as unassigned memory. + */ +REMR3DECL(void) REMR3NotifyHandlerPhysicalModify(PVM pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fHasHCHandler, bool fRestoreAsRAM) +{ + REMR3ReplayHandlerNotifications(pVM); + + remR3NotifyHandlerPhysicalModify(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fHasHCHandler, fRestoreAsRAM); +} + +/** + * Checks if we're handling access to this page or not. + * + * @returns true if we're trapping access. + * @returns false if we aren't. + * @param pVM The VM handle. + * @param GCPhys The physical address. + * + * @remark This function will only work correctly in VBOX_STRICT builds! 
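+ *
+ * @remark  A minimal sketch of the intended use, strict builds only (the page
+ *          variable is a placeholder, not taken from this file):
+ * @code
+ *          Assert(REMR3IsPageAccessHandled(pVM, GCPhysTrackedPage));
+ * @endcode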
+ */ +REMR3DECL(bool) REMR3IsPageAccessHandled(PVM pVM, RTGCPHYS GCPhys) +{ +#ifdef VBOX_STRICT + ram_addr_t off; + REMR3ReplayHandlerNotifications(pVM); + + off = get_phys_page_offset(GCPhys); + return (off & PAGE_OFFSET_MASK) == pVM->rem.s.iHandlerMemType + || (off & PAGE_OFFSET_MASK) == pVM->rem.s.iMMIOMemType + || (off & PAGE_OFFSET_MASK) == IO_MEM_ROM; +#else + return false; +#endif +} + + +/** + * Deals with a rare case in get_phys_addr_code where the code + * is being monitored. + * + * It could also be an MMIO page, in which case we will raise a fatal error. + * + * @returns The physical address corresponding to addr. + * @param env The cpu environment. + * @param addr The virtual address. + * @param pTLBEntry The TLB entry. + * @param IoTlbEntry The I/O TLB entry address. + */ +target_ulong remR3PhysGetPhysicalAddressCode(CPUX86State *env, + target_ulong addr, + CPUTLBEntry *pTLBEntry, + target_phys_addr_t IoTlbEntry) +{ + PVM pVM = env->pVM; + + if ((IoTlbEntry & ~TARGET_PAGE_MASK) == pVM->rem.s.iHandlerMemType) + { + /* If code memory is being monitored, appropriate IOTLB entry will have + handler IO type, and addend will provide real physical address, no + matter if we store VA in TLB or not, as handlers are always passed PA */ + target_ulong ret = (IoTlbEntry & TARGET_PAGE_MASK) + addr; + return ret; + } + LogRel(("\nTrying to execute code with memory type addr_code=%RGv addend=%RGp at %RGv! (iHandlerMemType=%#x iMMIOMemType=%#x IOTLB=%RGp)\n" + "*** handlers\n", + (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType, (RTGCPHYS)IoTlbEntry)); + DBGFR3Info(pVM->pUVM, "handlers", NULL, DBGFR3InfoLogRelHlp()); + LogRel(("*** mmio\n")); + DBGFR3Info(pVM->pUVM, "mmio", NULL, DBGFR3InfoLogRelHlp()); + LogRel(("*** phys\n")); + DBGFR3Info(pVM->pUVM, "phys", NULL, DBGFR3InfoLogRelHlp()); + cpu_abort(env, "Trying to execute code with memory type addr_code=%RGv addend=%RGp at %RGv. (iHandlerMemType=%#x iMMIOMemType=%#x)\n", + (RTGCPTR)pTLBEntry->addr_code, (RTGCPHYS)pTLBEntry->addend, (RTGCPTR)addr, pVM->rem.s.iHandlerMemType, pVM->rem.s.iMMIOMemType); + AssertFatalFailed(); +} + +/** + * Read guest RAM and ROM. + * + * @param SrcGCPhys The source address (guest physical). + * @param pvDst The destination address. + * @param cb Number of bytes + */ +void remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb) +{ + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + VBOXSTRICTRC rcStrict = PGMPhysRead(cpu_single_env->pVM, SrcGCPhys, pvDst, cb, PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); +#ifdef VBOX_DEBUG_PHYS + LogRel(("read(%d): %08x\n", cb, (uint32_t)SrcGCPhys)); +#endif + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +} + + +/** + * Read guest RAM and ROM, unsigned 8-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +RTCCUINTREG remR3PhysReadU8(RTGCPHYS SrcGCPhys) +{ + uint8_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("readu8: %x <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, signed 8-bit. + * + * @param SrcGCPhys The source address (guest physical). 
+ */ +RTCCINTREG remR3PhysReadS8(RTGCPHYS SrcGCPhys) +{ + int8_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU8(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("reads8: %x <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, unsigned 16-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +RTCCUINTREG remR3PhysReadU16(RTGCPHYS SrcGCPhys) +{ + uint16_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("readu16: %x <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, signed 16-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +RTCCINTREG remR3PhysReadS16(RTGCPHYS SrcGCPhys) +{ + int16_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU16(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("reads16: %x <- %08x\n", (uint16_t)val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, unsigned 32-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +RTCCUINTREG remR3PhysReadU32(RTGCPHYS SrcGCPhys) +{ + uint32_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("readu32: %x <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, signed 32-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +RTCCINTREG remR3PhysReadS32(RTGCPHYS SrcGCPhys) +{ + int32_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU32(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("reads32: %x <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, unsigned 64-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys) +{ + uint64_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU64(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("readu64: %llx <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Read guest RAM and ROM, signed 64-bit. + * + * @param SrcGCPhys The source address (guest physical). + */ +int64_t remR3PhysReadS64(RTGCPHYS SrcGCPhys) +{ + int64_t val; + STAM_PROFILE_ADV_START(&gStatMemRead, a); + VBOX_CHECK_ADDR(SrcGCPhys); + val = PGMR3PhysReadU64(cpu_single_env->pVM, SrcGCPhys, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemRead, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("reads64: %llx <- %08x\n", val, (uint32_t)SrcGCPhys)); +#endif + return val; +} + + +/** + * Write guest RAM. + * + * @param DstGCPhys The destination address (guest physical). + * @param pvSrc The source address. 
+ * @param cb Number of bytes to write + */ +void remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb) +{ + STAM_PROFILE_ADV_START(&gStatMemWrite, a); + VBOX_CHECK_ADDR(DstGCPhys); + VBOXSTRICTRC rcStrict = PGMPhysWrite(cpu_single_env->pVM, DstGCPhys, pvSrc, cb, PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + STAM_PROFILE_ADV_STOP(&gStatMemWrite, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("write(%d): %08x\n", cb, (uint32_t)DstGCPhys)); +#endif +} + + +/** + * Write guest RAM, unsigned 8-bit. + * + * @param DstGCPhys The destination address (guest physical). + * @param val Value + */ +void remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val) +{ + STAM_PROFILE_ADV_START(&gStatMemWrite, a); + VBOX_CHECK_ADDR(DstGCPhys); + PGMR3PhysWriteU8(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemWrite, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("writeu8: %x -> %08x\n", val, (uint32_t)DstGCPhys)); +#endif +} + + +/** + * Write guest RAM, unsigned 8-bit. + * + * @param DstGCPhys The destination address (guest physical). + * @param val Value + */ +void remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val) +{ + STAM_PROFILE_ADV_START(&gStatMemWrite, a); + VBOX_CHECK_ADDR(DstGCPhys); + PGMR3PhysWriteU16(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemWrite, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("writeu16: %x -> %08x\n", val, (uint32_t)DstGCPhys)); +#endif +} + + +/** + * Write guest RAM, unsigned 32-bit. + * + * @param DstGCPhys The destination address (guest physical). + * @param val Value + */ +void remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val) +{ + STAM_PROFILE_ADV_START(&gStatMemWrite, a); + VBOX_CHECK_ADDR(DstGCPhys); + PGMR3PhysWriteU32(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemWrite, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("writeu32: %x -> %08x\n", val, (uint32_t)DstGCPhys)); +#endif +} + + +/** + * Write guest RAM, unsigned 64-bit. + * + * @param DstGCPhys The destination address (guest physical). + * @param val Value + */ +void remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val) +{ + STAM_PROFILE_ADV_START(&gStatMemWrite, a); + VBOX_CHECK_ADDR(DstGCPhys); + PGMR3PhysWriteU64(cpu_single_env->pVM, DstGCPhys, val, PGMACCESSORIGIN_REM); + STAM_PROFILE_ADV_STOP(&gStatMemWrite, a); +#ifdef VBOX_DEBUG_PHYS + LogRel(("writeu64: %llx -> %08x\n", val, (uint32_t)DstGCPhys)); +#endif +} + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM_MMIO + +/** Read MMIO memory. */ +static uint32_t remR3MMIOReadU8(void *pvEnv, target_phys_addr_t GCPhys) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + uint32_t u32 = 0; + int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 1); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); + Log2(("remR3MMIOReadU8: GCPhys=%RGp -> %02x\n", (RTGCPHYS)GCPhys, u32)); + return u32; +} + +/** Read MMIO memory. */ +static uint32_t remR3MMIOReadU16(void *pvEnv, target_phys_addr_t GCPhys) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + uint32_t u32 = 0; + int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 2); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); + Log2(("remR3MMIOReadU16: GCPhys=%RGp -> %04x\n", (RTGCPHYS)GCPhys, u32)); + return u32; +} + +/** Read MMIO memory. 
*/ +static uint32_t remR3MMIOReadU32(void *pvEnv, target_phys_addr_t GCPhys) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + uint32_t u32 = 0; + int rc = IOMMMIORead(env->pVM, env->pVCpu, GCPhys, &u32, 4); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); + Log2(("remR3MMIOReadU32: GCPhys=%RGp -> %08x\n", (RTGCPHYS)GCPhys, u32)); + return u32; +} + +/** Write to MMIO memory. */ +static void remR3MMIOWriteU8(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + int rc; + Log2(("remR3MMIOWriteU8: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 1); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); +} + +/** Write to MMIO memory. */ +static void remR3MMIOWriteU16(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + int rc; + Log2(("remR3MMIOWriteU16: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 2); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); +} + +/** Write to MMIO memory. */ +static void remR3MMIOWriteU32(void *pvEnv, target_phys_addr_t GCPhys, uint32_t u32) +{ + CPUX86State *env = (CPUX86State *)pvEnv; + int rc; + Log2(("remR3MMIOWriteU32: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + rc = IOMMMIOWrite(env->pVM, env->pVCpu, GCPhys, u32, 4); + AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc)); NOREF(rc); +} + + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM_HANDLER + +/* !!!WARNING!!! This is extremely hackish right now, we assume it's only for LFB access! !!!WARNING!!! */ + +static uint32_t remR3HandlerReadU8(void *pvVM, target_phys_addr_t GCPhys) +{ + uint8_t u8; + Log2(("remR3HandlerReadU8: GCPhys=%RGp\n", (RTGCPHYS)GCPhys)); + VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u8, sizeof(u8), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return u8; +} + +static uint32_t remR3HandlerReadU16(void *pvVM, target_phys_addr_t GCPhys) +{ + uint16_t u16; + Log2(("remR3HandlerReadU16: GCPhys=%RGp\n", (RTGCPHYS)GCPhys)); + VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u16, sizeof(u16), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return u16; +} + +static uint32_t remR3HandlerReadU32(void *pvVM, target_phys_addr_t GCPhys) +{ + uint32_t u32; + Log2(("remR3HandlerReadU32: GCPhys=%RGp\n", (RTGCPHYS)GCPhys)); + VBOXSTRICTRC rcStrict = PGMPhysRead((PVM)pvVM, GCPhys, &u32, sizeof(u32), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); + return u32; +} + +static void remR3HandlerWriteU8(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32) +{ + Log2(("remR3HandlerWriteU8: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint8_t), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); +} + +static void remR3HandlerWriteU16(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32) +{ + Log2(("remR3HandlerWriteU16: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint16_t), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); +} + +static void 
remR3HandlerWriteU32(void *pvVM, target_phys_addr_t GCPhys, uint32_t u32) +{ + Log2(("remR3HandlerWriteU32: GCPhys=%RGp u32=%#x\n", (RTGCPHYS)GCPhys, u32)); + VBOXSTRICTRC rcStrict = PGMPhysWrite((PVM)pvVM, GCPhys, &u32, sizeof(uint32_t), PGMACCESSORIGIN_REM); + AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); NOREF(rcStrict); +} + +/* -+- disassembly -+- */ + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM_DISAS + + +/** + * Enables or disables singled stepped disassembly. + * + * @returns VBox status code. + * @param pVM VM handle. + * @param fEnable To enable set this flag, to disable clear it. + */ +static DECLCALLBACK(int) remR3DisasEnableStepping(PVM pVM, bool fEnable) +{ + LogFlow(("remR3DisasEnableStepping: fEnable=%d\n", fEnable)); + VM_ASSERT_EMT(pVM); + + if (fEnable) + pVM->rem.s.Env.state |= CPU_EMULATE_SINGLE_STEP; + else + pVM->rem.s.Env.state &= ~CPU_EMULATE_SINGLE_STEP; +#ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING + cpu_single_step(&pVM->rem.s.Env, fEnable); +#endif + return VINF_SUCCESS; +} + + +/** + * Enables or disables singled stepped disassembly. + * + * @returns VBox status code. + * @param pVM VM handle. + * @param fEnable To enable set this flag, to disable clear it. + */ +REMR3DECL(int) REMR3DisasEnableStepping(PVM pVM, bool fEnable) +{ + int rc; + + LogFlow(("REMR3DisasEnableStepping: fEnable=%d\n", fEnable)); + if (VM_IS_EMT(pVM)) + return remR3DisasEnableStepping(pVM, fEnable); + + rc = VMR3ReqPriorityCallWait(pVM, VMCPUID_ANY, (PFNRT)remR3DisasEnableStepping, 2, pVM, fEnable); + AssertRC(rc); + return rc; +} + + +#ifdef VBOX_WITH_DEBUGGER +/** + * External Debugger Command: .remstep [on|off|1|0] + */ +static DECLCALLBACK(int) remR3CmdDisasEnableStepping(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PUVM pUVM, + PCDBGCVAR paArgs, unsigned cArgs) +{ + int rc; + PVM pVM = pUVM->pVM; + + if (cArgs == 0) + /* + * Print the current status. + */ + rc = DBGCCmdHlpPrintf(pCmdHlp, "DisasStepping is %s\n", + pVM->rem.s.Env.state & CPU_EMULATE_SINGLE_STEP ? "enabled" : "disabled"); + else + { + /* + * Convert the argument and change the mode. + */ + bool fEnable; + rc = DBGCCmdHlpVarToBool(pCmdHlp, &paArgs[0], &fEnable); + if (RT_SUCCESS(rc)) + { + rc = REMR3DisasEnableStepping(pVM, fEnable); + if (RT_SUCCESS(rc)) + rc = DBGCCmdHlpPrintf(pCmdHlp, "DisasStepping was %s\n", fEnable ? "enabled" : "disabled"); + else + rc = DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "REMR3DisasEnableStepping"); + } + else + rc = DBGCCmdHlpFailRc(pCmdHlp, pCmd, rc, "DBGCCmdHlpVarToBool"); + } + return rc; +} +#endif /* VBOX_WITH_DEBUGGER */ + + +/** + * Disassembles one instruction and prints it to the log. + * + * @returns Success indicator. + * @param env Pointer to the recompiler CPU structure. + * @param f32BitCode Indicates that whether or not the code should + * be disassembled as 16 or 32 bit. If -1 the CS + * selector will be inspected. + * @param pszPrefix + */ +bool remR3DisasInstr(CPUX86State *env, int f32BitCode, char *pszPrefix) +{ + PVM pVM = env->pVM; + const bool fLog = LogIsEnabled(); + const bool fLog2 = LogIs2Enabled(); + int rc = VINF_SUCCESS; + + /* + * Don't bother if there ain't any log output to do. + */ + if (!fLog && !fLog2) + return true; + + /* + * Update the state so DBGF reads the correct register values. + */ + remR3StateUpdate(pVM, env->pVCpu); + + /* + * Log registers if requested. + */ + if (fLog2) + DBGFR3_INFO_LOG(pVM, env->pVCpu, "cpumguest", pszPrefix); + + /* + * Disassemble to log. 
+ */ + if (fLog) + { + PVMCPU pVCpu = VMMGetCpu(pVM); + char szBuf[256]; + szBuf[0] = '\0'; + int rc = DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, + pVCpu->idCpu, + 0, /* Sel */ 0, /* GCPtr */ + DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, + sizeof(szBuf), + NULL); + if (RT_FAILURE(rc)) + RTStrPrintf(szBuf, sizeof(szBuf), "DBGFR3DisasInstrEx failed with rc=%Rrc\n", rc); + if (pszPrefix && *pszPrefix) + RTLogPrintf("%s-CPU%d: %s\n", pszPrefix, pVCpu->idCpu, szBuf); + else + RTLogPrintf("CPU%d: %s\n", pVCpu->idCpu, szBuf); + } + + return RT_SUCCESS(rc); +} + + +/** + * Disassemble recompiled code. + * + * @param phFileIgnored Ignored, logfile usually. + * @param pvCode Pointer to the code block. + * @param cb Size of the code block. + */ +void disas(FILE *phFileIgnored, void *pvCode, unsigned long cb) +{ + if (LogIs2Enabled()) + { + unsigned off = 0; + char szOutput[256]; + DISCPUSTATE Cpu; +#ifdef RT_ARCH_X86 + DISCPUMODE enmCpuMode = DISCPUMODE_32BIT; +#else + DISCPUMODE enmCpuMode = DISCPUMODE_64BIT; +#endif + + RTLogPrintf("Recompiled Code: %p %#lx (%ld) bytes\n", pvCode, cb, cb); + while (off < cb) + { + uint32_t cbInstr; + int rc = DISInstrToStr((uint8_t const *)pvCode + off, enmCpuMode, + &Cpu, &cbInstr, szOutput, sizeof(szOutput)); + if (RT_SUCCESS(rc)) + RTLogPrintf("%s", szOutput); + else + { + RTLogPrintf("disas error %Rrc\n", rc); + cbInstr = 1; + } + off += cbInstr; + } + } +} + + +/** + * Disassemble guest code. + * + * @param phFileIgnored Ignored, logfile usually. + * @param uCode The guest address of the code to disassemble. (flat?) + * @param cb Number of bytes to disassemble. + * @param fFlags Flags, probably something which tells if this is 16, 32 or 64 bit code. + */ +void target_disas(FILE *phFileIgnored, target_ulong uCode, target_ulong cb, int fFlags) +{ + if (LogIs2Enabled()) + { + PVM pVM = cpu_single_env->pVM; + PVMCPU pVCpu = cpu_single_env->pVCpu; + RTSEL cs; + RTGCUINTPTR eip; + + Assert(pVCpu); + + /* + * Update the state so DBGF reads the correct register values (flags). + */ + remR3StateUpdate(pVM, pVCpu); + + /* + * Do the disassembling. + */ + RTLogPrintf("Guest Code: PC=%llx %llx bytes fFlags=%d\n", (uint64_t)uCode, (uint64_t)cb, fFlags); + cs = cpu_single_env->segs[R_CS].selector; + eip = uCode - cpu_single_env->segs[R_CS].base; + for (;;) + { + char szBuf[256]; + uint32_t cbInstr; + int rc = DBGFR3DisasInstrEx(pVM->pUVM, + pVCpu->idCpu, + cs, + eip, + DBGF_DISAS_FLAGS_DEFAULT_MODE, + szBuf, sizeof(szBuf), + &cbInstr); + if (RT_SUCCESS(rc)) + RTLogPrintf("%llx %s\n", (uint64_t)uCode, szBuf); + else + { + RTLogPrintf("%llx %04x:%llx: %s\n", (uint64_t)uCode, cs, (uint64_t)eip, szBuf); + cbInstr = 1; + } + + /* next */ + if (cb <= cbInstr) + break; + cb -= cbInstr; + uCode += cbInstr; + eip += cbInstr; + } + } +} + + +/** + * Looks up a guest symbol. + * + * @returns Pointer to symbol name. This is a static buffer. + * @param orig_addr The address in question. 
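+ *
+ * @remark  Output sketch with a made up symbol: an address resolving 0x12 bytes
+ *          past a symbol named "SomeGuestProc" comes back as "SomeGuestProc+12",
+ *          an exact hit as just "SomeGuestProc", and an unresolved address as
+ *          "<N/A>".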
+ */ +const char *lookup_symbol(target_ulong orig_addr) +{ + PVM pVM = cpu_single_env->pVM; + RTGCINTPTR off = 0; + RTDBGSYMBOL Sym; + DBGFADDRESS Addr; + + int rc = DBGFR3AsSymbolByAddr(pVM->pUVM, DBGF_AS_GLOBAL, DBGFR3AddrFromFlat(pVM->pUVM, &Addr, orig_addr), + RTDBGSYMADDR_FLAGS_LESS_OR_EQUAL | RTDBGSYMADDR_FLAGS_SKIP_ABS_IN_DEFERRED, + &off, &Sym, NULL /*phMod*/); + if (RT_SUCCESS(rc)) + { + static char szSym[sizeof(Sym.szName) + 48]; + if (!off) + RTStrPrintf(szSym, sizeof(szSym), "%s\n", Sym.szName); + else if (off > 0) + RTStrPrintf(szSym, sizeof(szSym), "%s+%x\n", Sym.szName, off); + else + RTStrPrintf(szSym, sizeof(szSym), "%s-%x\n", Sym.szName, -off); + return szSym; + } + return "<N/A>"; +} + + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM + + +/* -+- FF notifications -+- */ + +/** + * Notification about the interrupt FF being set. + * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + * @thread The emulation thread. + */ +REMR3DECL(void) REMR3NotifyInterruptSet(PVM pVM, PVMCPU pVCpu) +{ + LogFlow(("REMR3NotifyInterruptSet: fInRem=%d interrupts %s\n", pVM->rem.s.fInREM, + (pVM->rem.s.Env.eflags & IF_MASK) && !(pVM->rem.s.Env.hflags & HF_INHIBIT_IRQ_MASK) ? "enabled" : "disabled")); + if (pVM->rem.s.fInREM) + ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_HARD); +} + + +/** + * Notification about the interrupt FF being set. + * + * @param pVM VM Handle. + * @param pVCpu VMCPU Handle. + * @thread Any. + */ +REMR3DECL(void) REMR3NotifyInterruptClear(PVM pVM, PVMCPU pVCpu) +{ + LogFlow(("REMR3NotifyInterruptClear:\n")); + if (pVM->rem.s.fInREM) + cpu_reset_interrupt(cpu_single_env, CPU_INTERRUPT_HARD); +} + + +/** + * Notification about pending timer(s). + * + * @param pVM VM Handle. + * @param pVCpuDst The target cpu for this notification. + * TM will not broadcast pending timer events, but use + * a dedicated EMT for them. So, only interrupt REM + * execution if the given CPU is executing in REM. + * @thread Any. + */ +REMR3DECL(void) REMR3NotifyTimerPending(PVM pVM, PVMCPU pVCpuDst) +{ +#ifndef DEBUG_bird + LogFlow(("REMR3NotifyTimerPending: fInRem=%d\n", pVM->rem.s.fInREM)); +#endif + if (pVM->rem.s.fInREM) + { + if (pVM->rem.s.Env.pVCpu == pVCpuDst) + { + LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: setting\n")); + ASMAtomicOrS32((int32_t volatile *)&pVM->rem.s.Env.interrupt_request, + CPU_INTERRUPT_EXTERNAL_TIMER); + } + else + LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: pVCpu:%p != pVCpuDst:%p\n", pVM->rem.s.Env.pVCpu, pVCpuDst)); + } + else + LogIt(RTLOGGRPFLAGS_LEVEL_5, LOG_GROUP_TM, ("REMR3NotifyTimerPending: !fInREM; cpu state=%d\n", VMCPU_GET_STATE(pVCpuDst))); +} + + +/** + * Notification about pending DMA transfers. + * + * @param pVM VM Handle. + * @thread Any. + */ +REMR3DECL(void) REMR3NotifyDmaPending(PVM pVM) +{ + LogFlow(("REMR3NotifyDmaPending: fInRem=%d\n", pVM->rem.s.fInREM)); + if (pVM->rem.s.fInREM) + ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_DMA); +} + + +/** + * Notification about pending timer(s). + * + * @param pVM VM Handle. + * @thread Any. + */ +REMR3DECL(void) REMR3NotifyQueuePending(PVM pVM) +{ + LogFlow(("REMR3NotifyQueuePending: fInRem=%d\n", pVM->rem.s.fInREM)); + if (pVM->rem.s.fInREM) + ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT); +} + + +/** + * Notification about pending FF set by an external thread. 
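+ * If the recompiler is currently executing (fInREM is set), this ORs
+ * CPU_INTERRUPT_EXTERNAL_EXIT into the interrupt request word so the inner
+ * execution loop bails out and the forced action can be serviced; otherwise it
+ * does nothing.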
+ * + * @param pVM VM handle. + * @thread Any. + */ +REMR3DECL(void) REMR3NotifyFF(PVM pVM) +{ + LogFlow(("REMR3NotifyFF: fInRem=%d\n", pVM->rem.s.fInREM)); + if (pVM->rem.s.fInREM) + ASMAtomicOrS32((int32_t volatile *)&cpu_single_env->interrupt_request, CPU_INTERRUPT_EXTERNAL_EXIT); +} + + +#ifdef VBOX_WITH_STATISTICS +void remR3ProfileStart(int statcode) +{ + STAMPROFILEADV *pStat; + switch(statcode) + { + case STATS_EMULATE_SINGLE_INSTR: + pStat = &gStatExecuteSingleInstr; + break; + case STATS_QEMU_COMPILATION: + pStat = &gStatCompilationQEmu; + break; + case STATS_QEMU_RUN_EMULATED_CODE: + pStat = &gStatRunCodeQEmu; + break; + case STATS_QEMU_TOTAL: + pStat = &gStatTotalTimeQEmu; + break; + case STATS_QEMU_RUN_TIMERS: + pStat = &gStatTimers; + break; + case STATS_TLB_LOOKUP: + pStat= &gStatTBLookup; + break; + case STATS_IRQ_HANDLING: + pStat= &gStatIRQ; + break; + case STATS_RAW_CHECK: + pStat = &gStatRawCheck; + break; + + default: + AssertMsgFailed(("unknown stat %d\n", statcode)); + return; + } + STAM_PROFILE_ADV_START(pStat, a); +} + + +void remR3ProfileStop(int statcode) +{ + STAMPROFILEADV *pStat; + switch(statcode) + { + case STATS_EMULATE_SINGLE_INSTR: + pStat = &gStatExecuteSingleInstr; + break; + case STATS_QEMU_COMPILATION: + pStat = &gStatCompilationQEmu; + break; + case STATS_QEMU_RUN_EMULATED_CODE: + pStat = &gStatRunCodeQEmu; + break; + case STATS_QEMU_TOTAL: + pStat = &gStatTotalTimeQEmu; + break; + case STATS_QEMU_RUN_TIMERS: + pStat = &gStatTimers; + break; + case STATS_TLB_LOOKUP: + pStat= &gStatTBLookup; + break; + case STATS_IRQ_HANDLING: + pStat= &gStatIRQ; + break; + case STATS_RAW_CHECK: + pStat = &gStatRawCheck; + break; + default: + AssertMsgFailed(("unknown stat %d\n", statcode)); + return; + } + STAM_PROFILE_ADV_STOP(pStat, a); +} +#endif + +/** + * Raise an RC, force rem exit. + * + * @param pVM VM handle. + * @param rc The rc. + */ +void remR3RaiseRC(PVM pVM, int rc) +{ + Log(("remR3RaiseRC: rc=%Rrc\n", rc)); + Assert(pVM->rem.s.fInREM); + VM_ASSERT_EMT(pVM); + pVM->rem.s.rc = rc; + cpu_interrupt(&pVM->rem.s.Env, CPU_INTERRUPT_RC); +} + + +/* -+- timers -+- */ + +uint64_t cpu_get_tsc(CPUX86State *env) +{ + STAM_COUNTER_INC(&gStatCpuGetTSC); + return TMCpuTickGet(env->pVCpu); +} + + +/* -+- interrupts -+- */ + +void cpu_set_ferr(CPUX86State *env) +{ + int rc = PDMIsaSetIrq(env->pVM, 13, 1, 0 /*uTagSrc*/); + LogFlow(("cpu_set_ferr: rc=%d\n", rc)); NOREF(rc); +} + +int cpu_get_pic_interrupt(CPUX86State *env) +{ + uint8_t u8Interrupt; + int rc; + + if (VMCPU_FF_TEST_AND_CLEAR(env->pVCpu, VMCPU_FF_UPDATE_APIC)) + APICUpdatePendingInterrupts(env->pVCpu); + + /* When we fail to forward interrupts directly in raw mode, we fall back to the recompiler. + * In that case we can't call PDMGetInterrupt anymore, because it has already cleared the interrupt + * with the (a)pic. + */ + /* Note! We assume we will go directly to the recompiler to handle the pending interrupt! */ + rc = PDMGetInterrupt(env->pVCpu, &u8Interrupt); + LogFlow(("cpu_get_pic_interrupt: u8Interrupt=%d rc=%Rrc pc=%04x:%08llx ~flags=%08llx\n", + u8Interrupt, rc, env->segs[R_CS].selector, (uint64_t)env->eip, (uint64_t)env->eflags)); + if (RT_SUCCESS(rc)) + { + if (VMCPU_FF_IS_ANY_SET(env->pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)) + env->interrupt_request |= CPU_INTERRUPT_HARD; + return u8Interrupt; + } + return -1; +} + + +/* -+- local apic -+- */ + +#if 0 /* CPUMSetGuestMsr does this now. 
*/ +void cpu_set_apic_base(CPUX86State *env, uint64_t val) +{ + int rc = PDMApicSetBase(env->pVM, val); + LogFlow(("cpu_set_apic_base: val=%#llx rc=%Rrc\n", val, rc)); NOREF(rc); +} +#endif + +uint64_t cpu_get_apic_base(CPUX86State *env) +{ + uint64_t u64; + VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(env->pVCpu, MSR_IA32_APICBASE, &u64); + if (RT_SUCCESS(rcStrict)) + { + LogFlow(("cpu_get_apic_base: returns %#llx \n", u64)); + return u64; + } + LogFlow(("cpu_get_apic_base: returns 0 (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict))); + return 0; +} + +void cpu_set_apic_tpr(CPUX86State *env, uint8_t val) +{ + int rc = APICSetTpr(env->pVCpu, val << 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ + LogFlow(("cpu_set_apic_tpr: val=%#x rc=%Rrc\n", val, rc)); NOREF(rc); +} + +uint8_t cpu_get_apic_tpr(CPUX86State *env) +{ + uint8_t u8; + int rc = APICGetTpr(env->pVCpu, &u8, NULL, NULL); + if (RT_SUCCESS(rc)) + { + LogFlow(("cpu_get_apic_tpr: returns %#x\n", u8)); + return u8 >> 4; /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ + } + LogFlow(("cpu_get_apic_tpr: returns 0 (rc=%Rrc)\n", rc)); + return 0; +} + +/** + * Read an MSR. + * + * @retval 0 success. + * @retval -1 failure, raise \#GP(0). + * @param env The cpu state. + * @param idMsr The MSR to read. + * @param puValue Where to return the value. + */ +int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue) +{ + Assert(env->pVCpu); + return CPUMQueryGuestMsr(env->pVCpu, idMsr, puValue) == VINF_SUCCESS ? 0 : -1; +} + +/** + * Write to an MSR. + * + * @retval 0 success. + * @retval -1 failure, raise \#GP(0). + * @param env The cpu state. + * @param idMsr The MSR to write to. + * @param uValue The value to write. + */ +int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue) +{ + Assert(env->pVCpu); + return CPUMSetGuestMsr(env->pVCpu, idMsr, uValue) == VINF_SUCCESS ? 
0 : -1; +} + +/* -+- I/O Ports -+- */ + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM_IOPORT + +void cpu_outb(CPUX86State *env, pio_addr_t addr, uint8_t val) +{ + int rc; + + if (addr != 0x80 && addr != 0x70 && addr != 0x61) + Log2(("cpu_outb: addr=%#06x val=%#x\n", addr, val)); + + rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 1); + if (RT_LIKELY(rc == VINF_SUCCESS)) + return; + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_outb: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc)); + remR3RaiseRC(env->pVM, rc); + return; + } + remAbort(rc, __FUNCTION__); +} + +void cpu_outw(CPUX86State *env, pio_addr_t addr, uint16_t val) +{ + //Log2(("cpu_outw: addr=%#06x val=%#x\n", addr, val)); + int rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 2); + if (RT_LIKELY(rc == VINF_SUCCESS)) + return; + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_outw: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc)); + remR3RaiseRC(env->pVM, rc); + return; + } + remAbort(rc, __FUNCTION__); +} + +void cpu_outl(CPUX86State *env, pio_addr_t addr, uint32_t val) +{ + int rc; + Log2(("cpu_outl: addr=%#06x val=%#x\n", addr, val)); + rc = IOMIOPortWrite(env->pVM, env->pVCpu, (RTIOPORT)addr, val, 4); + if (RT_LIKELY(rc == VINF_SUCCESS)) + return; + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_outl: addr=%#06x val=%#x -> %Rrc\n", addr, val, rc)); + remR3RaiseRC(env->pVM, rc); + return; + } + remAbort(rc, __FUNCTION__); +} + +uint8_t cpu_inb(CPUX86State *env, pio_addr_t addr) +{ + uint32_t u32 = 0; + int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 1); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + if (/*addr != 0x61 && */addr != 0x71) + Log2(("cpu_inb: addr=%#06x -> %#x\n", addr, u32)); + return (uint8_t)u32; + } + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_inb: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc)); + remR3RaiseRC(env->pVM, rc); + return (uint8_t)u32; + } + remAbort(rc, __FUNCTION__); + return UINT8_C(0xff); +} + +uint16_t cpu_inw(CPUX86State *env, pio_addr_t addr) +{ + uint32_t u32 = 0; + int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 2); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + Log2(("cpu_inw: addr=%#06x -> %#x\n", addr, u32)); + return (uint16_t)u32; + } + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_inw: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc)); + remR3RaiseRC(env->pVM, rc); + return (uint16_t)u32; + } + remAbort(rc, __FUNCTION__); + return UINT16_C(0xffff); +} + +uint32_t cpu_inl(CPUX86State *env, pio_addr_t addr) +{ + uint32_t u32 = 0; + int rc = IOMIOPortRead(env->pVM, env->pVCpu, (RTIOPORT)addr, &u32, 4); + if (RT_LIKELY(rc == VINF_SUCCESS)) + { + Log2(("cpu_inl: addr=%#06x -> %#x\n", addr, u32)); + return u32; + } + if (rc >= VINF_EM_FIRST && rc <= VINF_EM_LAST) + { + Log(("cpu_inl: addr=%#06x -> %#x rc=%Rrc\n", addr, u32, rc)); + remR3RaiseRC(env->pVM, rc); + return u32; + } + remAbort(rc, __FUNCTION__); + return UINT32_C(0xffffffff); +} + +#undef LOG_GROUP +#define LOG_GROUP LOG_GROUP_REM + + +/* -+- helpers and misc other interfaces -+- */ + +/** + * Perform the CPUID instruction. + * + * @param env Pointer to the recompiler CPU structure. + * @param idx The CPUID leaf (eax). + * @param idxSub The CPUID sub-leaf (ecx) where applicable. + * @param pEAX Where to store eax. + * @param pEBX Where to store ebx. + * @param pECX Where to store ecx. + * @param pEDX Where to store edx. 
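+ *
+ * A minimal usage sketch (illustrative only; leaf 1 is the standard feature
+ * information leaf):
+ * @code
+ *      uint32_t uEAX, uEBX, uECX, uEDX;
+ *      cpu_x86_cpuid(env, 1, 0, &uEAX, &uEBX, &uECX, &uEDX);
+ *      Log(("cpuid(1): eax=%#x features: edx=%#x ecx=%#x\n", uEAX, uEDX, uECX));
+ * @endcode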
+ */ +void cpu_x86_cpuid(CPUX86State *env, uint32_t idx, uint32_t idxSub, + uint32_t *pEAX, uint32_t *pEBX, uint32_t *pECX, uint32_t *pEDX) +{ + NOREF(idxSub); + CPUMGetGuestCpuId(env->pVCpu, idx, idxSub, pEAX, pEBX, pECX, pEDX); +} + + +#if 0 /* not used */ +/** + * Interface for qemu hardware to report back fatal errors. + */ +void hw_error(const char *pszFormat, ...) +{ + /* + * Bitch about it. + */ + /** @todo Add support for nested arg lists in the LogPrintfV routine! I've code for + * this in my Odin32 tree at home! */ + va_list args; + va_start(args, pszFormat); + RTLogPrintf("fatal error in virtual hardware:"); + RTLogPrintfV(pszFormat, args); + va_end(args); + AssertReleaseMsgFailed(("fatal error in virtual hardware: %s\n", pszFormat)); + + /* + * If we're in REM context we'll sync back the state before 'jumping' to + * the EMs failure handling. + */ + PVM pVM = cpu_single_env->pVM; + if (pVM->rem.s.fInREM) + REMR3StateBack(pVM); + EMR3FatalError(pVM, VERR_REM_VIRTUAL_HARDWARE_ERROR); + AssertMsgFailed(("EMR3FatalError returned!\n")); +} +#endif + +/** + * Interface for the qemu cpu to report unhandled situation + * raising a fatal VM error. + */ +void cpu_abort(CPUX86State *env, const char *pszFormat, ...) +{ + va_list va; + PVM pVM; + PVMCPU pVCpu; + char szMsg[256]; + + /* + * Bitch about it. + */ + RTLogFlags(NULL, "nodisabled nobuffered"); + RTLogFlush(NULL); + + va_start(va, pszFormat); +#if defined(RT_OS_WINDOWS) && ARCH_BITS == 64 + /* It's a bit complicated when mixing MSC and GCC on AMD64. This is a bit ugly, but it works. */ + unsigned cArgs = 0; + uintptr_t auArgs[6] = {0,0,0,0,0,0}; + const char *psz = strchr(pszFormat, '%'); + while (psz && cArgs < 6) + { + auArgs[cArgs++] = va_arg(va, uintptr_t); + psz = strchr(psz + 1, '%'); + } + switch (cArgs) + { + case 1: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0]); break; + case 2: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1]); break; + case 3: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2]); break; + case 4: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3]); break; + case 5: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3], auArgs[4]); break; + case 6: RTStrPrintf(szMsg, sizeof(szMsg), pszFormat, auArgs[0], auArgs[1], auArgs[2], auArgs[3], auArgs[4], auArgs[5]); break; + default: + case 0: RTStrPrintf(szMsg, sizeof(szMsg), "%s", pszFormat); break; + } +#else + RTStrPrintfV(szMsg, sizeof(szMsg), pszFormat, va); +#endif + va_end(va); + + RTLogPrintf("fatal error in recompiler cpu: %s\n", szMsg); + RTLogRelPrintf("fatal error in recompiler cpu: %s\n", szMsg); + + /* + * If we're in REM context we'll sync back the state before 'jumping' to + * the EMs failure handling. + */ + pVM = cpu_single_env->pVM; + pVCpu = cpu_single_env->pVCpu; + Assert(pVCpu); + + if (pVM->rem.s.fInREM) + REMR3StateBack(pVM, pVCpu); + EMR3FatalError(pVCpu, VERR_REM_VIRTUAL_CPU_ERROR); + AssertMsgFailed(("EMR3FatalError returned!\n")); +} + + +/** + * Aborts the VM. + * + * @param rc VBox error code. + * @param pszTip Hint about why/when this happened. + */ +void remAbort(int rc, const char *pszTip) +{ + PVM pVM; + PVMCPU pVCpu; + + /* + * Bitch about it. + */ + RTLogPrintf("internal REM fatal error: rc=%Rrc %s\n", rc, pszTip); + AssertReleaseMsgFailed(("internal REM fatal error: rc=%Rrc %s\n", rc, pszTip)); + + /* + * Jump back to where we entered the recompiler. 
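+ * (REMR3StateBack first flushes the recompiler state back to CPUM so EM sees
+ * consistent registers; EMR3FatalError is not expected to return, which the
+ * assertion below documents.)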
+ */ + pVM = cpu_single_env->pVM; + pVCpu = cpu_single_env->pVCpu; + Assert(pVCpu); + + if (pVM->rem.s.fInREM) + REMR3StateBack(pVM, pVCpu); + + EMR3FatalError(pVCpu, rc); + AssertMsgFailed(("EMR3FatalError returned!\n")); +} + + +/** + * Dumps a linux system call. + * @param pVCpu VMCPU handle. + */ +void remR3DumpLnxSyscall(PVMCPU pVCpu) +{ + static const char *apsz[] = + { + "sys_restart_syscall", /* 0 - old "setup()" system call, used for restarting */ + "sys_exit", + "sys_fork", + "sys_read", + "sys_write", + "sys_open", /* 5 */ + "sys_close", + "sys_waitpid", + "sys_creat", + "sys_link", + "sys_unlink", /* 10 */ + "sys_execve", + "sys_chdir", + "sys_time", + "sys_mknod", + "sys_chmod", /* 15 */ + "sys_lchown16", + "sys_ni_syscall", /* old break syscall holder */ + "sys_stat", + "sys_lseek", + "sys_getpid", /* 20 */ + "sys_mount", + "sys_oldumount", + "sys_setuid16", + "sys_getuid16", + "sys_stime", /* 25 */ + "sys_ptrace", + "sys_alarm", + "sys_fstat", + "sys_pause", + "sys_utime", /* 30 */ + "sys_ni_syscall", /* old stty syscall holder */ + "sys_ni_syscall", /* old gtty syscall holder */ + "sys_access", + "sys_nice", + "sys_ni_syscall", /* 35 - old ftime syscall holder */ + "sys_sync", + "sys_kill", + "sys_rename", + "sys_mkdir", + "sys_rmdir", /* 40 */ + "sys_dup", + "sys_pipe", + "sys_times", + "sys_ni_syscall", /* old prof syscall holder */ + "sys_brk", /* 45 */ + "sys_setgid16", + "sys_getgid16", + "sys_signal", + "sys_geteuid16", + "sys_getegid16", /* 50 */ + "sys_acct", + "sys_umount", /* recycled never used phys() */ + "sys_ni_syscall", /* old lock syscall holder */ + "sys_ioctl", + "sys_fcntl", /* 55 */ + "sys_ni_syscall", /* old mpx syscall holder */ + "sys_setpgid", + "sys_ni_syscall", /* old ulimit syscall holder */ + "sys_olduname", + "sys_umask", /* 60 */ + "sys_chroot", + "sys_ustat", + "sys_dup2", + "sys_getppid", + "sys_getpgrp", /* 65 */ + "sys_setsid", + "sys_sigaction", + "sys_sgetmask", + "sys_ssetmask", + "sys_setreuid16", /* 70 */ + "sys_setregid16", + "sys_sigsuspend", + "sys_sigpending", + "sys_sethostname", + "sys_setrlimit", /* 75 */ + "sys_old_getrlimit", + "sys_getrusage", + "sys_gettimeofday", + "sys_settimeofday", + "sys_getgroups16", /* 80 */ + "sys_setgroups16", + "old_select", + "sys_symlink", + "sys_lstat", + "sys_readlink", /* 85 */ + "sys_uselib", + "sys_swapon", + "sys_reboot", + "old_readdir", + "old_mmap", /* 90 */ + "sys_munmap", + "sys_truncate", + "sys_ftruncate", + "sys_fchmod", + "sys_fchown16", /* 95 */ + "sys_getpriority", + "sys_setpriority", + "sys_ni_syscall", /* old profil syscall holder */ + "sys_statfs", + "sys_fstatfs", /* 100 */ + "sys_ioperm", + "sys_socketcall", + "sys_syslog", + "sys_setitimer", + "sys_getitimer", /* 105 */ + "sys_newstat", + "sys_newlstat", + "sys_newfstat", + "sys_uname", + "sys_iopl", /* 110 */ + "sys_vhangup", + "sys_ni_syscall", /* old "idle" system call */ + "sys_vm86old", + "sys_wait4", + "sys_swapoff", /* 115 */ + "sys_sysinfo", + "sys_ipc", + "sys_fsync", + "sys_sigreturn", + "sys_clone", /* 120 */ + "sys_setdomainname", + "sys_newuname", + "sys_modify_ldt", + "sys_adjtimex", + "sys_mprotect", /* 125 */ + "sys_sigprocmask", + "sys_ni_syscall", /* old "create_module" */ + "sys_init_module", + "sys_delete_module", + "sys_ni_syscall", /* 130: old "get_kernel_syms" */ + "sys_quotactl", + "sys_getpgid", + "sys_fchdir", + "sys_bdflush", + "sys_sysfs", /* 135 */ + "sys_personality", + "sys_ni_syscall", /* reserved for afs_syscall */ + "sys_setfsuid16", + "sys_setfsgid16", + "sys_llseek", /* 140 */ + 
"sys_getdents", + "sys_select", + "sys_flock", + "sys_msync", + "sys_readv", /* 145 */ + "sys_writev", + "sys_getsid", + "sys_fdatasync", + "sys_sysctl", + "sys_mlock", /* 150 */ + "sys_munlock", + "sys_mlockall", + "sys_munlockall", + "sys_sched_setparam", + "sys_sched_getparam", /* 155 */ + "sys_sched_setscheduler", + "sys_sched_getscheduler", + "sys_sched_yield", + "sys_sched_get_priority_max", + "sys_sched_get_priority_min", /* 160 */ + "sys_sched_rr_get_interval", + "sys_nanosleep", + "sys_mremap", + "sys_setresuid16", + "sys_getresuid16", /* 165 */ + "sys_vm86", + "sys_ni_syscall", /* Old sys_query_module */ + "sys_poll", + "sys_nfsservctl", + "sys_setresgid16", /* 170 */ + "sys_getresgid16", + "sys_prctl", + "sys_rt_sigreturn", + "sys_rt_sigaction", + "sys_rt_sigprocmask", /* 175 */ + "sys_rt_sigpending", + "sys_rt_sigtimedwait", + "sys_rt_sigqueueinfo", + "sys_rt_sigsuspend", + "sys_pread64", /* 180 */ + "sys_pwrite64", + "sys_chown16", + "sys_getcwd", + "sys_capget", + "sys_capset", /* 185 */ + "sys_sigaltstack", + "sys_sendfile", + "sys_ni_syscall", /* reserved for streams1 */ + "sys_ni_syscall", /* reserved for streams2 */ + "sys_vfork", /* 190 */ + "sys_getrlimit", + "sys_mmap2", + "sys_truncate64", + "sys_ftruncate64", + "sys_stat64", /* 195 */ + "sys_lstat64", + "sys_fstat64", + "sys_lchown", + "sys_getuid", + "sys_getgid", /* 200 */ + "sys_geteuid", + "sys_getegid", + "sys_setreuid", + "sys_setregid", + "sys_getgroups", /* 205 */ + "sys_setgroups", + "sys_fchown", + "sys_setresuid", + "sys_getresuid", + "sys_setresgid", /* 210 */ + "sys_getresgid", + "sys_chown", + "sys_setuid", + "sys_setgid", + "sys_setfsuid", /* 215 */ + "sys_setfsgid", + "sys_pivot_root", + "sys_mincore", + "sys_madvise", + "sys_getdents64", /* 220 */ + "sys_fcntl64", + "sys_ni_syscall", /* reserved for TUX */ + "sys_ni_syscall", + "sys_gettid", + "sys_readahead", /* 225 */ + "sys_setxattr", + "sys_lsetxattr", + "sys_fsetxattr", + "sys_getxattr", + "sys_lgetxattr", /* 230 */ + "sys_fgetxattr", + "sys_listxattr", + "sys_llistxattr", + "sys_flistxattr", + "sys_removexattr", /* 235 */ + "sys_lremovexattr", + "sys_fremovexattr", + "sys_tkill", + "sys_sendfile64", + "sys_futex", /* 240 */ + "sys_sched_setaffinity", + "sys_sched_getaffinity", + "sys_set_thread_area", + "sys_get_thread_area", + "sys_io_setup", /* 245 */ + "sys_io_destroy", + "sys_io_getevents", + "sys_io_submit", + "sys_io_cancel", + "sys_fadvise64", /* 250 */ + "sys_ni_syscall", + "sys_exit_group", + "sys_lookup_dcookie", + "sys_epoll_create", + "sys_epoll_ctl", /* 255 */ + "sys_epoll_wait", + "sys_remap_file_pages", + "sys_set_tid_address", + "sys_timer_create", + "sys_timer_settime", /* 260 */ + "sys_timer_gettime", + "sys_timer_getoverrun", + "sys_timer_delete", + "sys_clock_settime", + "sys_clock_gettime", /* 265 */ + "sys_clock_getres", + "sys_clock_nanosleep", + "sys_statfs64", + "sys_fstatfs64", + "sys_tgkill", /* 270 */ + "sys_utimes", + "sys_fadvise64_64", + "sys_ni_syscall" /* sys_vserver */ + }; + + uint32_t uEAX = CPUMGetGuestEAX(pVCpu); + switch (uEAX) + { + default: + if (uEAX < RT_ELEMENTS(apsz)) + Log(("REM: linux syscall %3d: %s (eip=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x ebp=%08x)\n", + uEAX, apsz[uEAX], CPUMGetGuestEIP(pVCpu), CPUMGetGuestEBX(pVCpu), CPUMGetGuestECX(pVCpu), + CPUMGetGuestEDX(pVCpu), CPUMGetGuestESI(pVCpu), CPUMGetGuestEDI(pVCpu), CPUMGetGuestEBP(pVCpu))); + else + Log(("eip=%08x: linux syscall %d (#%x) unknown\n", CPUMGetGuestEIP(pVCpu), uEAX, uEAX)); + break; + + } +} + + +/** + * Dumps an 
OpenBSD system call. + * @param pVCpu VMCPU handle. + */ +void remR3DumpOBsdSyscall(PVMCPU pVCpu) +{ + static const char *apsz[] = + { + "SYS_syscall", //0 + "SYS_exit", //1 + "SYS_fork", //2 + "SYS_read", //3 + "SYS_write", //4 + "SYS_open", //5 + "SYS_close", //6 + "SYS_wait4", //7 + "SYS_8", + "SYS_link", //9 + "SYS_unlink", //10 + "SYS_11", + "SYS_chdir", //12 + "SYS_fchdir", //13 + "SYS_mknod", //14 + "SYS_chmod", //15 + "SYS_chown", //16 + "SYS_break", //17 + "SYS_18", + "SYS_19", + "SYS_getpid", //20 + "SYS_mount", //21 + "SYS_unmount", //22 + "SYS_setuid", //23 + "SYS_getuid", //24 + "SYS_geteuid", //25 + "SYS_ptrace", //26 + "SYS_recvmsg", //27 + "SYS_sendmsg", //28 + "SYS_recvfrom", //29 + "SYS_accept", //30 + "SYS_getpeername", //31 + "SYS_getsockname", //32 + "SYS_access", //33 + "SYS_chflags", //34 + "SYS_fchflags", //35 + "SYS_sync", //36 + "SYS_kill", //37 + "SYS_38", + "SYS_getppid", //39 + "SYS_40", + "SYS_dup", //41 + "SYS_opipe", //42 + "SYS_getegid", //43 + "SYS_profil", //44 + "SYS_ktrace", //45 + "SYS_sigaction", //46 + "SYS_getgid", //47 + "SYS_sigprocmask", //48 + "SYS_getlogin", //49 + "SYS_setlogin", //50 + "SYS_acct", //51 + "SYS_sigpending", //52 + "SYS_osigaltstack", //53 + "SYS_ioctl", //54 + "SYS_reboot", //55 + "SYS_revoke", //56 + "SYS_symlink", //57 + "SYS_readlink", //58 + "SYS_execve", //59 + "SYS_umask", //60 + "SYS_chroot", //61 + "SYS_62", + "SYS_63", + "SYS_64", + "SYS_65", + "SYS_vfork", //66 + "SYS_67", + "SYS_68", + "SYS_sbrk", //69 + "SYS_sstk", //70 + "SYS_61", + "SYS_vadvise", //72 + "SYS_munmap", //73 + "SYS_mprotect", //74 + "SYS_madvise", //75 + "SYS_76", + "SYS_77", + "SYS_mincore", //78 + "SYS_getgroups", //79 + "SYS_setgroups", //80 + "SYS_getpgrp", //81 + "SYS_setpgid", //82 + "SYS_setitimer", //83 + "SYS_84", + "SYS_85", + "SYS_getitimer", //86 + "SYS_87", + "SYS_88", + "SYS_89", + "SYS_dup2", //90 + "SYS_91", + "SYS_fcntl", //92 + "SYS_select", //93 + "SYS_94", + "SYS_fsync", //95 + "SYS_setpriority", //96 + "SYS_socket", //97 + "SYS_connect", //98 + "SYS_99", + "SYS_getpriority", //100 + "SYS_101", + "SYS_102", + "SYS_sigreturn", //103 + "SYS_bind", //104 + "SYS_setsockopt", //105 + "SYS_listen", //106 + "SYS_107", + "SYS_108", + "SYS_109", + "SYS_110", + "SYS_sigsuspend", //111 + "SYS_112", + "SYS_113", + "SYS_114", + "SYS_115", + "SYS_gettimeofday", //116 + "SYS_getrusage", //117 + "SYS_getsockopt", //118 + "SYS_119", + "SYS_readv", //120 + "SYS_writev", //121 + "SYS_settimeofday", //122 + "SYS_fchown", //123 + "SYS_fchmod", //124 + "SYS_125", + "SYS_setreuid", //126 + "SYS_setregid", //127 + "SYS_rename", //128 + "SYS_129", + "SYS_130", + "SYS_flock", //131 + "SYS_mkfifo", //132 + "SYS_sendto", //133 + "SYS_shutdown", //134 + "SYS_socketpair", //135 + "SYS_mkdir", //136 + "SYS_rmdir", //137 + "SYS_utimes", //138 + "SYS_139", + "SYS_adjtime", //140 + "SYS_141", + "SYS_142", + "SYS_143", + "SYS_144", + "SYS_145", + "SYS_146", + "SYS_setsid", //147 + "SYS_quotactl", //148 + "SYS_149", + "SYS_150", + "SYS_151", + "SYS_152", + "SYS_153", + "SYS_154", + "SYS_nfssvc", //155 + "SYS_156", + "SYS_157", + "SYS_158", + "SYS_159", + "SYS_160", + "SYS_getfh", //161 + "SYS_162", + "SYS_163", + "SYS_164", + "SYS_sysarch", //165 + "SYS_166", + "SYS_167", + "SYS_168", + "SYS_169", + "SYS_170", + "SYS_171", + "SYS_172", + "SYS_pread", //173 + "SYS_pwrite", //174 + "SYS_175", + "SYS_176", + "SYS_177", + "SYS_178", + "SYS_179", + "SYS_180", + "SYS_setgid", //181 + "SYS_setegid", //182 + "SYS_seteuid", //183 + "SYS_lfs_bmapv", //184 + "SYS_lfs_markv", 
//185 + "SYS_lfs_segclean", //186 + "SYS_lfs_segwait", //187 + "SYS_188", + "SYS_189", + "SYS_190", + "SYS_pathconf", //191 + "SYS_fpathconf", //192 + "SYS_swapctl", //193 + "SYS_getrlimit", //194 + "SYS_setrlimit", //195 + "SYS_getdirentries", //196 + "SYS_mmap", //197 + "SYS___syscall", //198 + "SYS_lseek", //199 + "SYS_truncate", //200 + "SYS_ftruncate", //201 + "SYS___sysctl", //202 + "SYS_mlock", //203 + "SYS_munlock", //204 + "SYS_205", + "SYS_futimes", //206 + "SYS_getpgid", //207 + "SYS_xfspioctl", //208 + "SYS_209", + "SYS_210", + "SYS_211", + "SYS_212", + "SYS_213", + "SYS_214", + "SYS_215", + "SYS_216", + "SYS_217", + "SYS_218", + "SYS_219", + "SYS_220", + "SYS_semget", //221 + "SYS_222", + "SYS_223", + "SYS_224", + "SYS_msgget", //225 + "SYS_msgsnd", //226 + "SYS_msgrcv", //227 + "SYS_shmat", //228 + "SYS_229", + "SYS_shmdt", //230 + "SYS_231", + "SYS_clock_gettime", //232 + "SYS_clock_settime", //233 + "SYS_clock_getres", //234 + "SYS_235", + "SYS_236", + "SYS_237", + "SYS_238", + "SYS_239", + "SYS_nanosleep", //240 + "SYS_241", + "SYS_242", + "SYS_243", + "SYS_244", + "SYS_245", + "SYS_246", + "SYS_247", + "SYS_248", + "SYS_249", + "SYS_minherit", //250 + "SYS_rfork", //251 + "SYS_poll", //252 + "SYS_issetugid", //253 + "SYS_lchown", //254 + "SYS_getsid", //255 + "SYS_msync", //256 + "SYS_257", + "SYS_258", + "SYS_259", + "SYS_getfsstat", //260 + "SYS_statfs", //261 + "SYS_fstatfs", //262 + "SYS_pipe", //263 + "SYS_fhopen", //264 + "SYS_265", + "SYS_fhstatfs", //266 + "SYS_preadv", //267 + "SYS_pwritev", //268 + "SYS_kqueue", //269 + "SYS_kevent", //270 + "SYS_mlockall", //271 + "SYS_munlockall", //272 + "SYS_getpeereid", //273 + "SYS_274", + "SYS_275", + "SYS_276", + "SYS_277", + "SYS_278", + "SYS_279", + "SYS_280", + "SYS_getresuid", //281 + "SYS_setresuid", //282 + "SYS_getresgid", //283 + "SYS_setresgid", //284 + "SYS_285", + "SYS_mquery", //286 + "SYS_closefrom", //287 + "SYS_sigaltstack", //288 + "SYS_shmget", //289 + "SYS_semop", //290 + "SYS_stat", //291 + "SYS_fstat", //292 + "SYS_lstat", //293 + "SYS_fhstat", //294 + "SYS___semctl", //295 + "SYS_shmctl", //296 + "SYS_msgctl", //297 + "SYS_MAXSYSCALL", //298 + //299 + //300 + }; + uint32_t uEAX; + if (!LogIsEnabled()) + return; + uEAX = CPUMGetGuestEAX(pVCpu); + switch (uEAX) + { + default: + if (uEAX < RT_ELEMENTS(apsz)) + { + uint32_t au32Args[8] = {0}; + PGMPhysSimpleReadGCPtr(pVCpu, au32Args, CPUMGetGuestESP(pVCpu), sizeof(au32Args)); + RTLogPrintf("REM: OpenBSD syscall %3d: %s (eip=%08x %08x %08x %08x %08x %08x %08x %08x %08x)\n", + uEAX, apsz[uEAX], CPUMGetGuestEIP(pVCpu), au32Args[0], au32Args[1], au32Args[2], au32Args[3], + au32Args[4], au32Args[5], au32Args[6], au32Args[7]); + } + else + RTLogPrintf("eip=%08x: OpenBSD syscall %d (#%x) unknown!!\n", CPUMGetGuestEIP(pVCpu), uEAX, uEAX); + break; + } +} + + +#if defined(IPRT_NO_CRT) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_X86) +/** + * The Dll main entry point (stub). 
+ */ +bool __stdcall _DllMainCRTStartup(void *hModule, uint32_t dwReason, void *pvReserved) +{ + return true; +} + +void *memcpy(void *dst, const void *src, size_t size) +{ + uint8_t*pbDst = dst, *pbSrc = src; + while (size-- > 0) + *pbDst++ = *pbSrc++; + return dst; +} + +#endif + +void cpu_smm_update(CPUX86State *env) +{ +} diff --git a/src/recompiler/bswap.h b/src/recompiler/bswap.h new file mode 100644 index 00000000..4377c7b0 --- /dev/null +++ b/src/recompiler/bswap.h @@ -0,0 +1,225 @@ +#ifndef BSWAP_H +#define BSWAP_H + +#include "config-host.h" + +#include <inttypes.h> + +#ifdef CONFIG_MACHINE_BSWAP_H +#include <sys/endian.h> +#include <sys/types.h> +#include <machine/bswap.h> +#else + +#ifdef CONFIG_BYTESWAP_H +#include <byteswap.h> +#else + +#define bswap_16(x) __extension__ /* <- VBOX */ \ +({ \ + uint16_t __x = (x); \ + ((uint16_t)( \ + (((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \ + (((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \ +}) + +#define bswap_32(x) __extension__ /* <- VBOX */ \ +({ \ + uint32_t __x = (x); \ + ((uint32_t)( \ + (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \ + (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \ + (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \ + (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \ +}) + +#define bswap_64(x) __extension__ /* <- VBOX */ \ +({ \ + uint64_t __x = (x); \ + ((uint64_t)( \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \ +}) + +#endif /* !CONFIG_BYTESWAP_H */ + +static inline uint16_t bswap16(uint16_t x) +{ + return bswap_16(x); +} + +static inline uint32_t bswap32(uint32_t x) +{ + return bswap_32(x); +} + +static inline uint64_t bswap64(uint64_t x) +{ + return bswap_64(x); +} + +#endif /* ! 
CONFIG_MACHINE_BSWAP_H */ + +static inline void bswap16s(uint16_t *s) +{ + *s = bswap16(*s); +} + +static inline void bswap32s(uint32_t *s) +{ + *s = bswap32(*s); +} + +static inline void bswap64s(uint64_t *s) +{ + *s = bswap64(*s); +} + +#if defined(HOST_WORDS_BIGENDIAN) +#define be_bswap(v, size) (v) +#define le_bswap(v, size) bswap ## size(v) +#define be_bswaps(v, size) +#define le_bswaps(p, size) *p = bswap ## size(*p); +#else +#define le_bswap(v, size) (v) +#define be_bswap(v, size) bswap ## size(v) +#define le_bswaps(v, size) +#define be_bswaps(p, size) *p = bswap ## size(*p); +#endif + +#define CPU_CONVERT(endian, size, type)\ +static inline type endian ## size ## _to_cpu(type v)\ +{\ + return endian ## _bswap(v, size);\ +}\ +\ +static inline type cpu_to_ ## endian ## size(type v)\ +{\ + return endian ## _bswap(v, size);\ +}\ +\ +static inline void endian ## size ## _to_cpus(type *p)\ +{\ + endian ## _bswaps(p, size)\ +}\ +\ +static inline void cpu_to_ ## endian ## size ## s(type *p)\ +{\ + endian ## _bswaps(p, size)\ +}\ +\ +static inline type endian ## size ## _to_cpup(const type *p)\ +{\ + return endian ## size ## _to_cpu(*p);\ +}\ +\ +static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\ +{\ + *p = cpu_to_ ## endian ## size(v);\ +} + +CPU_CONVERT(be, 16, uint16_t) +CPU_CONVERT(be, 32, uint32_t) +CPU_CONVERT(be, 64, uint64_t) + +CPU_CONVERT(le, 16, uint16_t) +CPU_CONVERT(le, 32, uint32_t) +CPU_CONVERT(le, 64, uint64_t) + +/* unaligned versions (optimized for frequent unaligned accesses)*/ + +#if defined(__i386__) || defined(_ARCH_PPC) + +#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v) +#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v) +#define le16_to_cpupu(p) le16_to_cpup(p) +#define le32_to_cpupu(p) le32_to_cpup(p) +#define be32_to_cpupu(p) be32_to_cpup(p) + +#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v) +#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v) + +#else + +static inline void cpu_to_le16wu(uint16_t *p, uint16_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v & 0xff; + p1[1] = v >> 8; +} + +static inline void cpu_to_le32wu(uint32_t *p, uint32_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v & 0xff; + p1[1] = v >> 8; + p1[2] = v >> 16; + p1[3] = v >> 24; +} + +static inline uint16_t le16_to_cpupu(const uint16_t *p) +{ + const uint8_t *p1 = (const uint8_t *)p; + return p1[0] | (p1[1] << 8); +} + +static inline uint32_t le32_to_cpupu(const uint32_t *p) +{ + const uint8_t *p1 = (const uint8_t *)p; + return p1[0] | (p1[1] << 8) | (p1[2] << 16) | (p1[3] << 24); +} + +static inline uint32_t be32_to_cpupu(const uint32_t *p) +{ + const uint8_t *p1 = (const uint8_t *)p; + return p1[3] | (p1[2] << 8) | (p1[1] << 16) | (p1[0] << 24); +} + +static inline void cpu_to_be16wu(uint16_t *p, uint16_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v >> 8; + p1[1] = v & 0xff; +} + +static inline void cpu_to_be32wu(uint32_t *p, uint32_t v) +{ + uint8_t *p1 = (uint8_t *)p; + + p1[0] = v >> 24; + p1[1] = v >> 16; + p1[2] = v >> 8; + p1[3] = v & 0xff; +} + +#endif + +#ifdef HOST_WORDS_BIGENDIAN +#define cpu_to_32wu cpu_to_be32wu +#define leul_to_cpu(v) glue(glue(le,HOST_LONG_BITS),_to_cpu)(v) +#else +#define cpu_to_32wu cpu_to_le32wu +#define leul_to_cpu(v) (v) +#endif + +#undef le_bswap +#undef be_bswap +#undef le_bswaps +#undef be_bswaps + +/* len must be one of 1, 2, 4 */ +static inline uint32_t qemu_bswap_len(uint32_t value, int len) +{ + return bswap32(value) >> (32 - 8 * len); +} + +#endif /* BSWAP_H */ diff --git a/src/recompiler/cache-utils.h b/src/recompiler/cache-utils.h 
new file mode 100644 index 00000000..5d5d44f4 --- /dev/null +++ b/src/recompiler/cache-utils.h @@ -0,0 +1,41 @@ +#ifndef QEMU_CACHE_UTILS_H +#define QEMU_CACHE_UTILS_H + +#if defined(_ARCH_PPC) +struct qemu_cache_conf { + unsigned long dcache_bsize; + unsigned long icache_bsize; +}; + +extern struct qemu_cache_conf qemu_cache_conf; + +extern void qemu_cache_utils_init(char **envp); + +/* mildly adjusted code from tcg-dyngen.c */ +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + unsigned long p, start1, stop1; + unsigned long dsize = qemu_cache_conf.dcache_bsize; + unsigned long isize = qemu_cache_conf.icache_bsize; + + start1 = start & ~(dsize - 1); + stop1 = (stop + dsize - 1) & ~(dsize - 1); + for (p = start1; p < stop1; p += dsize) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + + start &= start & ~(isize - 1); + stop1 = (stop + isize - 1) & ~(isize - 1); + for (p = start1; p < stop1; p += isize) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} + +#else +#define qemu_cache_utils_init(envp) do { (void) (envp); } while (0) +#endif + +#endif /* QEMU_CACHE_UTILS_H */ diff --git a/src/recompiler/cpu-all.h b/src/recompiler/cpu-all.h new file mode 100644 index 00000000..128c672e --- /dev/null +++ b/src/recompiler/cpu-all.h @@ -0,0 +1,1184 @@ +/* + * defines common to all virtual CPUs + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#ifndef CPU_ALL_H +#define CPU_ALL_H + +#ifdef VBOX +# ifndef LOG_GROUP +# define LOG_GROUP LOG_GROUP_REM +# endif +# include <VBox/log.h> +# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */ +#endif /* VBOX */ +#include "qemu-common.h" +#include "cpu-common.h" + +/* some important defines: + * + * WORDS_ALIGNED : if defined, the host cpu can only make word aligned + * memory accesses. + * + * HOST_WORDS_BIGENDIAN : if defined, the host cpu is big endian and + * otherwise little endian. 
+ * + * (TARGET_WORDS_ALIGNED : same for target cpu (not supported yet)) + * + * TARGET_WORDS_BIGENDIAN : same for target cpu + */ + +#include "softfloat.h" + +#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN) +#define BSWAP_NEEDED +#endif + +#ifdef BSWAP_NEEDED + +static inline uint16_t tswap16(uint16_t s) +{ + return bswap16(s); +} + +static inline uint32_t tswap32(uint32_t s) +{ + return bswap32(s); +} + +static inline uint64_t tswap64(uint64_t s) +{ + return bswap64(s); +} + +static inline void tswap16s(uint16_t *s) +{ + *s = bswap16(*s); +} + +static inline void tswap32s(uint32_t *s) +{ + *s = bswap32(*s); +} + +static inline void tswap64s(uint64_t *s) +{ + *s = bswap64(*s); +} + +#else + +static inline uint16_t tswap16(uint16_t s) +{ + return s; +} + +static inline uint32_t tswap32(uint32_t s) +{ + return s; +} + +static inline uint64_t tswap64(uint64_t s) +{ + return s; +} + +static inline void tswap16s(uint16_t *s) +{ +} + +static inline void tswap32s(uint32_t *s) +{ +} + +static inline void tswap64s(uint64_t *s) +{ +} + +#endif + +#if TARGET_LONG_SIZE == 4 +#define tswapl(s) tswap32(s) +#define tswapls(s) tswap32s((uint32_t *)(s)) +#define bswaptls(s) bswap32s(s) +#else +#define tswapl(s) tswap64(s) +#define tswapls(s) tswap64s((uint64_t *)(s)) +#define bswaptls(s) bswap64s(s) +#endif + +typedef union { + float32 f; + uint32_t l; +} CPU_FloatU; + +/* NOTE: arm FPA is horrible as double 32 bit words are stored in big + endian ! */ +typedef union { + float64 d; +#if defined(HOST_WORDS_BIGENDIAN) \ + || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT)) + struct { + uint32_t upper; + uint32_t lower; + } l; +#else + struct { + uint32_t lower; + uint32_t upper; + } l; +#endif + uint64_t ll; +} CPU_DoubleU; + +#ifdef TARGET_SPARC +typedef union { + float128 q; +#if defined(HOST_WORDS_BIGENDIAN) \ + || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT)) + struct { + uint32_t upmost; + uint32_t upper; + uint32_t lower; + uint32_t lowest; + } l; + struct { + uint64_t upper; + uint64_t lower; + } ll; +#else + struct { + uint32_t lowest; + uint32_t lower; + uint32_t upper; + uint32_t upmost; + } l; + struct { + uint64_t lower; + uint64_t upper; + } ll; +#endif +} CPU_QuadU; +#endif + +/* CPU memory access without any memory or io remapping */ + +/* + * the generic syntax for the memory accesses is: + * + * load: ld{type}{sign}{size}{endian}_{access_type}(ptr) + * + * store: st{type}{size}{endian}_{access_type}(ptr, val) + * + * type is: + * (empty): integer access + * f : float access + * + * sign is: + * (empty): for floats or 32 bit size + * u : unsigned + * s : signed + * + * size is: + * b: 8 bits + * w: 16 bits + * l: 32 bits + * q: 64 bits + * + * endian is: + * (empty): target cpu endianness or 8 bit access + * r : reversed target cpu endianness (not implemented yet) + * be : big endian (not implemented yet) + * le : little endian (not implemented yet) + * + * access_type is: + * raw : host memory access + * user : user mode access using soft MMU + * kernel : kernel mode access using soft MMU + */ + +#ifdef VBOX +void remAbort(int rc, const char *pszTip) __attribute__((__noreturn__)); + +void remR3PhysRead(RTGCPHYS SrcGCPhys, void *pvDst, unsigned cb); +RTCCUINTREG remR3PhysReadU8(RTGCPHYS SrcGCPhys); +RTCCINTREG remR3PhysReadS8(RTGCPHYS SrcGCPhys); +RTCCUINTREG remR3PhysReadU16(RTGCPHYS SrcGCPhys); +RTCCINTREG remR3PhysReadS16(RTGCPHYS SrcGCPhys); +RTCCUINTREG remR3PhysReadU32(RTGCPHYS SrcGCPhys); +RTCCINTREG 
remR3PhysReadS32(RTGCPHYS SrcGCPhys); +uint64_t remR3PhysReadU64(RTGCPHYS SrcGCPhys); +int64_t remR3PhysReadS64(RTGCPHYS SrcGCPhys); +void remR3PhysWrite(RTGCPHYS DstGCPhys, const void *pvSrc, unsigned cb); +void remR3PhysWriteU8(RTGCPHYS DstGCPhys, uint8_t val); +void remR3PhysWriteU16(RTGCPHYS DstGCPhys, uint16_t val); +void remR3PhysWriteU32(RTGCPHYS DstGCPhys, uint32_t val); +void remR3PhysWriteU64(RTGCPHYS DstGCPhys, uint64_t val); + +# ifndef REM_PHYS_ADDR_IN_TLB +void *remR3TlbGCPhys2Ptr(CPUState *env1, target_ulong physAddr, int fWritable); +# endif + +#endif /* VBOX */ + +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + +DECLINLINE(uint8_t) ldub_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadU8((uintptr_t)ptr); +} + +DECLINLINE(int8_t) ldsb_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadS8((uintptr_t)ptr); +} + +DECLINLINE(void) stb_p(void *ptr, int v) +{ + VBOX_CHECK_ADDR(ptr); + remR3PhysWriteU8((uintptr_t)ptr, v); +} + +DECLINLINE(uint32_t) lduw_le_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadU16((uintptr_t)ptr); +} + +DECLINLINE(int32_t) ldsw_le_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadS16((uintptr_t)ptr); +} + +DECLINLINE(void) stw_le_p(void *ptr, int v) +{ + VBOX_CHECK_ADDR(ptr); + remR3PhysWriteU16((uintptr_t)ptr, v); +} + +DECLINLINE(uint32_t) ldl_le_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadU32((uintptr_t)ptr); +} + +DECLINLINE(void) stl_le_p(void *ptr, int v) +{ + VBOX_CHECK_ADDR(ptr); + remR3PhysWriteU32((uintptr_t)ptr, v); +} + +DECLINLINE(void) stq_le_p(void *ptr, uint64_t v) +{ + VBOX_CHECK_ADDR(ptr); + remR3PhysWriteU64((uintptr_t)ptr, v); +} + +DECLINLINE(uint64_t) ldq_le_p(const void *ptr) +{ + VBOX_CHECK_ADDR(ptr); + return remR3PhysReadU64((uintptr_t)ptr); +} + +# undef VBOX_CHECK_ADDR + +/* float access */ + +DECLINLINE(float32) ldfl_le_p(const void *ptr) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = ldl_le_p(ptr); + return u.f; +} + +DECLINLINE(void) stfl_le_p(void *ptr, float32 v) +{ + union { + float32 f; + uint32_t i; + } u; + u.f = v; + stl_le_p(ptr, u.i); +} + +DECLINLINE(float64) ldfq_le_p(const void *ptr) +{ + CPU_DoubleU u; + u.l.lower = ldl_le_p(ptr); + u.l.upper = ldl_le_p((uint8_t*)ptr + 4); + return u.d; +} + +DECLINLINE(void) stfq_le_p(void *ptr, float64 v) +{ + CPU_DoubleU u; + u.d = v; + stl_le_p(ptr, u.l.lower); + stl_le_p((uint8_t*)ptr + 4, u.l.upper); +} + +#else /* !VBOX || !REM_PHYS_ADDR_IN_TLB */ + +static inline int ldub_p(const void *ptr) +{ + return *(uint8_t *)ptr; +} + +static inline int ldsb_p(const void *ptr) +{ + return *(int8_t *)ptr; +} + +static inline void stb_p(void *ptr, int v) +{ + *(uint8_t *)ptr = v; +} + +/* NOTE: on arm, putting 2 in /proc/sys/debug/alignment so that the + kernel handles unaligned load/stores may give better results, but + it is a system wide setting : bad */ +#if defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED) + +/* conservative code for little endian unaligned accesses */ +static inline int lduw_le_p(const void *ptr) +{ +#ifdef _ARCH_PPC + int val; + __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr)); + return val; +#else + const uint8_t *p = ptr; + return p[0] | (p[1] << 8); +#endif +} + +static inline int ldsw_le_p(const void *ptr) +{ +#ifdef _ARCH_PPC + int val; + __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr)); + return (int16_t)val; +#else + const uint8_t *p = ptr; + return (int16_t)(p[0] | (p[1] << 8)); +#endif +} + +static inline 
int ldl_le_p(const void *ptr) +{ +#ifdef _ARCH_PPC + int val; + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr)); + return val; +#else + const uint8_t *p = ptr; + return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); +#endif +} + +static inline uint64_t ldq_le_p(const void *ptr) +{ + const uint8_t *p = ptr; + uint32_t v1, v2; + v1 = ldl_le_p(p); + v2 = ldl_le_p(p + 4); + return v1 | ((uint64_t)v2 << 32); +} + +static inline void stw_le_p(void *ptr, int v) +{ +#ifdef _ARCH_PPC + __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr)); +#else + uint8_t *p = ptr; + p[0] = v; + p[1] = v >> 8; +#endif +} + +static inline void stl_le_p(void *ptr, int v) +{ +#ifdef _ARCH_PPC + __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr)); +#else + uint8_t *p = ptr; + p[0] = v; + p[1] = v >> 8; + p[2] = v >> 16; + p[3] = v >> 24; +#endif +} + +static inline void stq_le_p(void *ptr, uint64_t v) +{ + uint8_t *p = ptr; + stl_le_p(p, (uint32_t)v); + stl_le_p(p + 4, v >> 32); +} + +/* float access */ + +static inline float32 ldfl_le_p(const void *ptr) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = ldl_le_p(ptr); + return u.f; +} + +static inline void stfl_le_p(void *ptr, float32 v) +{ + union { + float32 f; + uint32_t i; + } u; + u.f = v; + stl_le_p(ptr, u.i); +} + +static inline float64 ldfq_le_p(const void *ptr) +{ + CPU_DoubleU u; + u.l.lower = ldl_le_p(ptr); + u.l.upper = ldl_le_p(ptr + 4); + return u.d; +} + +static inline void stfq_le_p(void *ptr, float64 v) +{ + CPU_DoubleU u; + u.d = v; + stl_le_p(ptr, u.l.lower); + stl_le_p(ptr + 4, u.l.upper); +} + +#else + +static inline int lduw_le_p(const void *ptr) +{ + return *(uint16_t *)ptr; +} + +static inline int ldsw_le_p(const void *ptr) +{ + return *(int16_t *)ptr; +} + +static inline int ldl_le_p(const void *ptr) +{ + return *(uint32_t *)ptr; +} + +static inline uint64_t ldq_le_p(const void *ptr) +{ + return *(uint64_t *)ptr; +} + +static inline void stw_le_p(void *ptr, int v) +{ + *(uint16_t *)ptr = v; +} + +static inline void stl_le_p(void *ptr, int v) +{ + *(uint32_t *)ptr = v; +} + +static inline void stq_le_p(void *ptr, uint64_t v) +{ + *(uint64_t *)ptr = v; +} + +/* float access */ + +static inline float32 ldfl_le_p(const void *ptr) +{ + return *(float32 *)ptr; +} + +static inline float64 ldfq_le_p(const void *ptr) +{ + return *(float64 *)ptr; +} + +static inline void stfl_le_p(void *ptr, float32 v) +{ + *(float32 *)ptr = v; +} + +static inline void stfq_le_p(void *ptr, float64 v) +{ + *(float64 *)ptr = v; +} +#endif + +#endif /* !VBOX || !REM_PHYS_ADDR_IN_TLB */ + +#if !defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED) + +static inline int lduw_be_p(const void *ptr) +{ +#if defined(__i386__) + int val; + asm volatile ("movzwl %1, %0\n" + "xchgb %b0, %h0\n" + : "=q" (val) + : "m" (*(uint16_t *)ptr)); + return val; +#else + const uint8_t *b = ptr; + return ((b[0] << 8) | b[1]); +#endif +} + +static inline int ldsw_be_p(const void *ptr) +{ +#if defined(__i386__) + int val; + asm volatile ("movzwl %1, %0\n" + "xchgb %b0, %h0\n" + : "=q" (val) + : "m" (*(uint16_t *)ptr)); + return (int16_t)val; +#else + const uint8_t *b = ptr; + return (int16_t)((b[0] << 8) | b[1]); +#endif +} + +static inline int ldl_be_p(const void *ptr) +{ +#if defined(__i386__) || defined(__x86_64__) + int val; + asm volatile ("movl %1, %0\n" + "bswap %0\n" + : "=r" (val) + : "m" (*(uint32_t *)ptr)); + return val; +#else + const uint8_t *b = ptr; + return (b[0] << 24) | (b[1] << 16) | 
(b[2] << 8) | b[3]; +#endif +} + +static inline uint64_t ldq_be_p(const void *ptr) +{ + uint32_t a,b; + a = ldl_be_p(ptr); + b = ldl_be_p((uint8_t *)ptr + 4); + return (((uint64_t)a<<32)|b); +} + +static inline void stw_be_p(void *ptr, int v) +{ +#if defined(__i386__) + asm volatile ("xchgb %b0, %h0\n" + "movw %w0, %1\n" + : "=q" (v) + : "m" (*(uint16_t *)ptr), "0" (v)); +#else + uint8_t *d = (uint8_t *) ptr; + d[0] = v >> 8; + d[1] = v; +#endif +} + +static inline void stl_be_p(void *ptr, int v) +{ +#if defined(__i386__) || defined(__x86_64__) + asm volatile ("bswap %0\n" + "movl %0, %1\n" + : "=r" (v) + : "m" (*(uint32_t *)ptr), "0" (v)); +#else + uint8_t *d = (uint8_t *) ptr; + d[0] = v >> 24; + d[1] = v >> 16; + d[2] = v >> 8; + d[3] = v; +#endif +} + +static inline void stq_be_p(void *ptr, uint64_t v) +{ + stl_be_p(ptr, v >> 32); + stl_be_p((uint8_t *)ptr + 4, v); +} + +/* float access */ + +static inline float32 ldfl_be_p(const void *ptr) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = ldl_be_p(ptr); + return u.f; +} + +static inline void stfl_be_p(void *ptr, float32 v) +{ + union { + float32 f; + uint32_t i; + } u; + u.f = v; + stl_be_p(ptr, u.i); +} + +static inline float64 ldfq_be_p(const void *ptr) +{ + CPU_DoubleU u; + u.l.upper = ldl_be_p(ptr); + u.l.lower = ldl_be_p((uint8_t *)ptr + 4); + return u.d; +} + +static inline void stfq_be_p(void *ptr, float64 v) +{ + CPU_DoubleU u; + u.d = v; + stl_be_p(ptr, u.l.upper); + stl_be_p((uint8_t *)ptr + 4, u.l.lower); +} + +#else + +static inline int lduw_be_p(const void *ptr) +{ + return *(uint16_t *)ptr; +} + +static inline int ldsw_be_p(const void *ptr) +{ + return *(int16_t *)ptr; +} + +static inline int ldl_be_p(const void *ptr) +{ + return *(uint32_t *)ptr; +} + +static inline uint64_t ldq_be_p(const void *ptr) +{ + return *(uint64_t *)ptr; +} + +static inline void stw_be_p(void *ptr, int v) +{ + *(uint16_t *)ptr = v; +} + +static inline void stl_be_p(void *ptr, int v) +{ + *(uint32_t *)ptr = v; +} + +static inline void stq_be_p(void *ptr, uint64_t v) +{ + *(uint64_t *)ptr = v; +} + +/* float access */ + +static inline float32 ldfl_be_p(const void *ptr) +{ + return *(float32 *)ptr; +} + +static inline float64 ldfq_be_p(const void *ptr) +{ + return *(float64 *)ptr; +} + +static inline void stfl_be_p(void *ptr, float32 v) +{ + *(float32 *)ptr = v; +} + +static inline void stfq_be_p(void *ptr, float64 v) +{ + *(float64 *)ptr = v; +} + +#endif + +/* target CPU memory access functions */ +#if defined(TARGET_WORDS_BIGENDIAN) +#define lduw_p(p) lduw_be_p(p) +#define ldsw_p(p) ldsw_be_p(p) +#define ldl_p(p) ldl_be_p(p) +#define ldq_p(p) ldq_be_p(p) +#define ldfl_p(p) ldfl_be_p(p) +#define ldfq_p(p) ldfq_be_p(p) +#define stw_p(p, v) stw_be_p(p, v) +#define stl_p(p, v) stl_be_p(p, v) +#define stq_p(p, v) stq_be_p(p, v) +#define stfl_p(p, v) stfl_be_p(p, v) +#define stfq_p(p, v) stfq_be_p(p, v) +#else +#define lduw_p(p) lduw_le_p(p) +#define ldsw_p(p) ldsw_le_p(p) +#define ldl_p(p) ldl_le_p(p) +#define ldq_p(p) ldq_le_p(p) +#define ldfl_p(p) ldfl_le_p(p) +#define ldfq_p(p) ldfq_le_p(p) +#define stw_p(p, v) stw_le_p(p, v) +#define stl_p(p, v) stl_le_p(p, v) +#define stq_p(p, v) stq_le_p(p, v) +#define stfl_p(p, v) stfl_le_p(p, v) +#define stfq_p(p, v) stfq_le_p(p, v) +#endif + +/* MMU memory access macros */ + +#if defined(CONFIG_USER_ONLY) +#include <assert.h> +#include "qemu-types.h" + +/* On some host systems the guest address space is reserved on the host. + * This allows the guest address space to be offset to a convenient location. 
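+ * For example, with guest_base = 0x10000 a guest address 0x1000 is accessed
+ * at host address 0x11000 through g2h(), while h2g() applies the inverse and
+ * asserts (via h2g_valid) that the result fits the target virtual address
+ * space.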
+ */ +#if defined(CONFIG_USE_GUEST_BASE) +extern uintptr_t guest_base; +extern int have_guest_base; +extern uintptr_t reserved_va; +#define GUEST_BASE guest_base +#define RESERVED_VA reserved_va +#else +#define GUEST_BASE 0ul +#define RESERVED_VA 0ul +#endif + +/* All direct uses of g2h and h2g need to go away for usermode softmmu. */ +#define g2h(x) ((void *)((uintptr_t)(x) + GUEST_BASE)) + +#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS +#define h2g_valid(x) 1 +#else +#define h2g_valid(x) ({ \ + uintptr_t __guest = (uintptr_t)(x) - GUEST_BASE; \ + __guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS); \ +}) +#endif + +#define h2g(x) ({ \ + uintptr_t __ret = (uintptr_t)(x) - GUEST_BASE; \ + /* Check if given address fits target address space */ \ + assert(h2g_valid(x)); \ + (abi_ulong)__ret; \ +}) + +#define saddr(x) g2h(x) +#define laddr(x) g2h(x) + +#else /* !CONFIG_USER_ONLY */ +/* NOTE: we use double casts if pointers and target_ulong have + different sizes */ +#define saddr(x) (uint8_t *)(intptr_t)(x) +#define laddr(x) (uint8_t *)(intptr_t)(x) +#endif + +#define ldub_raw(p) ldub_p(laddr((p))) +#define ldsb_raw(p) ldsb_p(laddr((p))) +#define lduw_raw(p) lduw_p(laddr((p))) +#define ldsw_raw(p) ldsw_p(laddr((p))) +#define ldl_raw(p) ldl_p(laddr((p))) +#define ldq_raw(p) ldq_p(laddr((p))) +#define ldfl_raw(p) ldfl_p(laddr((p))) +#define ldfq_raw(p) ldfq_p(laddr((p))) +#define stb_raw(p, v) stb_p(saddr((p)), v) +#define stw_raw(p, v) stw_p(saddr((p)), v) +#define stl_raw(p, v) stl_p(saddr((p)), v) +#define stq_raw(p, v) stq_p(saddr((p)), v) +#define stfl_raw(p, v) stfl_p(saddr((p)), v) +#define stfq_raw(p, v) stfq_p(saddr((p)), v) + + +#if defined(CONFIG_USER_ONLY) + +/* if user mode, no other memory access functions */ +#define ldub(p) ldub_raw(p) +#define ldsb(p) ldsb_raw(p) +#define lduw(p) lduw_raw(p) +#define ldsw(p) ldsw_raw(p) +#define ldl(p) ldl_raw(p) +#define ldq(p) ldq_raw(p) +#define ldfl(p) ldfl_raw(p) +#define ldfq(p) ldfq_raw(p) +#define stb(p, v) stb_raw(p, v) +#define stw(p, v) stw_raw(p, v) +#define stl(p, v) stl_raw(p, v) +#define stq(p, v) stq_raw(p, v) +#define stfl(p, v) stfl_raw(p, v) +#define stfq(p, v) stfq_raw(p, v) + +#define ldub_code(p) ldub_raw(p) +#define ldsb_code(p) ldsb_raw(p) +#define lduw_code(p) lduw_raw(p) +#define ldsw_code(p) ldsw_raw(p) +#define ldl_code(p) ldl_raw(p) +#define ldq_code(p) ldq_raw(p) + +#define ldub_kernel(p) ldub_raw(p) +#define ldsb_kernel(p) ldsb_raw(p) +#define lduw_kernel(p) lduw_raw(p) +#define ldsw_kernel(p) ldsw_raw(p) +#define ldl_kernel(p) ldl_raw(p) +#define ldq_kernel(p) ldq_raw(p) +#define ldfl_kernel(p) ldfl_raw(p) +#define ldfq_kernel(p) ldfq_raw(p) +#define stb_kernel(p, v) stb_raw(p, v) +#define stw_kernel(p, v) stw_raw(p, v) +#define stl_kernel(p, v) stl_raw(p, v) +#define stq_kernel(p, v) stq_raw(p, v) +#define stfl_kernel(p, v) stfl_raw(p, v) +#define stfq_kernel(p, vt) stfq_raw(p, v) + +#endif /* defined(CONFIG_USER_ONLY) */ + +/* page related stuff */ + +#define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS) +#define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1) +#define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK) + +/* ??? These should be the larger of uintptr_t and target_ulong. 
*/ +extern size_t qemu_real_host_page_size; +extern size_t qemu_host_page_bits; +extern size_t qemu_host_page_size; +extern uintptr_t qemu_host_page_mask; + +#define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask) + +/* same as PROT_xxx */ +#define PAGE_READ 0x0001 +#define PAGE_WRITE 0x0002 +#define PAGE_EXEC 0x0004 +#define PAGE_BITS (PAGE_READ | PAGE_WRITE | PAGE_EXEC) +#define PAGE_VALID 0x0008 +/* original state of the write flag (used when tracking self-modifying + code */ +#define PAGE_WRITE_ORG 0x0010 +#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) +/* FIXME: Code that sets/uses this is broken and needs to go away. */ +#define PAGE_RESERVED 0x0020 +#endif + +#if defined(CONFIG_USER_ONLY) +void page_dump(FILE *f); + +typedef int (*walk_memory_regions_fn)(void *, abi_ulong, + abi_ulong, uintptr_t); +int walk_memory_regions(void *, walk_memory_regions_fn); + +int page_get_flags(target_ulong address); +void page_set_flags(target_ulong start, target_ulong end, int flags); +int page_check_range(target_ulong start, target_ulong len, int flags); +#endif + +CPUState *cpu_copy(CPUState *env); +CPUState *qemu_get_cpu(int cpu); + +void cpu_dump_state(CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + int flags); +void cpu_dump_statistics (CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + int flags); + +void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...) +#ifndef VBOX + __attribute__ ((__format__ (__printf__, 2, 3))); +#else /* VBOX */ + ; +#endif /* VBOX */ +extern CPUState *first_cpu; +extern CPUState *cpu_single_env; + +#define CPU_INTERRUPT_HARD 0x02 /* hardware interrupt pending */ +#define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */ +#define CPU_INTERRUPT_TIMER 0x08 /* internal timer exception pending */ +#define CPU_INTERRUPT_FIQ 0x10 /* Fast interrupt pending. */ +#define CPU_INTERRUPT_HALT 0x20 /* CPU halt wanted */ +#define CPU_INTERRUPT_SMI 0x40 /* (x86 only) SMI interrupt pending */ +#define CPU_INTERRUPT_DEBUG 0x80 /* Debug event occured. */ +#define CPU_INTERRUPT_VIRQ 0x100 /* virtual interrupt pending. */ +#define CPU_INTERRUPT_NMI 0x200 /* NMI pending. */ +#define CPU_INTERRUPT_INIT 0x400 /* INIT pending. */ +#define CPU_INTERRUPT_SIPI 0x800 /* SIPI pending. */ +#define CPU_INTERRUPT_MCE 0x1000 /* (x86 only) MCE pending. */ + +#ifdef VBOX +/** Executes a single instruction. cpu_exec() will normally return EXCP_SINGLE_INSTR. */ +# define CPU_INTERRUPT_SINGLE_INSTR 0x01000000 +/** Executing a CPU_INTERRUPT_SINGLE_INSTR request, quit the cpu_loop. (for exceptions and suchlike) */ +# define CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT 0x02000000 +/** VM execution was interrupted by VMR3Reset, VMR3Suspend or VMR3PowerOff. */ +# define CPU_INTERRUPT_RC 0x04000000 +/** Exit current TB to process an external request. */ +# define CPU_INTERRUPT_EXTERNAL_FLUSH_TLB 0x08000000 +/** Exit current TB to process an external request. */ +# define CPU_INTERRUPT_EXTERNAL_EXIT 0x10000000 +/** Exit current TB to process an external interrupt request. */ +# define CPU_INTERRUPT_EXTERNAL_HARD 0x20000000 +/** Exit current TB to process an external timer request. */ +# define CPU_INTERRUPT_EXTERNAL_TIMER 0x40000000 +/** Exit current TB to process an external DMA request. 
*/ +# define CPU_INTERRUPT_EXTERNAL_DMA 0x80000000 +#endif /* VBOX */ +void cpu_interrupt(CPUState *s, int mask); +void cpu_reset_interrupt(CPUState *env, int mask); + +void cpu_exit(CPUState *s); + +int qemu_cpu_has_work(CPUState *env); + +/* Breakpoint/watchpoint flags */ +#define BP_MEM_READ 0x01 +#define BP_MEM_WRITE 0x02 +#define BP_MEM_ACCESS (BP_MEM_READ | BP_MEM_WRITE) +#define BP_STOP_BEFORE_ACCESS 0x04 +#define BP_WATCHPOINT_HIT 0x08 +#define BP_GDB 0x10 +#define BP_CPU 0x20 + +int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags, + CPUBreakpoint **breakpoint); +int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags); +void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint); +void cpu_breakpoint_remove_all(CPUState *env, int mask); +int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len, + int flags, CPUWatchpoint **watchpoint); +int cpu_watchpoint_remove(CPUState *env, target_ulong addr, + target_ulong len, int flags); +void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint); +void cpu_watchpoint_remove_all(CPUState *env, int mask); + +#define SSTEP_ENABLE 0x1 /* Enable simulated HW single stepping */ +#define SSTEP_NOIRQ 0x2 /* Do not use IRQ while single stepping */ +#define SSTEP_NOTIMER 0x4 /* Do not Timers while single stepping */ + +void cpu_single_step(CPUState *env, int enabled); +void cpu_reset(CPUState *s); +int cpu_is_stopped(CPUState *env); +void run_on_cpu(CPUState *env, void (*func)(void *data), void *data); + +#define CPU_LOG_TB_OUT_ASM (1 << 0) +#define CPU_LOG_TB_IN_ASM (1 << 1) +#define CPU_LOG_TB_OP (1 << 2) +#define CPU_LOG_TB_OP_OPT (1 << 3) +#define CPU_LOG_INT (1 << 4) +#define CPU_LOG_EXEC (1 << 5) +#define CPU_LOG_PCALL (1 << 6) +#define CPU_LOG_IOPORT (1 << 7) +#define CPU_LOG_TB_CPU (1 << 8) +#define CPU_LOG_RESET (1 << 9) + +/* define log items */ +typedef struct CPULogItem { + int mask; + const char *name; + const char *help; +} CPULogItem; + +extern const CPULogItem cpu_log_items[]; + +void cpu_set_log(int log_flags); +void cpu_set_log_filename(const char *filename); +int cpu_str_to_log_mask(const char *str); + +#if !defined(CONFIG_USER_ONLY) + +/* Return the physical page corresponding to a virtual one. Use it + only for debugging because no protection checks are done. Return -1 + if no page found. */ +target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr); + +/* memory API */ + +#ifndef VBOX +extern int phys_ram_fd; +extern ram_addr_t ram_size; +#endif /* !VBOX */ + +typedef struct RAMBlock { + uint8_t *host; + ram_addr_t offset; + ram_addr_t length; + char idstr[256]; + QLIST_ENTRY(RAMBlock) next; +#if defined(__linux__) && !defined(TARGET_S390X) + int fd; +#endif +} RAMBlock; + +typedef struct RAMList { + uint8_t *phys_dirty; +#ifdef VBOX + /** This is required for bounds checking the phys_ram_dirty accesses. + * We have memory ranges (the high PC-BIOS mapping) which causes some pages + * to fall outside the dirty map. 
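+ * The VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET / _RETV macros below compare
+ * (addr >> TARGET_PAGE_BITS) against this page count and make the dirty
+ * tracking helpers bail out early for such out-of-range pages.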
*/ + RTGCPHYS phys_dirty_size; +#if 1 +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) \ + do { \ + if (RT_UNLIKELY( ((addr) >> TARGET_PAGE_BITS) >= ram_list.phys_dirty_size)) { \ + Log(("%s: %RGp\n", __FUNCTION__, (RTGCPHYS)addr)); \ + return (rv); \ + } \ + } while (0) +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) \ + do { \ + if (RT_UNLIKELY( ((addr) >> TARGET_PAGE_BITS) >= ram_list.phys_dirty_size)) { \ + Log(("%s: %RGp\n", __FUNCTION__, (RTGCPHYS)addr)); \ + return; \ + } \ + } while (0) +#else +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) \ + AssertMsgReturn(((addr) >> TARGET_PAGE_BITS) < ram_list.phys_dirty_size, ("%#RGp\n", (RTGCPHYS)(addr)), (rv)); +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) \ + AssertMsgReturnVoid(((addr) >> TARGET_PAGE_BITS) < ram_list.phys_dirty_size, ("%#RGp\n", (RTGCPHYS)(addr))); +# endif +#else +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr,rv) do {} while() +# define VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr) do {} while() +#endif /* VBOX */ + QLIST_HEAD(ram, RAMBlock) blocks; +} RAMList; +extern RAMList ram_list; + +extern const char *mem_path; +extern int mem_prealloc; + +/* physical memory access */ + +/* MMIO pages are identified by a combination of an IO device index and + 3 flags. The ROMD code stores the page ram offset in iotlb entry, + so only a limited number of ids are avaiable. */ + +#define IO_MEM_NB_ENTRIES (1 << (TARGET_PAGE_BITS - IO_MEM_SHIFT)) + +/* Flags stored in the low bits of the TLB virtual address. These are + defined so that fast path ram access is all zeros. */ +/* Zero if TLB entry is valid. */ +#define TLB_INVALID_MASK (1 << 3) +/* Set if TLB entry references a clean RAM page. The iotlb entry will + contain the page physical address. */ +#define TLB_NOTDIRTY (1 << 4) +/* Set if TLB entry is an IO callback. 
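/*
 * Illustrative sketch of why "fast path ram access is all zeros": a plain RAM
 * entry stores the page-aligned address with every flag bit clear, so a single
 * compare rejects both a wrong page and any entry needing the slow path
 * (invalid, not-dirty or MMIO).  Constants are stand-ins assuming 4 KiB pages.
 */
#include <stdint.h>

#define SK_PAGE_MASK  (~(uint32_t)0xfff)                   /* stand-in for TARGET_PAGE_MASK */
#define SK_TLB_FLAGS  ((1u << 3) | (1u << 4) | (1u << 5))  /* INVALID | NOTDIRTY | MMIO */

static int sk_tlb_hit_fast_path(uint32_t tlb_addr, uint32_t vaddr)
{
    /* tlb_addr is (page | flags); any set flag bit makes the compare fail. */
    return tlb_addr == (vaddr & SK_PAGE_MASK);
}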
*/ +#define TLB_MMIO (1 << 5) + +#define VGA_DIRTY_FLAG 0x01 +#define CODE_DIRTY_FLAG 0x02 +#define MIGRATION_DIRTY_FLAG 0x08 + +/* read dirty bit (return 0 or 1) */ +static inline int cpu_physical_memory_is_dirty(ram_addr_t addr) +{ + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0); + return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff; +} + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff); + return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS]; +} + +static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, + int dirty_flags) +{ + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff & dirty_flags); + return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags; +} + +static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) +{ + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(addr); + ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff; +} + +static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr, + int dirty_flags) +{ + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RET(addr, 0xff); + return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags; +} + +static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start, + int length, + int dirty_flags) +{ + int i, mask, len; + uint8_t *p; + + VBOX_RAMLIST_DIRTY_BOUNDS_CHECK_RETV(start); + len = length >> TARGET_PAGE_BITS; + mask = ~dirty_flags; + p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS); + for (i = 0; i < len; i++) { + p[i] &= mask; + } +} + +void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, + int dirty_flags); +void cpu_tlb_update_dirty(CPUState *env); + +int cpu_physical_memory_set_dirty_tracking(int enable); + +int cpu_physical_memory_get_dirty_tracking(void); + +int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, + target_phys_addr_t end_addr); + +void dump_exec_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); +#endif /* !CONFIG_USER_ONLY */ + +int cpu_memory_rw_debug(CPUState *env, target_ulong addr, + uint8_t *buf, int len, int is_write); + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc); + +#ifdef VBOX +void tb_invalidate_virt(CPUState *env, uint32_t eip); +#endif /* VBOX */ + +#endif /* CPU_ALL_H */ diff --git a/src/recompiler/cpu-common.h b/src/recompiler/cpu-common.h new file mode 100644 index 00000000..b523d527 --- /dev/null +++ b/src/recompiler/cpu-common.h @@ -0,0 +1,135 @@ +#ifndef CPU_COMMON_H +#define CPU_COMMON_H 1 + +/* CPU interfaces that are target indpendent. 
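/*
 * Illustrative toy model of the per-page dirty byte handled by the inline
 * functions above: every page owns one byte in phys_dirty, each client (VGA
 * refresh, self-modifying-code tracking, migration) owns a bit in it, and
 * 0xff means "dirty for everybody".
 */
#include <stdint.h>

enum { SK_VGA = 0x01, SK_CODE = 0x02, SK_MIGRATION = 0x08 };

static uint8_t sk_phys_dirty[16];              /* one byte per (toy) page */

static void sk_page_written(unsigned page)     /* guest stored to the page */
{
    sk_phys_dirty[page] = 0xff;                /* every client must re-scan it */
}

static int sk_vga_needs_redraw(unsigned page)  /* consumer-side query */
{
    return (sk_phys_dirty[page] & SK_VGA) != 0;
}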
*/ + +#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__) +#define WORDS_ALIGNED +#endif + +#ifdef TARGET_PHYS_ADDR_BITS +#include "targphys.h" +#endif + +#ifndef NEED_CPU_H +#include "poison.h" +#endif + +#include "bswap.h" +#include "qemu-queue.h" + +#if !defined(CONFIG_USER_ONLY) + +/* address in the RAM (different from a physical address) */ +typedef uintptr_t ram_addr_t; + +/* memory API */ + +typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value); +typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr); + +void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset); +static inline void cpu_register_physical_memory(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset) +{ + cpu_register_physical_memory_offset(start_addr, size, phys_offset, 0); +} + +ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr); +#ifndef VBOX +ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, + ram_addr_t size, void *host); +ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size); +void qemu_ram_free(ram_addr_t addr); +/* This should only be used for ram local to a device. */ +void *qemu_get_ram_ptr(ram_addr_t addr); +/* This should not be used by devices. */ +ram_addr_t qemu_ram_addr_from_host(void *ptr); +#endif /* !VBOX */ + +int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, + CPUWriteMemoryFunc * const *mem_write, + void *opaque); +void cpu_unregister_io_memory(int table_address); + +void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, + int len, int is_write); +static inline void cpu_physical_memory_read(target_phys_addr_t addr, + uint8_t *buf, int len) +{ + cpu_physical_memory_rw(addr, buf, len, 0); +} +static inline void cpu_physical_memory_write(target_phys_addr_t addr, + const uint8_t *buf, int len) +{ + cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1); +} +void *cpu_physical_memory_map(target_phys_addr_t addr, + target_phys_addr_t *plen, + int is_write); +void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, + int is_write, target_phys_addr_t access_len); +void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque)); +void cpu_unregister_map_client(void *cookie); + +struct CPUPhysMemoryClient; +typedef struct CPUPhysMemoryClient CPUPhysMemoryClient; +struct CPUPhysMemoryClient { + void (*set_memory)(struct CPUPhysMemoryClient *client, + target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset); + int (*sync_dirty_bitmap)(struct CPUPhysMemoryClient *client, + target_phys_addr_t start_addr, + target_phys_addr_t end_addr); + int (*migration_log)(struct CPUPhysMemoryClient *client, + int enable); + QLIST_ENTRY(CPUPhysMemoryClient) list; +}; + +void cpu_register_phys_memory_client(CPUPhysMemoryClient *); +void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *); + +/* Coalesced MMIO regions are areas where write operations can be reordered. + * This usually implies that write operations are side-effect free. This allows + * batching which can make a major impact on performance when using + * virtualization. 
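/*
 * Illustrative sketch of how a device typically wires an MMIO page into the
 * physical address space with the two calls above.  The three-entry callback
 * tables (8/16/32-bit accessors) and passing the cpu_register_io_memory()
 * return value straight to cpu_register_physical_memory() follow common QEMU
 * practice of this vintage; treat those details as assumptions.
 */
static uint32_t sk_mmio_read(void *opaque, target_phys_addr_t addr)
{
    return 0;                                    /* device register read */
}

static void sk_mmio_write(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    /* device register write */
}

static CPUReadMemoryFunc * const sk_mmio_reads[3] = {
    sk_mmio_read, sk_mmio_read, sk_mmio_read     /* byte, word, long slots */
};
static CPUWriteMemoryFunc * const sk_mmio_writes[3] = {
    sk_mmio_write, sk_mmio_write, sk_mmio_write
};

static void sk_mmio_map(target_phys_addr_t base)
{
    int io_index = cpu_register_io_memory(sk_mmio_reads, sk_mmio_writes, NULL);
    cpu_register_physical_memory(base, TARGET_PAGE_SIZE, io_index);
}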
+ */ +void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size); + +void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size); + +void qemu_flush_coalesced_mmio_buffer(void); + +uint32_t ldub_phys(target_phys_addr_t addr); +uint32_t lduw_phys(target_phys_addr_t addr); +uint32_t ldl_phys(target_phys_addr_t addr); +uint64_t ldq_phys(target_phys_addr_t addr); +void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val); +void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val); +void stb_phys(target_phys_addr_t addr, uint32_t val); +void stw_phys(target_phys_addr_t addr, uint32_t val); +void stl_phys(target_phys_addr_t addr, uint32_t val); +void stq_phys(target_phys_addr_t addr, uint64_t val); + +void cpu_physical_memory_write_rom(target_phys_addr_t addr, + const uint8_t *buf, int len); + +#define IO_MEM_SHIFT 3 + +#define IO_MEM_RAM (0 << IO_MEM_SHIFT) /* hardcoded offset */ +#define IO_MEM_ROM (1 << IO_MEM_SHIFT) /* hardcoded offset */ +#define IO_MEM_UNASSIGNED (2 << IO_MEM_SHIFT) +#define IO_MEM_NOTDIRTY (3 << IO_MEM_SHIFT) + +/* Acts like a ROM when read and like a device when written. */ +#define IO_MEM_ROMD (1) +#define IO_MEM_SUBPAGE (2) + +#endif + +#endif /* !CPU_COMMON_H */ diff --git a/src/recompiler/cpu-defs.h b/src/recompiler/cpu-defs.h new file mode 100644 index 00000000..65d57063 --- /dev/null +++ b/src/recompiler/cpu-defs.h @@ -0,0 +1,235 @@ +/* + * common defines for all CPUs + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#ifndef CPU_DEFS_H +#define CPU_DEFS_H + +#ifndef NEED_CPU_H +#error cpu.h included from common code +#endif + +#include "config.h" +#include <setjmp.h> +#include <inttypes.h> +#ifndef VBOX +#include <signal.h> +#else /* VBOX */ +# define sig_atomic_t int32_t +#endif /* VBOX */ +#include "osdep.h" +#include "qemu-queue.h" +#include "targphys.h" + +#ifndef TARGET_LONG_BITS +#error TARGET_LONG_BITS must be defined before including this header +#endif + +#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8) + +/* target_ulong is the type of a virtual address */ +#if TARGET_LONG_SIZE == 4 +typedef int32_t target_long; +typedef uint32_t target_ulong; +#define TARGET_FMT_lx "%08x" +#define TARGET_FMT_ld "%d" +#define TARGET_FMT_lu "%u" +#elif TARGET_LONG_SIZE == 8 +typedef int64_t target_long; +typedef uint64_t target_ulong; +#define TARGET_FMT_lx "%016" PRIx64 +#define TARGET_FMT_ld "%" PRId64 +#define TARGET_FMT_lu "%" PRIu64 +#else +#error TARGET_LONG_SIZE undefined +#endif + +#define HOST_LONG_SIZE (HOST_LONG_BITS / 8) + +#define EXCP_INTERRUPT 0x10000 /* async interruption */ +#define EXCP_HLT 0x10001 /* hlt instruction reached */ +#define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ +#define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ +#ifdef VBOX +# define EXCP_EXECUTE_RAW 0x11024 /**< execute raw mode. */ +# define EXCP_EXECUTE_HM 0x11025 /**< execute hardware accelerated raw mode. */ +# define EXCP_SINGLE_INSTR 0x11026 /**< executed single instruction. */ +# define EXCP_RC 0x11027 /**< a EM rc was raised (VMR3Reset/Suspend/PowerOff). */ +#endif /* VBOX */ + +#define TB_JMP_CACHE_BITS 12 +#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) + +/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for + addresses on the same page. The top bits are the same. This allows + TLB invalidation to quickly clear a subset of the hash table. */ +#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2) +#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS) +#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1) +#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE) + +#if !defined(CONFIG_USER_ONLY) +#define CPU_TLB_BITS 8 +#define CPU_TLB_SIZE (1 << CPU_TLB_BITS) + +#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32 +#define CPU_TLB_ENTRY_BITS 4 +#else +#define CPU_TLB_ENTRY_BITS 5 +#endif + +typedef struct CPUTLBEntry { + /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address + bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not + go directly to ram. + bit 3 : indicates that the entry is invalid + bit 2..0 : zero + */ + target_ulong addr_read; + target_ulong addr_write; + target_ulong addr_code; + /* Addend to virtual address to get host address. IO accesses + use the corresponding iotlb value. */ + uintptr_t addend; + /* padding to get a power of two size */ + uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) - + (sizeof(target_ulong) * 3 + + ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) + + sizeof(uintptr_t))]; +} CPUTLBEntry; + +extern int CPUTLBEntry_wrong_size[sizeof(CPUTLBEntry) == (1 << CPU_TLB_ENTRY_BITS) ? 1 : -1]; + +#define CPU_COMMON_TLB \ + /* The meaning of the MMU modes is defined in the target code. 
*/ \ + CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \ + target_phys_addr_t iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \ + target_ulong tlb_flush_addr; \ + target_ulong tlb_flush_mask; + +#else + +#define CPU_COMMON_TLB + +#endif + + +#ifdef HOST_WORDS_BIGENDIAN +typedef struct icount_decr_u16 { + uint16_t high; + uint16_t low; +} icount_decr_u16; +#else +typedef struct icount_decr_u16 { + uint16_t low; + uint16_t high; +} icount_decr_u16; +#endif + +struct kvm_run; +struct KVMState; +struct qemu_work_item; + +typedef struct CPUBreakpoint { + target_ulong pc; + int flags; /* BP_* */ + QTAILQ_ENTRY(CPUBreakpoint) entry; +} CPUBreakpoint; + +typedef struct CPUWatchpoint { + target_ulong vaddr; + target_ulong len_mask; + int flags; /* BP_* */ + QTAILQ_ENTRY(CPUWatchpoint) entry; +} CPUWatchpoint; + +#define CPU_TEMP_BUF_NLONGS 128 +#define CPU_COMMON \ + struct TranslationBlock *current_tb; /* currently executing TB */ \ + /* soft mmu support */ \ + /* in order to avoid passing too many arguments to the MMIO \ + helpers, we store some rarely used information in the CPU \ + context) */ \ + uintptr_t mem_io_pc; /* host pc at which the memory was \ + accessed */ \ + target_ulong mem_io_vaddr; /* target virtual addr at which the \ + memory was accessed */ \ + uint32_t halted; /* Nonzero if the CPU is in suspend state */ \ + uint32_t interrupt_request; \ + volatile sig_atomic_t exit_request; \ + CPU_COMMON_TLB \ + struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; \ + /* buffer for temporaries in the code generator */ \ + long temp_buf[CPU_TEMP_BUF_NLONGS]; \ + \ + int64_t icount_extra; /* Instructions until next timer event. */ \ + /* Number of cycles left, with interrupt flag in high bit. \ + This allows a single read-compare-cbranch-write sequence to test \ + for both decrementer underflow and exceptions. */ \ + union { \ + uint32_t u32; \ + icount_decr_u16 u16; \ + } icount_decr; \ + uint32_t can_do_io; /* nonzero if memory mapped IO is safe. */ \ + \ + /* from this point: preserved by CPU reset */ \ + /* ice debug support */ \ + QTAILQ_HEAD(breakpoints_head, CPUBreakpoint) breakpoints; \ + int singlestep_enabled; \ + \ + QTAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints; \ + CPUWatchpoint *watchpoint_hit; \ + \ + struct GDBRegisterState *gdb_regs; \ + \ + /* Core interrupt code */ \ + jmp_buf jmp_env; \ + int exception_index; \ + \ + CPUState *next_cpu; /* next CPU sharing TB cache */ \ + int cpu_index; /* CPU index (informative) */ \ + uint32_t host_tid; /* host thread ID */ \ + int numa_node; /* NUMA node this cpu is belonging to */ \ + int nr_cores; /* number of cores within this CPU package */ \ + int nr_threads;/* number of threads within this CPU */ \ + int running; /* Nonzero if cpu is currently running(usermode). 
*/ \ + /* user data */ \ + void *opaque; \ + \ + uint32_t created; \ + uint32_t stop; /* Stop request */ \ + uint32_t stopped; /* Artificially stopped */ \ + struct QemuThread *thread; \ + struct QemuCond *halt_cond; \ + struct qemu_work_item *queued_work_first, *queued_work_last; \ + const char *cpu_model_str; \ + struct KVMState *kvm_state; \ + struct kvm_run *kvm_run; \ + int kvm_fd; \ + int kvm_vcpu_dirty; + +#endif diff --git a/src/recompiler/cpu-exec.c b/src/recompiler/cpu-exec.c new file mode 100644 index 00000000..4151363c --- /dev/null +++ b/src/recompiler/cpu-exec.c @@ -0,0 +1,1455 @@ +/* + * i386 emulator main execution loop + * + * Copyright (c) 2003-2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include "config.h" +#include "exec.h" +#include "disas.h" +#include "tcg.h" +#include "kvm.h" +#include "qemu-barrier.h" + +#if !defined(CONFIG_SOFTMMU) +#undef EAX +#undef ECX +#undef EDX +#undef EBX +#undef ESP +#undef EBP +#undef ESI +#undef EDI +#undef EIP +#include <signal.h> +#ifdef __linux__ +#include <sys/ucontext.h> +#endif +#endif + +#if defined(__sparc__) && !defined(CONFIG_SOLARIS) +// Work around ugly bugs in glibc that mangle global register contents +#undef env +#define env cpu_single_env +#endif + +int tb_invalidated_flag; + +//#define CONFIG_DEBUG_EXEC +//#define DEBUG_SIGNAL + +int qemu_cpu_has_work(CPUState *env) +{ + return cpu_has_work(env); +} + +void cpu_loop_exit(void) +{ + env->current_tb = NULL; + longjmp(env->jmp_env, 1); +} + +/* exit the current TB from a signal handler. The host registers are + restored in a state compatible with the CPU emulator + */ +void cpu_resume_from_signal(CPUState *env1, void *puc) +{ +#if !defined(CONFIG_SOFTMMU) +#ifdef __linux__ + struct ucontext *uc = puc; +#elif defined(__OpenBSD__) + struct sigcontext *uc = puc; +#endif +#endif + + env = env1; + + /* XXX: restore cpu registers saved in host registers */ + +#if !defined(CONFIG_SOFTMMU) + if (puc) { + /* XXX: use siglongjmp ? */ +#ifdef __linux__ +#ifdef __ia64 + sigprocmask(SIG_SETMASK, (sigset_t *)&uc->uc_sigmask, NULL); +#else + sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); +#endif +#elif defined(__OpenBSD__) + sigprocmask(SIG_SETMASK, &uc->sc_mask, NULL); +#endif + } +#endif + env->exception_index = -1; + longjmp(env->jmp_env, 1); +} + +/* Execute the code without caching the generated code. 
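/*
 * Minimal standalone sketch of the setjmp/longjmp control flow behind
 * cpu_loop_exit() above: the outer execution loop arms a jump buffer, and any
 * exception or exit request unwinds straight back to it.  Names here are
 * illustrative, not the real CPUState fields.
 */
#include <setjmp.h>
#include <stdio.h>

static jmp_buf sk_jmp_env;

static void sk_loop_exit(void)            /* plays the role of cpu_loop_exit() */
{
    longjmp(sk_jmp_env, 1);               /* unwind back to the setjmp() below */
}

int main(void)
{
    if (setjmp(sk_jmp_env) == 0) {        /* normal path: run "translated code" */
        sk_loop_exit();                   /* an exception bails out of it */
        puts("never reached");
    } else {
        puts("back in the outer loop; handle the exception here");
    }
    return 0;
}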
An interpreter + could be used if available. */ +static void cpu_exec_nocache(int max_cycles, TranslationBlock *orig_tb) +{ + uintptr_t next_tb; + TranslationBlock *tb; + + /* Should never happen. + We only end up here when an existing TB is too long. */ + if (max_cycles > CF_COUNT_MASK) + max_cycles = CF_COUNT_MASK; + + tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, + max_cycles); + env->current_tb = tb; + /* execute the generated code */ +#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) + tcg_qemu_tb_exec(tb->tc_ptr, next_tb); +#else + next_tb = tcg_qemu_tb_exec(tb->tc_ptr); +#endif + env->current_tb = NULL; + + if ((next_tb & 3) == 2) { + /* Restore PC. This may happen if async event occurs before + the TB starts executing. */ + cpu_pc_from_tb(env, tb); + } + tb_phys_invalidate(tb, -1); + tb_free(tb); +} + +static TranslationBlock *tb_find_slow(target_ulong pc, + target_ulong cs_base, + uint64_t flags) +{ + TranslationBlock *tb, **ptb1; + unsigned int h; + tb_page_addr_t phys_pc, phys_page1, phys_page2; + target_ulong virt_page2; + + tb_invalidated_flag = 0; + + /* find translated block using physical mappings */ + phys_pc = get_page_addr_code(env, pc); + phys_page1 = phys_pc & TARGET_PAGE_MASK; + phys_page2 = -1; + h = tb_phys_hash_func(phys_pc); + ptb1 = &tb_phys_hash[h]; + for(;;) { + tb = *ptb1; + if (!tb) + goto not_found; + if (tb->pc == pc && + tb->page_addr[0] == phys_page1 && + tb->cs_base == cs_base && + tb->flags == flags) { + /* check next page if needed */ + if (tb->page_addr[1] != -1) { + virt_page2 = (pc & TARGET_PAGE_MASK) + + TARGET_PAGE_SIZE; + phys_page2 = get_page_addr_code(env, virt_page2); + if (tb->page_addr[1] == phys_page2) + goto found; + } else { + goto found; + } + } + ptb1 = &tb->phys_hash_next; + } + not_found: + /* if no translated code available, then translate it now */ + tb = tb_gen_code(env, pc, cs_base, flags, 0); + + found: + /* we add the TB in the virtual pc hash table */ + env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; + return tb; +} + +static inline TranslationBlock *tb_find_fast(void) +{ + TranslationBlock *tb; + target_ulong cs_base, pc; + int flags; + + /* we record a subset of the CPU state. It will + always be the same before a given translated block + is executed. 
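/*
 * Illustrative sketch of the two-level lookup implemented by tb_find_fast()
 * and tb_find_slow() above: a small direct-mapped cache indexed by a hash of
 * the PC, refilled from a slower hash-table/translation path on a miss.
 * Types and the hash are simplified stand-ins.
 */
#include <stdint.h>
#include <stddef.h>

#define SK_JMP_CACHE_SIZE (1 << 12)

struct sk_tb { uint32_t pc; /* ... pointer to generated code ... */ };

static struct sk_tb *sk_jmp_cache[SK_JMP_CACHE_SIZE];

static struct sk_tb *sk_find_slow(uint32_t pc)
{
    static struct sk_tb tb;                       /* toy: pretend we translated it */
    tb.pc = pc;
    return &tb;
}

static struct sk_tb *sk_find_fast(uint32_t pc)
{
    unsigned h = pc & (SK_JMP_CACHE_SIZE - 1);    /* stand-in for tb_jmp_cache_hash_func */
    struct sk_tb *tb = sk_jmp_cache[h];

    if (tb == NULL || tb->pc != pc) {             /* miss or stale entry */
        tb = sk_find_slow(pc);
        sk_jmp_cache[h] = tb;                     /* refill the fast cache */
    }
    return tb;
}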
*/ + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || + tb->flags != flags)) { + tb = tb_find_slow(pc, cs_base, flags); + } + return tb; +} + +static CPUDebugExcpHandler *debug_excp_handler; + +CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler) +{ + CPUDebugExcpHandler *old_handler = debug_excp_handler; + + debug_excp_handler = handler; + return old_handler; +} + +static void cpu_handle_debug_exception(CPUState *env) +{ + CPUWatchpoint *wp; + + if (!env->watchpoint_hit) + QTAILQ_FOREACH(wp, &env->watchpoints, entry) + wp->flags &= ~BP_WATCHPOINT_HIT; + + if (debug_excp_handler) + debug_excp_handler(env); +} + +/* main execution loop */ + +volatile sig_atomic_t exit_request; + +int cpu_exec(CPUState *env1) +{ + volatile host_reg_t saved_env_reg; + int ret VBOX_ONLY(= 0), interrupt_request; + TranslationBlock *tb; + uint8_t *tc_ptr; + uintptr_t next_tb; + +# ifndef VBOX + if (cpu_halted(env1) == EXCP_HALTED) + return EXCP_HALTED; +# endif /* !VBOX */ + + cpu_single_env = env1; + + /* the access to env below is actually saving the global register's + value, so that files not including target-xyz/exec.h are free to + use it. */ + QEMU_BUILD_BUG_ON (sizeof (saved_env_reg) != sizeof (env)); + saved_env_reg = (host_reg_t) env; + barrier(); + env = env1; + + if (unlikely(exit_request)) { + env->exit_request = 1; + } + +#if defined(TARGET_I386) + if (!kvm_enabled()) { + /* put eflags in CPU temporary format */ + CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((env->eflags >> 10) & 1)); + CC_OP = CC_OP_EFLAGS; + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + } +#elif defined(TARGET_SPARC) +#elif defined(TARGET_M68K) + env->cc_op = CC_OP_FLAGS; + env->cc_dest = env->sr & 0xf; + env->cc_x = (env->sr >> 4) & 1; +#elif defined(TARGET_ALPHA) +#elif defined(TARGET_ARM) +#elif defined(TARGET_PPC) +#elif defined(TARGET_MICROBLAZE) +#elif defined(TARGET_MIPS) +#elif defined(TARGET_SH4) +#elif defined(TARGET_CRIS) +#elif defined(TARGET_S390X) + /* XXXXX */ +#else +#error unsupported target CPU +#endif +#ifndef VBOX /* VBOX: We need to raise traps and suchlike from the outside. */ + env->exception_index = -1; +#endif /* !VBOX */ + + /* prepare setjmp context for exception handling */ + for(;;) { + if (setjmp(env->jmp_env) == 0) { +#if defined(__sparc__) && !defined(CONFIG_SOLARIS) +#undef env + env = cpu_single_env; +#define env cpu_single_env +#endif +#ifdef VBOX + env->current_tb = NULL; /* probably not needed, but whatever... 
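/*
 * Illustrative sketch of the DF conversion above: the x86 direction bit
 * (EFLAGS bit 10) is turned into a step of +1 or -1 for string instructions.
 */
#include <stdint.h>

static int sk_df_step(uint32_t eflags)
{
    return 1 - 2 * (int)((eflags >> 10) & 1);   /* DF=0 -> +1, DF=1 -> -1 */
}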
*/ + + /* + * Check for fatal errors first + */ + if (env->interrupt_request & CPU_INTERRUPT_RC) { + env->exception_index = EXCP_RC; + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC); + ret = env->exception_index; + cpu_loop_exit(); + } +#endif + + /* if an exception is pending, we execute it here */ + if (env->exception_index >= 0) { + if (env->exception_index >= EXCP_INTERRUPT) { + /* exit request from the cpu execution loop */ + ret = env->exception_index; +#ifdef VBOX /* because of the above stuff */ + env->exception_index = -1; +#endif + if (ret == EXCP_DEBUG) + cpu_handle_debug_exception(env); + break; + } else { +#if defined(CONFIG_USER_ONLY) + /* if user mode only, we simulate a fake exception + which will be handled outside the cpu execution + loop */ +#if defined(TARGET_I386) + do_interrupt_user(env->exception_index, + env->exception_is_int, + env->error_code, + env->exception_next_eip); + /* successfully delivered */ + env->old_exception = -1; +#endif + ret = env->exception_index; + break; +#else +#if defined(TARGET_I386) + /* simulate a real cpu exception. On i386, it can + trigger new exceptions, but we do not handle + double or triple faults yet. */ +# ifdef VBOX + RAWEx_ProfileStart(env, STATS_IRQ_HANDLING); + Log(("do_interrupt: vec=%#x int=%d pc=%04x:%RGv\n", env->exception_index, env->exception_is_int, + env->segs[R_CS].selector, (RTGCPTR)env->exception_next_eip)); +# endif /* VBOX */ + do_interrupt(env->exception_index, + env->exception_is_int && env->exception_is_int != EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ, + env->error_code, + env->exception_next_eip, + env->exception_is_int == EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ); + /* successfully delivered */ + env->old_exception = -1; +# ifdef VBOX + RAWEx_ProfileStop(env, STATS_IRQ_HANDLING); +# endif /* VBOX */ +#elif defined(TARGET_PPC) + do_interrupt(env); +#elif defined(TARGET_MICROBLAZE) + do_interrupt(env); +#elif defined(TARGET_MIPS) + do_interrupt(env); +#elif defined(TARGET_SPARC) + do_interrupt(env); +#elif defined(TARGET_ARM) + do_interrupt(env); +#elif defined(TARGET_SH4) + do_interrupt(env); +#elif defined(TARGET_ALPHA) + do_interrupt(env); +#elif defined(TARGET_CRIS) + do_interrupt(env); +#elif defined(TARGET_M68K) + do_interrupt(0); +#endif + env->exception_index = -1; +#endif + } + } + +# ifndef VBOX + if (kvm_enabled()) { + kvm_cpu_exec(env); + longjmp(env->jmp_env, 1); + } +# endif /* !VBOX */ + + next_tb = 0; /* force lookup of first TB */ + for(;;) { + interrupt_request = env->interrupt_request; + if (unlikely(interrupt_request)) { + if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) { + /* Mask out external interrupts for this step. */ + interrupt_request &= ~(CPU_INTERRUPT_HARD | + CPU_INTERRUPT_FIQ | + CPU_INTERRUPT_SMI | + CPU_INTERRUPT_NMI); + } + if (interrupt_request & CPU_INTERRUPT_DEBUG) { + env->interrupt_request &= ~CPU_INTERRUPT_DEBUG; + env->exception_index = EXCP_DEBUG; + cpu_loop_exit(); + } +#if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \ + defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \ + defined(TARGET_MICROBLAZE) + if (interrupt_request & CPU_INTERRUPT_HALT) { + env->interrupt_request &= ~CPU_INTERRUPT_HALT; + env->halted = 1; + env->exception_index = EXCP_HLT; + cpu_loop_exit(); + } +#endif +#if defined(TARGET_I386) +# ifdef VBOX + /* Memory registration may post a tlb flush request, process it ASAP. 
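/*
 * Illustrative sketch of the "test, then atomically clear" pattern used above
 * for interrupt_request bits.  ASMAtomicAndS32() is an IPRT primitive; the
 * GCC/Clang __atomic builtin below is only a portable stand-in with the same
 * effect.
 */
#include <stdint.h>

static int sk_take_request(volatile int32_t *interrupt_request, int32_t bit)
{
    if (*interrupt_request & bit) {
        /* Clear only this bit; other threads may be setting different ones. */
        __atomic_fetch_and(interrupt_request, ~bit, __ATOMIC_SEQ_CST);
        return 1;               /* the request was pending and is now consumed */
    }
    return 0;
}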
*/ + if (interrupt_request & (CPU_INTERRUPT_EXTERNAL_FLUSH_TLB)) { + tlb_flush(env, true); /* (clears the flush flag) */ + } + + /* Single instruction exec request, we execute it and return (one way or the other). + The caller will always reschedule after doing this operation! */ + if (interrupt_request & CPU_INTERRUPT_SINGLE_INSTR) + { + /* not in flight are we? (if we are, we trapped) */ + if (!(env->interrupt_request & CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT)) + { + ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT); + env->exception_index = EXCP_SINGLE_INSTR; + if (emulate_single_instr(env) == -1) + AssertMsgFailed(("REM: emulate_single_instr failed for EIP=%RGv!!\n", (RTGCPTR)env->eip)); + + /* When we receive an external interrupt during execution of this single + instruction, then we should stay here. We will leave when we're ready + for raw-mode or when interrupted by pending EMT requests. */ + interrupt_request = env->interrupt_request; /* reload this! */ + if ( !(interrupt_request & CPU_INTERRUPT_HARD) + || !(env->eflags & IF_MASK) + || (env->hflags & HF_INHIBIT_IRQ_MASK) + || (env->state & CPU_RAW_HM) + ) + { + env->exception_index = ret = EXCP_SINGLE_INSTR; + cpu_loop_exit(); + } + } + /* Clear CPU_INTERRUPT_SINGLE_INSTR and leave CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT set. */ + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_SINGLE_INSTR); + } +# endif /* VBOX */ + +# ifndef VBOX /** @todo reconcile our code with the following... */ + if (interrupt_request & CPU_INTERRUPT_INIT) { + svm_check_intercept(SVM_EXIT_INIT); + do_cpu_init(env); + env->exception_index = EXCP_HALTED; + cpu_loop_exit(); + } else if (interrupt_request & CPU_INTERRUPT_SIPI) { + do_cpu_sipi(env); + } else if (env->hflags2 & HF2_GIF_MASK) { + if ((interrupt_request & CPU_INTERRUPT_SMI) && + !(env->hflags & HF_SMM_MASK)) { + svm_check_intercept(SVM_EXIT_SMI); + env->interrupt_request &= ~CPU_INTERRUPT_SMI; + do_smm_enter(); + next_tb = 0; + } else if ((interrupt_request & CPU_INTERRUPT_NMI) && + !(env->hflags2 & HF2_NMI_MASK)) { + env->interrupt_request &= ~CPU_INTERRUPT_NMI; + env->hflags2 |= HF2_NMI_MASK; + do_interrupt(EXCP02_NMI, 0, 0, 0, 1); + next_tb = 0; + } else if (interrupt_request & CPU_INTERRUPT_MCE) { + env->interrupt_request &= ~CPU_INTERRUPT_MCE; + do_interrupt(EXCP12_MCHK, 0, 0, 0, 0); + next_tb = 0; + } else if ((interrupt_request & CPU_INTERRUPT_HARD) && + (((env->hflags2 & HF2_VINTR_MASK) && + (env->hflags2 & HF2_HIF_MASK)) || + (!(env->hflags2 & HF2_VINTR_MASK) && + (env->eflags & IF_MASK && + !(env->hflags & HF_INHIBIT_IRQ_MASK))))) { + int intno; + svm_check_intercept(SVM_EXIT_INTR); + env->interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ); + intno = cpu_get_pic_interrupt(env); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing hardware INT=0x%02x\n", intno); +#if defined(__sparc__) && !defined(CONFIG_SOLARIS) +#undef env + env = cpu_single_env; +#define env cpu_single_env +#endif + do_interrupt(intno, 0, 0, 0, 1); + /* ensure that no TB jump will be modified as + the program flow was changed */ + next_tb = 0; +#if !defined(CONFIG_USER_ONLY) + } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) && + (env->eflags & IF_MASK) && + !(env->hflags & HF_INHIBIT_IRQ_MASK)) { + int intno; + /* FIXME: this should respect TPR */ + svm_check_intercept(SVM_EXIT_VINTR); + intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector)); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", 
intno); + do_interrupt(intno, 0, 0, 0, 1); + env->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + next_tb = 0; +#endif + } + } +# else /* VBOX */ + RAWEx_ProfileStart(env, STATS_IRQ_HANDLING); + if ((interrupt_request & CPU_INTERRUPT_SMI) && + !(env->hflags & HF_SMM_MASK)) { + env->interrupt_request &= ~CPU_INTERRUPT_SMI; + do_smm_enter(); + next_tb = 0; + } + else if ((interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK) && + !(env->hflags & HF_INHIBIT_IRQ_MASK)) + { + /* if hardware interrupt pending, we execute it */ + int intno; + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_HARD); + intno = cpu_get_pic_interrupt(env); + if (intno >= 0) + { + Log(("do_interrupt %d\n", intno)); + do_interrupt(intno, 0, 0, 0, 1); + } + /* ensure that no TB jump will be modified as + the program flow was changed */ + next_tb = 0; + } +# endif /* VBOX */ +#elif defined(TARGET_PPC) +#if 0 + if ((interrupt_request & CPU_INTERRUPT_RESET)) { + cpu_reset(env); + } +#endif + if (interrupt_request & CPU_INTERRUPT_HARD) { + ppc_hw_interrupt(env); + if (env->pending_interrupts == 0) + env->interrupt_request &= ~CPU_INTERRUPT_HARD; + next_tb = 0; + } +#elif defined(TARGET_MICROBLAZE) + if ((interrupt_request & CPU_INTERRUPT_HARD) + && (env->sregs[SR_MSR] & MSR_IE) + && !(env->sregs[SR_MSR] & (MSR_EIP | MSR_BIP)) + && !(env->iflags & (D_FLAG | IMM_FLAG))) { + env->exception_index = EXCP_IRQ; + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_MIPS) + if ((interrupt_request & CPU_INTERRUPT_HARD) && + (env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask) && + (env->CP0_Status & (1 << CP0St_IE)) && + !(env->CP0_Status & (1 << CP0St_EXL)) && + !(env->CP0_Status & (1 << CP0St_ERL)) && + !(env->hflags & MIPS_HFLAG_DM)) { + /* Raise it */ + env->exception_index = EXCP_EXT_INTERRUPT; + env->error_code = 0; + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_SPARC) + if (interrupt_request & CPU_INTERRUPT_HARD) { + if (cpu_interrupts_enabled(env) && + env->interrupt_index > 0) { + int pil = env->interrupt_index & 0xf; + int type = env->interrupt_index & 0xf0; + + if (((type == TT_EXTINT) && + cpu_pil_allowed(env, pil)) || + type != TT_EXTINT) { + env->exception_index = env->interrupt_index; + do_interrupt(env); + next_tb = 0; + } + } + } else if (interrupt_request & CPU_INTERRUPT_TIMER) { + //do_interrupt(0, 0, 0, 0, 0); + env->interrupt_request &= ~CPU_INTERRUPT_TIMER; + } +#elif defined(TARGET_ARM) + if (interrupt_request & CPU_INTERRUPT_FIQ + && !(env->uncached_cpsr & CPSR_F)) { + env->exception_index = EXCP_FIQ; + do_interrupt(env); + next_tb = 0; + } + /* ARMv7-M interrupt return works by loading a magic value + into the PC. On real hardware the load causes the + return to occur. The qemu implementation performs the + jump normally, then does the exception return when the + CPU tries to execute code at the magic address. + This will cause the magic PC value to be pushed to + the stack if an interrupt occured at the wrong time. + We avoid this by disabling interrupts when + pc contains a magic address. 
*/ + if (interrupt_request & CPU_INTERRUPT_HARD + && ((IS_M(env) && env->regs[15] < 0xfffffff0) + || !(env->uncached_cpsr & CPSR_I))) { + env->exception_index = EXCP_IRQ; + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_SH4) + if (interrupt_request & CPU_INTERRUPT_HARD) { + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_ALPHA) + if (interrupt_request & CPU_INTERRUPT_HARD) { + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_CRIS) + if (interrupt_request & CPU_INTERRUPT_HARD + && (env->pregs[PR_CCS] & I_FLAG) + && !env->locked_irq) { + env->exception_index = EXCP_IRQ; + do_interrupt(env); + next_tb = 0; + } + if (interrupt_request & CPU_INTERRUPT_NMI + && (env->pregs[PR_CCS] & M_FLAG)) { + env->exception_index = EXCP_NMI; + do_interrupt(env); + next_tb = 0; + } +#elif defined(TARGET_M68K) + if (interrupt_request & CPU_INTERRUPT_HARD + && ((env->sr & SR_I) >> SR_I_SHIFT) + < env->pending_level) { + /* Real hardware gets the interrupt vector via an + IACK cycle at this point. Current emulated + hardware doesn't rely on this, so we + provide/save the vector when the interrupt is + first signalled. */ + env->exception_index = env->pending_vector; + do_interrupt(1); + next_tb = 0; + } +#endif + /* Don't use the cached interupt_request value, + do_interrupt may have updated the EXITTB flag. */ + if (env->interrupt_request & CPU_INTERRUPT_EXITTB) { +#ifndef VBOX + env->interrupt_request &= ~CPU_INTERRUPT_EXITTB; +#else /* VBOX */ + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXITTB); +#endif /* VBOX */ + /* ensure that no TB jump will be modified as + the program flow was changed */ + next_tb = 0; + } +#ifdef VBOX + RAWEx_ProfileStop(env, STATS_IRQ_HANDLING); + if (interrupt_request & CPU_INTERRUPT_RC) { + env->exception_index = EXCP_RC; + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC); + ret = env->exception_index; + cpu_loop_exit(); + } + if (interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT)) { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~(CPU_INTERRUPT_EXTERNAL_EXIT)); + env->exit_request = 1; + } +#endif + } + if (unlikely(env->exit_request)) { + env->exit_request = 0; + env->exception_index = EXCP_INTERRUPT; + cpu_loop_exit(); + } + +#ifdef VBOX + /* + * Check if we the CPU state allows us to execute the code in raw-mode. 
+ */ + RAWEx_ProfileStart(env, STATS_RAW_CHECK); + if (remR3CanExecuteRaw(env, + env->eip + env->segs[R_CS].base, + env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)), + &env->exception_index)) + { + RAWEx_ProfileStop(env, STATS_RAW_CHECK); + ret = env->exception_index; + cpu_loop_exit(); + } + RAWEx_ProfileStop(env, STATS_RAW_CHECK); +#endif /* VBOX */ + +#if defined(DEBUG_DISAS) || defined(CONFIG_DEBUG_EXEC) + if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { + /* restore flags in standard format */ +#if defined(TARGET_I386) + env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); + log_cpu_state(env, X86_DUMP_CCOP); + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); +#elif defined(TARGET_M68K) + cpu_m68k_flush_flags(env, env->cc_op); + env->cc_op = CC_OP_FLAGS; + env->sr = (env->sr & 0xffe0) + | env->cc_dest | (env->cc_x << 4); + log_cpu_state(env, 0); +#else + log_cpu_state(env, 0); +#endif + } +#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */ +#ifdef VBOX + RAWEx_ProfileStart(env, STATS_TLB_LOOKUP); +#endif /*VBOX*/ + spin_lock(&tb_lock); + tb = tb_find_fast(); + /* Note: we do it here to avoid a gcc bug on Mac OS X when + doing it in tb_find_slow */ + if (tb_invalidated_flag) { + /* as some TB could have been invalidated because + of memory exceptions while generating the code, we + must recompute the hash index here */ + next_tb = 0; + tb_invalidated_flag = 0; + } +#ifdef CONFIG_DEBUG_EXEC + qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n", + (void *)tb->tc_ptr, tb->pc, + lookup_symbol(tb->pc)); +#endif + /* see if we can patch the calling TB. When the TB + spans two pages, we cannot safely do a direct + jump. */ +#ifndef VBOX + if (next_tb != 0 && tb->page_addr[1] == -1) { +#else /* VBOX */ + if (next_tb != 0 && !(tb->cflags & CF_RAW_MODE) && tb->page_addr[1] == -1) { +#endif /* VBOX */ + tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb); + } + spin_unlock(&tb_lock); +#ifdef VBOX + RAWEx_ProfileStop(env, STATS_TLB_LOOKUP); +#endif + + /* cpu_interrupt might be called while translating the + TB, but before it is linked into a potentially + infinite loop and becomes env->current_tb. Avoid + starting execution if there is a pending interrupt. */ + env->current_tb = tb; + barrier(); + if (likely(!env->exit_request)) { + tc_ptr = tb->tc_ptr; + /* execute the generated code */ +#ifdef VBOX + RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE); +#endif +#if defined(__sparc__) && !defined(CONFIG_SOLARIS) +#undef env + env = cpu_single_env; +#define env cpu_single_env +#endif + Log5(("REM: tb=%p tc_ptr=%p %04x:%08RGv\n", tb, tc_ptr, env->segs[R_CS].selector, (RTGCPTR)env->eip)); +#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) + tcg_qemu_tb_exec(tc_ptr, next_tb); +#else + next_tb = tcg_qemu_tb_exec(tc_ptr); +#endif + if (next_tb) + Log5(("REM: next_tb=%p %04x:%08RGv\n", next_tb, env->segs[R_CS].selector, (RTGCPTR)env->eip)); +#ifdef VBOX + RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE); +#endif + if ((next_tb & 3) == 2) { + /* Instruction counter expired. */ + int insns_left; + tb = (TranslationBlock *)(uintptr_t)(next_tb & ~3); + /* Restore PC. */ + cpu_pc_from_tb(env, tb); + insns_left = env->icount_decr.u32; + if (env->icount_extra && insns_left >= 0) { + /* Refill decrementer and continue execution. 
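/*
 * Illustrative sketch of the tagging behind "next_tb & 3" / "next_tb & ~3"
 * above: TranslationBlock pointers are assumed to be at least 4-byte aligned,
 * so the two low bits can carry the jump-slot index or a marker such as 2 for
 * "instruction budget expired".
 */
#include <assert.h>
#include <stdint.h>

static uintptr_t sk_tag(void *tb, unsigned low2)        /* low2 in 0..3 */
{
    assert(((uintptr_t)tb & 3) == 0 && low2 <= 3);
    return (uintptr_t)tb | low2;
}

static void *sk_untag(uintptr_t next_tb, unsigned *low2)
{
    *low2 = (unsigned)(next_tb & 3);
    return (void *)(next_tb & ~(uintptr_t)3);
}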
*/ + env->icount_extra += insns_left; + if (env->icount_extra > 0xffff) { + insns_left = 0xffff; + } else { + insns_left = env->icount_extra; + } + env->icount_extra -= insns_left; + env->icount_decr.u16.low = insns_left; + } else { + if (insns_left > 0) { + /* Execute remaining instructions. */ + cpu_exec_nocache(insns_left, tb); + } + env->exception_index = EXCP_INTERRUPT; + next_tb = 0; + cpu_loop_exit(); + } + } + } + env->current_tb = NULL; + /* reset soft MMU for next block (it can currently + only be set by a memory fault) */ + } /* for(;;) */ + } +#ifdef VBOX_HIGH_RES_TIMERS_HACK + /* NULL the current_tb here so cpu_interrupt() doesn't do anything + unnecessary (like crashing during emulate single instruction). + Note! Don't use env1->pVM here, the code wouldn't run with + gcc-4.4/amd64 anymore, see #3883. */ + env->current_tb = NULL; + if ( !(env->interrupt_request & ( CPU_INTERRUPT_DEBUG | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_RC + | CPU_INTERRUPT_SINGLE_INSTR | CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT)) + && ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER) + || TMTimerPollBool(env->pVM, env->pVCpu)) ) { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_TIMER); + remR3ProfileStart(STATS_QEMU_RUN_TIMERS); + TMR3TimerQueuesDo(env->pVM); + remR3ProfileStop(STATS_QEMU_RUN_TIMERS); + } +#endif + } /* for(;;) */ + + +#if defined(TARGET_I386) + /* restore flags in standard format */ + env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); +#elif defined(TARGET_ARM) + /* XXX: Save/restore host fpu exception state?. */ +#elif defined(TARGET_SPARC) +#elif defined(TARGET_PPC) +#elif defined(TARGET_M68K) + cpu_m68k_flush_flags(env, env->cc_op); + env->cc_op = CC_OP_FLAGS; + env->sr = (env->sr & 0xffe0) + | env->cc_dest | (env->cc_x << 4); +#elif defined(TARGET_MICROBLAZE) +#elif defined(TARGET_MIPS) +#elif defined(TARGET_SH4) +#elif defined(TARGET_ALPHA) +#elif defined(TARGET_CRIS) +#elif defined(TARGET_S390X) + /* XXXXX */ +#else +#error unsupported target CPU +#endif + + /* restore global registers */ + barrier(); + env = (void *) saved_env_reg; + +# ifndef VBOX /* we might be using elsewhere, we only have one. 
*/ + /* fail safe : never use cpu_single_env outside cpu_exec() */ + cpu_single_env = NULL; +# endif + return ret; +} + +/* must only be called from the generated code as an exception can be + generated */ +void tb_invalidate_page_range(target_ulong start, target_ulong end) +{ + /* XXX: cannot enable it yet because it yields to MMU exception + where NIP != read address on PowerPC */ +#if 0 + target_ulong phys_addr; + phys_addr = get_phys_addr_code(env, start); + tb_invalidate_phys_page_range(phys_addr, phys_addr + end - start, 0); +#endif +} + +#if defined(TARGET_I386) && defined(CONFIG_USER_ONLY) + +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + selector &= 0xffff; + cpu_x86_load_seg_cache(env, seg_reg, selector, + (selector << 4), 0xffff, 0); + } else { + helper_load_seg(seg_reg, selector); + } + env = saved_env; +} + +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + + helper_fsave(ptr, data32); + + env = saved_env; +} + +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + + helper_frstor(ptr, data32); + + env = saved_env; +} + +#endif /* TARGET_I386 */ + +#if !defined(CONFIG_SOFTMMU) + +#if defined(TARGET_I386) +#define EXCEPTION_ACTION raise_exception_err(env->exception_index, env->error_code) +#else +#define EXCEPTION_ACTION cpu_loop_exit() +#endif + +/* 'pc' is the host PC at which the exception was raised. 'address' is + the effective address of the memory exception. 'is_write' is 1 if a + write caused the exception and otherwise 0'. 'old_set' is the + signal set which should be restored */ +static inline int handle_cpu_signal(uintptr_t pc, uintptr_t address, + int is_write, sigset_t *old_set, + void *puc) +{ + TranslationBlock *tb; + int ret; + + if (cpu_single_env) + env = cpu_single_env; /* XXX: find a correct solution for multithread */ +#if defined(DEBUG_SIGNAL) + qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", + pc, address, is_write, *(unsigned long *)old_set); +#endif + /* XXX: locking issue */ + if (is_write && page_unprotect(h2g(address), pc, puc)) { + return 1; + } + + /* see if it is an MMU fault */ + ret = cpu_handle_mmu_fault(env, address, is_write, MMU_USER_IDX, 0); + if (ret < 0) + return 0; /* not an MMU fault */ + if (ret == 0) + return 1; /* the MMU fault was handled without causing real CPU fault */ + /* now we have a real cpu fault */ + tb = tb_find_pc(pc); + if (tb) { + /* the PC is inside the translated code. 
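/*
 * Illustrative sketch of the real-mode segment arithmetic used by
 * cpu_x86_load_seg() above: outside protected mode the base is simply
 * selector << 4 with a 64 KiB limit, so a seg:off pair maps to a linear
 * address as below.
 */
#include <stdint.h>

static uint32_t sk_real_mode_linear(uint16_t selector, uint16_t offset)
{
    uint32_t base = (uint32_t)selector << 4;    /* e.g. 0x1234 -> 0x12340 */
    return base + offset;                       /* 0x1234:0x0010 -> 0x12350 */
}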
It means that we have + a virtual CPU fault */ + cpu_restore_state(tb, env, pc, puc); + } + + /* we restore the process signal mask as the sigreturn should + do it (XXX: use sigsetjmp) */ + sigprocmask(SIG_SETMASK, old_set, NULL); + EXCEPTION_ACTION; + + /* never comes here */ + return 1; +} + +#if defined(__i386__) + +#if defined(__APPLE__) +# include <sys/ucontext.h> + +# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext->ss.eip)) +# define TRAP_sig(context) ((context)->uc_mcontext->es.trapno) +# define ERROR_sig(context) ((context)->uc_mcontext->es.err) +# define MASK_sig(context) ((context)->uc_sigmask) +#elif defined (__NetBSD__) +# include <ucontext.h> + +# define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) +# define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) +# define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) +# define MASK_sig(context) ((context)->uc_sigmask) +#elif defined (__FreeBSD__) || defined(__DragonFly__) +# include <ucontext.h> + +# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_eip)) +# define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) +# define ERROR_sig(context) ((context)->uc_mcontext.mc_err) +# define MASK_sig(context) ((context)->uc_sigmask) +#elif defined(__OpenBSD__) +# define EIP_sig(context) ((context)->sc_eip) +# define TRAP_sig(context) ((context)->sc_trapno) +# define ERROR_sig(context) ((context)->sc_err) +# define MASK_sig(context) ((context)->sc_mask) +#else +# define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) +# define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) +# define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) +# define MASK_sig(context) ((context)->uc_sigmask) +#endif + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; +#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__) + ucontext_t *uc = puc; +#elif defined(__OpenBSD__) + struct sigcontext *uc = puc; +#else + struct ucontext *uc = puc; +#endif + uintptr_t pc; + int trapno; + +#ifndef REG_EIP +/* for glibc 2.1 */ +#define REG_EIP EIP +#define REG_ERR ERR +#define REG_TRAPNO TRAPNO +#endif + pc = EIP_sig(uc); + trapno = TRAP_sig(uc); + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + trapno == 0xe ? 
+ (ERROR_sig(uc) >> 1) & 1 : 0, + &MASK_sig(uc), puc); +} + +#elif defined(__x86_64__) + +#ifdef __NetBSD__ +#define PC_sig(context) _UC_MACHINE_PC(context) +#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) +#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) +#define MASK_sig(context) ((context)->uc_sigmask) +#elif defined(__OpenBSD__) +#define PC_sig(context) ((context)->sc_rip) +#define TRAP_sig(context) ((context)->sc_trapno) +#define ERROR_sig(context) ((context)->sc_err) +#define MASK_sig(context) ((context)->sc_mask) +#elif defined (__FreeBSD__) || defined(__DragonFly__) +#include <ucontext.h> + +#define PC_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_rip)) +#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) +#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) +#define MASK_sig(context) ((context)->uc_sigmask) +#else +#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) +#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) +#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) +#define MASK_sig(context) ((context)->uc_sigmask) +#endif + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + uintptr_t pc; +#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__) + ucontext_t *uc = puc; +#elif defined(__OpenBSD__) + struct sigcontext *uc = puc; +#else + struct ucontext *uc = puc; +#endif + + pc = PC_sig(uc); + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + TRAP_sig(uc) == 0xe ? + (ERROR_sig(uc) >> 1) & 1 : 0, + &MASK_sig(uc), puc); +} + +#elif defined(_ARCH_PPC) + +/*********************************************************************** + * signal context platform-specific definitions + * From Wine + */ +#ifdef linux +/* All Registers access - only for local access */ +# define REG_sig(reg_name, context) ((context)->uc_mcontext.regs->reg_name) +/* Gpr Registers access */ +# define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context) +# define IAR_sig(context) REG_sig(nip, context) /* Program counter */ +# define MSR_sig(context) REG_sig(msr, context) /* Machine State Register (Supervisor) */ +# define CTR_sig(context) REG_sig(ctr, context) /* Count register */ +# define XER_sig(context) REG_sig(xer, context) /* User's integer exception register */ +# define LR_sig(context) REG_sig(link, context) /* Link register */ +# define CR_sig(context) REG_sig(ccr, context) /* Condition register */ +/* Float Registers access */ +# define FLOAT_sig(reg_num, context) (((double*)((char*)((context)->uc_mcontext.regs+48*4)))[reg_num]) +# define FPSCR_sig(context) (*(int*)((char*)((context)->uc_mcontext.regs+(48+32*2)*4))) +/* Exception Registers access */ +# define DAR_sig(context) REG_sig(dar, context) +# define DSISR_sig(context) REG_sig(dsisr, context) +# define TRAP_sig(context) REG_sig(trap, context) +#endif /* linux */ + +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <ucontext.h> +# define IAR_sig(context) ((context)->uc_mcontext.mc_srr0) +# define MSR_sig(context) ((context)->uc_mcontext.mc_srr1) +# define CTR_sig(context) ((context)->uc_mcontext.mc_ctr) +# define XER_sig(context) ((context)->uc_mcontext.mc_xer) +# define LR_sig(context) ((context)->uc_mcontext.mc_lr) +# define CR_sig(context) ((context)->uc_mcontext.mc_cr) +/* Exception Registers access */ +# define DAR_sig(context) ((context)->uc_mcontext.mc_dar) +# define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr) +# define 
TRAP_sig(context) ((context)->uc_mcontext.mc_exc) +#endif /* __FreeBSD__|| __FreeBSD_kernel__ */ + +#ifdef __APPLE__ +# include <sys/ucontext.h> +typedef struct ucontext SIGCONTEXT; +/* All Registers access - only for local access */ +# define REG_sig(reg_name, context) ((context)->uc_mcontext->ss.reg_name) +# define FLOATREG_sig(reg_name, context) ((context)->uc_mcontext->fs.reg_name) +# define EXCEPREG_sig(reg_name, context) ((context)->uc_mcontext->es.reg_name) +# define VECREG_sig(reg_name, context) ((context)->uc_mcontext->vs.reg_name) +/* Gpr Registers access */ +# define GPR_sig(reg_num, context) REG_sig(r##reg_num, context) +# define IAR_sig(context) REG_sig(srr0, context) /* Program counter */ +# define MSR_sig(context) REG_sig(srr1, context) /* Machine State Register (Supervisor) */ +# define CTR_sig(context) REG_sig(ctr, context) +# define XER_sig(context) REG_sig(xer, context) /* Link register */ +# define LR_sig(context) REG_sig(lr, context) /* User's integer exception register */ +# define CR_sig(context) REG_sig(cr, context) /* Condition register */ +/* Float Registers access */ +# define FLOAT_sig(reg_num, context) FLOATREG_sig(fpregs[reg_num], context) +# define FPSCR_sig(context) ((double)FLOATREG_sig(fpscr, context)) +/* Exception Registers access */ +# define DAR_sig(context) EXCEPREG_sig(dar, context) /* Fault registers for coredump */ +# define DSISR_sig(context) EXCEPREG_sig(dsisr, context) +# define TRAP_sig(context) EXCEPREG_sig(exception, context) /* number of powerpc exception taken */ +#endif /* __APPLE__ */ + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + ucontext_t *uc = puc; +#else + struct ucontext *uc = puc; +#endif + uintptr_t pc; + int is_write; + + pc = IAR_sig(uc); + is_write = 0; +#if 0 + /* ppc 4xx case */ + if (DSISR_sig(uc) & 0x00800000) + is_write = 1; +#else + if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) + is_write = 1; +#endif + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, &uc->uc_sigmask, puc); +} + +#elif defined(__alpha__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + uint32_t *pc = uc->uc_mcontext.sc_pc; + uint32_t insn = *pc; + int is_write = 0; + + /* XXX: need kernel patch to get write flag faster */ + switch (insn >> 26) { + case 0x0d: // stw + case 0x0e: // stb + case 0x0f: // stq_u + case 0x24: // stf + case 0x25: // stg + case 0x26: // sts + case 0x27: // stt + case 0x2c: // stl + case 0x2d: // stq + case 0x2e: // stl_c + case 0x2f: // stq_c + is_write = 1; + } + + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, &uc->uc_sigmask, puc); +} +#elif defined(__sparc__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + int is_write; + uint32_t insn; +#if !defined(__arch64__) || defined(CONFIG_SOLARIS) + uint32_t *regs = (uint32_t *)(info + 1); + void *sigmask = (regs + 20); + /* XXX: is there a standard glibc define ? 
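/*
 * Illustrative sketch of the store classification used by the Alpha and SPARC
 * handlers above: fixed-width 32-bit instruction sets keep the major opcode in
 * the top bits, so the faulting instruction word can be classified with one
 * shift and a switch.  The opcode values are a subset of the Alpha case list
 * above.
 */
#include <stdint.h>

static int sk_alpha_insn_is_store(uint32_t insn)
{
    switch (insn >> 26) {       /* bits 31..26: Alpha major opcode */
    case 0x0d:                  /* stw */
    case 0x0e:                  /* stb */
    case 0x2c:                  /* stl */
    case 0x2d:                  /* stq */
        return 1;
    default:
        return 0;
    }
}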
*/ + uintptr_t pc = regs[1]; +#else +#ifdef __linux__ + struct sigcontext *sc = puc; + uintptr_t pc = sc->sigc_regs.tpc; + void *sigmask = (void *)sc->sigc_mask; +#elif defined(__OpenBSD__) + struct sigcontext *uc = puc; + uintptr_t pc = uc->sc_pc; + void *sigmask = (void *)(uintptr_t)uc->sc_mask; +#endif +#endif + + /* XXX: need kernel patch to get write flag faster */ + is_write = 0; + insn = *(uint32_t *)pc; + if ((insn >> 30) == 3) { + switch((insn >> 19) & 0x3f) { + case 0x05: // stb + case 0x15: // stba + case 0x06: // sth + case 0x16: // stha + case 0x04: // st + case 0x14: // sta + case 0x07: // std + case 0x17: // stda + case 0x0e: // stx + case 0x1e: // stxa + case 0x24: // stf + case 0x34: // stfa + case 0x27: // stdf + case 0x37: // stdfa + case 0x26: // stqf + case 0x36: // stqfa + case 0x25: // stfsr + case 0x3c: // casa + case 0x3e: // casxa + is_write = 1; + break; + } + } + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, sigmask, NULL); +} + +#elif defined(__arm__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + uintptr_t pc; + int is_write; + +#if (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) + pc = uc->uc_mcontext.gregs[R15]; +#else + pc = uc->uc_mcontext.arm_pc; +#endif + /* XXX: compute is_write */ + is_write = 0; + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, + &uc->uc_sigmask, puc); +} + +#elif defined(__mc68000) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + uintptr_t pc; + int is_write; + + pc = uc->uc_mcontext.gregs[16]; + /* XXX: compute is_write */ + is_write = 0; + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, + &uc->uc_sigmask, puc); +} + +#elif defined(__ia64) + +#ifndef __ISR_VALID + /* This ought to be in <bits/siginfo.h>... */ +# define __ISR_VALID 1 +#endif + +int cpu_signal_handler(int host_signum, void *pinfo, void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + uintptr_t ip; + int is_write = 0; + + ip = uc->uc_mcontext.sc_ip; + switch (host_signum) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + case SIGTRAP: + if (info->si_code && (info->si_segvflags & __ISR_VALID)) + /* ISR.W (write-access) is bit 33: */ + is_write = (info->si_isr >> 33) & 1; + break; + + default: + break; + } + return handle_cpu_signal(ip, (uintptr_t)info->si_addr, + is_write, + (sigset_t *)&uc->uc_sigmask, puc); +} + +#elif defined(__s390__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + uintptr_t pc; + uint16_t *pinsn; + int is_write = 0; + + pc = uc->uc_mcontext.psw.addr; + + /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead + of the normal 2 arguments. The 3rd argument contains the "int_code" + from the hardware which does in fact contain the is_write value. + The rt signal handler, as far as I can tell, does not give this value + at all. Not that we could get to it from here even if it were. */ + /* ??? This is not even close to complete, since it ignores all + of the read-modify-write instructions. 
*/ + pinsn = (uint16_t *)pc; + switch (pinsn[0] >> 8) { + case 0x50: /* ST */ + case 0x42: /* STC */ + case 0x40: /* STH */ + is_write = 1; + break; + case 0xc4: /* RIL format insns */ + switch (pinsn[0] & 0xf) { + case 0xf: /* STRL */ + case 0xb: /* STGRL */ + case 0x7: /* STHRL */ + is_write = 1; + } + break; + case 0xe3: /* RXY format insns */ + switch (pinsn[2] & 0xff) { + case 0x50: /* STY */ + case 0x24: /* STG */ + case 0x72: /* STCY */ + case 0x70: /* STHY */ + case 0x8e: /* STPQ */ + case 0x3f: /* STRVH */ + case 0x3e: /* STRV */ + case 0x2f: /* STRVG */ + is_write = 1; + } + break; + } + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, &uc->uc_sigmask, puc); +} + +#elif defined(__mips__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + struct ucontext *uc = puc; + greg_t pc = uc->uc_mcontext.pc; + int is_write; + + /* XXX: compute is_write */ + is_write = 0; + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, &uc->uc_sigmask, puc); +} + +#elif defined(__hppa__) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + struct siginfo *info = pinfo; + struct ucontext *uc = puc; + uintptr_t pc = uc->uc_mcontext.sc_iaoq[0]; + uint32_t insn = *(uint32_t *)pc; + int is_write = 0; + + /* XXX: need kernel patch to get write flag faster. */ + switch (insn >> 26) { + case 0x1a: /* STW */ + case 0x19: /* STH */ + case 0x18: /* STB */ + case 0x1b: /* STWM */ + is_write = 1; + break; + + case 0x09: /* CSTWX, FSTWX, FSTWS */ + case 0x0b: /* CSTDX, FSTDX, FSTDS */ + /* Distinguish from coprocessor load ... */ + is_write = (insn >> 9) & 1; + break; + + case 0x03: + switch ((insn >> 6) & 15) { + case 0xa: /* STWS */ + case 0x9: /* STHS */ + case 0x8: /* STBS */ + case 0xe: /* STWAS */ + case 0xc: /* STBYS */ + is_write = 1; + } + break; + } + + return handle_cpu_signal(pc, (uintptr_t)info->si_addr, + is_write, &uc->uc_sigmask, puc); +} + +#else + +#error host CPU specific signal handler needed + +#endif + +#endif /* !defined(CONFIG_SOFTMMU) */ diff --git a/src/recompiler/cutils.c b/src/recompiler/cutils.c new file mode 100644 index 00000000..5fbf70bd --- /dev/null +++ b/src/recompiler/cutils.c @@ -0,0 +1,752 @@ +/* + * Simple C functions to supplement the C library + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu-common.h" +#include "host-utils.h" + +#ifdef VBOX +# include "osdep.h" + + +static inline int toupper(int ch) { + if ( (unsigned int)(ch - 'a') < 26u ) + ch += 'A' - 'a'; + return ch; +} + +/* Quick sort from OpenSolaris: + http://src.opensolaris.org/source/raw/onnv/onnv-gate/usr/src/common/util/qsort.c */ +/* + * choose a median of 3 values + * + * note: cstyle specifically prohibits nested conditional operators + * but this is the only way to do the median of 3 function in-line + */ +#define med3(a, b, c) (cmp((a), (b)) < 0) \ + ? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \ + : ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a)) + +#define THRESH_L 5 /* threshold for insertion sort */ +#define THRESH_M3 20 /* threshold for median of 3 */ +#define THRESH_M9 50 /* threshold for median of 9 */ + +typedef struct { + char *b_lim; + size_t nrec; +} stk_t; + +/* + * The following swap functions should not create a stack frame + * the SPARC call / return instruction will be executed + * but the a save / restore will not be executed + * which means we won't do a window turn with the spill / fill overhead + * verify this by examining the assembly code + */ + +/* ARGSUSED */ +static void +swapp32(uint32_t *r1, uint32_t *r2, size_t cnt) +{ + uint32_t temp; + + temp = *r1; + *r1++ = *r2; + *r2++ = temp; +} + +/* ARGSUSED */ +static void +swapp64(uint64_t *r1, uint64_t *r2, size_t cnt) +{ + uint64_t temp; + + temp = *r1; + *r1++ = *r2; + *r2++ = temp; +} + +static void +swapi(uint32_t *r1, uint32_t *r2, size_t cnt) +{ + uint32_t temp; + + /* character by character */ + while (cnt--) { + temp = *r1; + *r1++ = *r2; + *r2++ = temp; + } +} + +static void +swapb(char *r1, char *r2, size_t cnt) +{ + char temp; + + /* character by character */ + while (cnt--) { + temp = *r1; + *r1++ = *r2; + *r2++ = temp; + } +} + +/* + * qsort() is a general purpose, in-place sorting routine using a + * user provided call back function for comparisons. This implementation + * utilizes a ternary quicksort algorithm, and cuts over to an + * insertion sort for partitions involving fewer than THRESH_L records. + * + * Potential User Errors + * There is no return value from qsort, this function has no method + * of alerting the user that a sort did not work or could not work. + * We do not print an error message or exit the process or thread, + * Even if we can detect an error, We CANNOT silently return without + * sorting the data, if we did so the user could never be sure the + * sort completed successfully. + * It is possible we could change the return value of sort from void + * to int and return success or some error codes, but this gets into + * standards and compatibility issues. + * + * Examples of qsort parameter errors might be + * 1) record size (rsiz) equal to 0 + * qsort will loop and never return. + * 2) record size (rsiz) less than 0 + * rsiz is unsigned, so a negative value is insanely large + * 3) number of records (nrec) is 0 + * This is legal - qsort will return without examining any records + * 4) number of records (nrec) is less than 0 + * nrec is unsigned, so a negative value is insanely large. 
+ * 5) nrec * rsiz > memory allocation for sort array + * a segment violation may occur + * corruption of other memory may occur + * 6) The base address of the sort array is invalid + * a segment violation may occur + * corruption of other memory may occur + * 7) The user call back function is invalid + * we may get alignment errors or segment violations + * we may jump into never-never land + * + * Some less obvious errors might be + * 8) The user compare function is not comparing correctly + * 9) The user compare function modifies the data records + */ + +void +qemu_qsort( + void *basep, + size_t nrec, + size_t rsiz, + int (*cmp)(const void *, const void *)) +{ + size_t i; /* temporary variable */ + + /* variables used by swap */ + void (*swapf)(char *, char *, size_t); + size_t loops; + + /* variables used by sort */ + stk_t stack[8 * sizeof (nrec) + 1]; + stk_t *sp; + char *b_lim; /* bottom limit */ + char *b_dup; /* bottom duplicate */ + char *b_par; /* bottom partition */ + char *t_lim; /* top limit */ + char *t_dup; /* top duplicate */ + char *t_par; /* top partition */ + char *m1, *m2, *m3; /* median pointers */ + uintptr_t d_bytelength; /* byte length of duplicate records */ + int b_nrec; + int t_nrec; + int cv; /* results of compare (bottom / top) */ + + /* + * choose a swap function based on alignment and size + * + * The qsort function sorts an array of fixed length records. + * We have very limited knowledge about the data record itself. + * It may be that the data record is in the array we are sorting + * or it may be that the array contains pointers or indexes to + * the actual data record and all that we are sorting is the indexes. + * + * The following decision will choose an optimal swap function + * based on the size and alignment of the data records + * swapp64 will swap 64 bit pointers + * swapp32 will swap 32 bit pointers + * swapi will swap an array of 32 bit integers + * swapb will swap an array of 8 bit characters + * + * swapi and swapb will also require the variable loops to be set + * to control the length of the array being swapped + */ + if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) && + (rsiz == sizeof (uint64_t))) { + loops = 1; + swapf = (void (*)(char *, char *, size_t))swapp64; + } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && + (rsiz == sizeof (uint32_t))) { + loops = 1; + swapf = (void (*)(char *, char *, size_t))swapp32; + } else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) && + ((rsiz & (sizeof (uint32_t) - 1)) == 0)) { + loops = rsiz / sizeof (int); + swapf = (void (*)(char *, char *, size_t))swapi; + } else { + loops = rsiz; + swapf = swapb; + } + + /* + * qsort is a partitioning sort + * + * the stack is the bookkeeping mechanism to keep track of all + * the partitions. + * + * each sort pass takes one partition and sorts it into two partitions. + * at the top of the loop we simply take the partition on the top + * of the stack and sort it. See the comments at the bottom + * of the loop regarding which partitions to add in what order. + * + * initially put the whole partition on the stack + */ + sp = stack; + sp->b_lim = (char *)basep; + sp->nrec = nrec; + sp++; + while (sp > stack) { + sp--; + b_lim = sp->b_lim; + nrec = sp->nrec; + + /* + * a linear insertion sort i faster than a qsort for + * very small number of records (THRESH_L) + * + * if number records < threshold use linear insertion sort + * + * this also handles the special case where the partition + * 0 or 1 records length. 
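+         * (THRESH_L is 5 above, so any partition of fewer than five
+         * records is finished off by this insertion sort.)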
+ */ + if (nrec < THRESH_L) { + /* + * Linear insertion sort + */ + t_par = b_lim; + for (i = 1; i < nrec; i++) { + t_par += rsiz; + b_par = t_par; + while (b_par > b_lim) { + b_par -= rsiz; + if ((*cmp)(b_par, b_par + rsiz) <= 0) { + break; + } + (*swapf)(b_par, b_par + rsiz, loops); + } + } + + /* + * a linear insertion sort will put all records + * in their final position and will not create + * subpartitions. + * + * therefore when the insertion sort is complete + * just go to the top of the loop and get the + * next partition to sort. + */ + continue; + } + + /* quicksort */ + + /* + * choose a pivot record + * + * Ideally the pivot record will divide the partition + * into two equal parts. however we have to balance the + * work involved in selecting the pivot record with the + * expected benefit. + * + * The choice of pivot record depends on the number of + * records in the partition + * + * for small partitions (nrec < THRESH_M3) + * we just select the record in the middle of the partition + * + * if (nrec >= THRESH_M3 && nrec < THRESH_M9) + * we select three values and choose the median of 3 + * + * if (nrec >= THRESH_M9) + * then we use an approximate median of 9 + * 9 records are selected and grouped in 3 groups of 3 + * the median of each of these 3 groups is fed into another + * median of 3 decision. + * + * Each median of 3 decision is 2 or 3 compares, + * so median of 9 costs between 8 and 12 compares. + * + * i is byte distance between two consecutive samples + * m2 will point to the pivot record + */ + if (nrec < THRESH_M3) { + m2 = b_lim + (nrec / 2) * rsiz; + } else if (nrec < THRESH_M9) { + /* use median of 3 */ + i = ((nrec - 1) / 2) * rsiz; + m2 = med3(b_lim, b_lim + i, b_lim + 2 * i); + } else { + /* approx median of 9 */ + i = ((nrec - 1) / 8) * rsiz; + m1 = med3(b_lim, b_lim + i, b_lim + 2 * i); + m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i); + m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i); + m2 = med3(m1, m2, m3); + } + + /* + * quick sort partitioning + * + * The partition limits are defined by bottom and top pointers + * b_lim and t_lim. + * + * qsort uses a fairly standard method of moving the + * partitioning pointers, b_par and t_par, to the middle of + * the partition and exchanging records that are in the + * wrong part of the partition. + * + * Two enhancements have been made to the basic algorithm. + * One for handling duplicate records and one to minimize + * the number of swaps. + * + * Two duplicate records pointers are (b_dup and t_dup) are + * initially set to b_lim and t_lim. Each time a record + * whose sort key value is equal to the pivot record is found + * it will be swapped with the record pointed to by + * b_dup or t_dup and the duplicate pointer will be + * incremented toward the center. + * When partitioning is complete, all the duplicate records + * will have been collected at the upper and lower limits of + * the partition and can easily be moved adjacent to the + * pivot record. + * + * The second optimization is to minimize the number of swaps. + * The pointer m2 points to the pivot record. + * During partitioning, if m2 is ever equal to the partitioning + * pointers, b_par or t_par, then b_par or t_par just moves + * onto the next record without doing a compare. + * If as a result of duplicate record detection, + * b_dup or t_dup is ever equal to m2, + * then m2 is changed to point to the duplicate record and + * b_dup or t_dup is incremented with out swapping records. 
+ * + * When partitioning is done, we may not have the same pivot + * record that we started with, but we will have one with + * an equal sort key. + */ + b_dup = b_par = b_lim; + t_dup = t_par = t_lim = b_lim + rsiz * (nrec - 1); + for (;;) { + + /* move bottom pointer up */ + for (; b_par <= t_par; b_par += rsiz) { + if (b_par == m2) { + continue; + } + cv = cmp(b_par, m2); + if (cv > 0) { + break; + } + if (cv == 0) { + if (b_dup == m2) { + m2 = b_par; + } else if (b_dup != b_par) { + (*swapf)(b_dup, b_par, loops); + } + b_dup += rsiz; + } + } + + /* move top pointer down */ + for (; b_par < t_par; t_par -= rsiz) { + if (t_par == m2) { + continue; + } + cv = cmp(t_par, m2); + if (cv < 0) { + break; + } + if (cv == 0) { + if (t_dup == m2) { + m2 = t_par; + } else if (t_dup != t_par) { + (*swapf)(t_dup, t_par, loops); + } + t_dup -= rsiz; + } + } + + /* break if we are done partitioning */ + if (b_par >= t_par) { + break; + } + + /* exchange records at upper and lower break points */ + (*swapf)(b_par, t_par, loops); + b_par += rsiz; + t_par -= rsiz; + } + + /* + * partitioning is now complete + * + * there are two termination conditions from the partitioning + * loop above. Either b_par or t_par have crossed or + * they are equal. + * + * we need to swap the pivot record to its final position + * m2 could be in either the upper or lower partitions + * or it could already be in its final position + */ + /* + * R[b_par] > R[m2] + * R[t_par] < R[m2] + */ + if (t_par < b_par) { + if (m2 < t_par) { + (*swapf)(m2, t_par, loops); + m2 = b_par = t_par; + } else if (m2 > b_par) { + (*swapf)(m2, b_par, loops); + m2 = t_par = b_par; + } else { + b_par = t_par = m2; + } + } else { + if (m2 < t_par) { + t_par = b_par = t_par - rsiz; + } + if (m2 != b_par) { + (*swapf)(m2, b_par, loops); + } + m2 = t_par; + } + + /* + * move bottom duplicates next to pivot + * optimized to eliminate overlap + */ + d_bytelength = b_dup - b_lim; + if (b_par - b_dup < d_bytelength) { + b_dup = b_lim + (b_par - b_dup); + } + while (b_dup > b_lim) { + b_dup -= rsiz; + b_par -= rsiz; + (*swapf)(b_dup, b_par, loops); + } + b_par = m2 - d_bytelength; + + /* + * move top duplicates next to pivot + */ + d_bytelength = t_lim - t_dup; + if (t_dup - t_par < d_bytelength) { + t_dup = t_lim - (t_dup - t_par); + } + while (t_dup < t_lim) { + t_dup += rsiz; + t_par += rsiz; + (*swapf)(t_dup, t_par, loops); + } + t_par = m2 + d_bytelength; + + /* + * when a qsort pass completes there are three partitions + * 1) the lower contains all records less than pivot + * 2) the upper contains all records greater than pivot + * 3) the pivot partition contains all record equal to pivot + * + * all records in the pivot partition are in their final + * position and do not need to be accounted for by the stack + * + * when adding partitions to the stack + * it is important to add the largest partition first + * to prevent stack overflow. 
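+         * (pushing the larger partition first and the smaller one on top
+         * means the smaller piece is always sorted next, so at most about
+         * log2(nrec) partitions are ever pending at once, well within the
+         * stack[8 * sizeof (nrec) + 1] bound declared above)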
+ * + * calculate number of unsorted records in top and bottom + * push resulting partitions on stack + */ + b_nrec = (b_par - b_lim) / rsiz; + t_nrec = (t_lim - t_par) / rsiz; + if (b_nrec < t_nrec) { + sp->b_lim = t_par + rsiz; + sp->nrec = t_nrec; + sp++; + sp->b_lim = b_lim; + sp->nrec = b_nrec; + sp++; + } else { + sp->b_lim = b_lim; + sp->nrec = b_nrec; + sp++; + sp->b_lim = t_par + rsiz; + sp->nrec = t_nrec; + sp++; + } + } +} + +#endif /* VBOX */ + +void pstrcpy(char *buf, int buf_size, const char *str) +{ + int c; + char *q = buf; + + if (buf_size <= 0) + return; + + for(;;) { + c = *str++; + if (c == 0 || q >= buf + buf_size - 1) + break; + *q++ = c; + } + *q = '\0'; +} + +/* strcat and truncate. */ +char *pstrcat(char *buf, int buf_size, const char *s) +{ + int len; + len = strlen(buf); + if (len < buf_size) + pstrcpy(buf + len, buf_size - len, s); + return buf; +} + +int strstart(const char *str, const char *val, const char **ptr) +{ + const char *p, *q; + p = str; + q = val; + while (*q != '\0') { + if (*p != *q) + return 0; + p++; + q++; + } + if (ptr) + *ptr = p; + return 1; +} + +int stristart(const char *str, const char *val, const char **ptr) +{ + const char *p, *q; + p = str; + q = val; + while (*q != '\0') { + if (qemu_toupper(*p) != qemu_toupper(*q)) + return 0; + p++; + q++; + } + if (ptr) + *ptr = p; + return 1; +} + +/* XXX: use host strnlen if available ? */ +int qemu_strnlen(const char *s, int max_len) +{ + int i; + + for(i = 0; i < max_len; i++) { + if (s[i] == '\0') { + break; + } + } + return i; +} + +#ifndef VBOX +time_t mktimegm(struct tm *tm) +{ + time_t t; + int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday; + if (m < 3) { + m += 12; + y--; + } + t = 86400 * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + + y / 400 - 719469); + t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec; + return t; +} +#endif /* !VBOX */ + +int qemu_fls(int i) +{ + return 32 - clz32(i); +} + +#ifndef VBOX + +/* + * Make sure data goes on disk, but if possible do not bother to + * write out the inode just for timestamp updates. + * + * Unfortunately even in 2009 many operating systems do not support + * fdatasync and have to fall back to fsync. + */ +int qemu_fdatasync(int fd) +{ +#ifdef CONFIG_FDATASYNC + return fdatasync(fd); +#else + return fsync(fd); +#endif +} + +/* io vectors */ + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) +{ + qiov->iov = qemu_malloc(alloc_hint * sizeof(struct iovec)); + qiov->niov = 0; + qiov->nalloc = alloc_hint; + qiov->size = 0; +} + +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) +{ + int i; + + qiov->iov = iov; + qiov->niov = niov; + qiov->nalloc = -1; + qiov->size = 0; + for (i = 0; i < niov; i++) + qiov->size += iov[i].iov_len; +} + +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) +{ + assert(qiov->nalloc != -1); + + if (qiov->niov == qiov->nalloc) { + qiov->nalloc = 2 * qiov->nalloc + 1; + qiov->iov = qemu_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec)); + } + qiov->iov[qiov->niov].iov_base = base; + qiov->iov[qiov->niov].iov_len = len; + qiov->size += len; + ++qiov->niov; +} + +/* + * Copies iovecs from src to the end dst until src is completely copied or the + * total size of the copied iovec reaches size. The size of the last copied + * iovec is changed in order to fit the specified total size if it isn't a + * perfect fit already. 
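+ * For example, with src holding iovecs of 100 and 200 bytes and size set
+ * to 150, dst gains the whole first iovec plus a 50-byte iovec covering
+ * the start of the second buffer.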
+ */ +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +{ + int i; + size_t done; + + assert(dst->nalloc != -1); + + done = 0; + for (i = 0; (i < src->niov) && (done != size); i++) { + if (done + src->iov[i].iov_len > size) { + qemu_iovec_add(dst, src->iov[i].iov_base, size - done); + break; + } else { + qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len); + } + done += src->iov[i].iov_len; + } +} + +void qemu_iovec_destroy(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qemu_free(qiov->iov); +} + +void qemu_iovec_reset(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qiov->niov = 0; + qiov->size = 0; +} + +void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) +{ + uint8_t *p = (uint8_t *)buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } +} + +void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) +{ + const uint8_t *p = (const uint8_t *)buf; + size_t copy; + int i; + + for (i = 0; i < qiov->niov && count; ++i) { + copy = count; + if (copy > qiov->iov[i].iov_len) + copy = qiov->iov[i].iov_len; + memcpy(qiov->iov[i].iov_base, p, copy); + p += copy; + count -= copy; + } +} + +#ifndef _WIN32 +/* Sets a specific flag */ +int fcntl_setfl(int fd, int flag) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags == -1) + return -errno; + + if (fcntl(fd, F_SETFL, flags | flag) == -1) + return -errno; + + return 0; +} +#endif + +#endif /* !VBOX */ + diff --git a/src/recompiler/def-helper.h b/src/recompiler/def-helper.h new file mode 100644 index 00000000..ad5b9459 --- /dev/null +++ b/src/recompiler/def-helper.h @@ -0,0 +1,258 @@ +/* Helper file for declaring TCG helper functions. + Should be included at the start and end of target-foo/helper.h. + + Targets should use DEF_HELPER_N and DEF_HELPER_FLAGS_N to declare helper + functions. Names should be specified without the helper_ prefix, and + the return and argument types specified. 3 basic types are understood + (i32, i64 and ptr). Additional aliases are provided for convenience and + to match the types used by the C helper implementation. + + The target helper.h should be included in all files that use/define + helper functions. THis will ensure that function prototypes are + consistent. In addition it should be included an extra two times for + helper.c, defining: + GEN_HELPER 1 to produce op generation functions (gen_helper_*) + GEN_HELPER 2 to do runtime registration helper functions. + */ + +#ifndef DEF_HELPER_H +#define DEF_HELPER_H 1 + +#define HELPER(name) glue(helper_, name) + +#define GET_TCGV_i32 GET_TCGV_I32 +#define GET_TCGV_i64 GET_TCGV_I64 +#define GET_TCGV_ptr GET_TCGV_PTR + +/* Some types that make sense in C, but not for TCG. 
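+   They are mapped onto the basic TCG categories (i32, i64, ptr or void)
+   by the dh_alias_* macros below, while dh_ctype_* supplies the matching
+   C type.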
*/ +#define dh_alias_i32 i32 +#define dh_alias_s32 i32 +#define dh_alias_int i32 +#define dh_alias_i64 i64 +#define dh_alias_s64 i64 +#define dh_alias_f32 i32 +#define dh_alias_f64 i64 +#if TARGET_LONG_BITS == 32 +#define dh_alias_tl i32 +#else +#define dh_alias_tl i64 +#endif +#define dh_alias_ptr ptr +#define dh_alias_void void +#define dh_alias_env ptr +#ifdef VBOX +# if ARCH_BITS == 32 +# define dh_alias_RTCCUINTREG i32 +# define dh_alias_RTCCINTREG i32 +# else +# define dh_alias_RTCCUINTREG i64 +# define dh_alias_RTCCINTREG i64 +# endif +#endif +#define dh_alias(t) glue(dh_alias_, t) + +#define dh_ctype_i32 uint32_t +#define dh_ctype_s32 int32_t +#define dh_ctype_int int +#define dh_ctype_i64 uint64_t +#define dh_ctype_s64 int64_t +#define dh_ctype_f32 float32 +#define dh_ctype_f64 float64 +#define dh_ctype_tl target_ulong +#define dh_ctype_ptr void * +#define dh_ctype_void void +#define dh_ctype_env CPUState * +#ifdef VBOX +# if ARCH_BITS == 32 +# define dh_ctype_RTCCUINTREG uint32_t +# define dh_ctype_RTCCINTREG int32_t +# else +# define dh_ctype_RTCCUINTREG uint64_t +# define dh_ctype_RTCCINTREG int64_t +# endif +#endif +#define dh_ctype(t) dh_ctype_##t + +/* We can't use glue() here because it falls foul of C preprocessor + recursive expansion rules. */ +#define dh_retvar_decl0_void void +#define dh_retvar_decl0_i32 TCGv_i32 retval +#define dh_retvar_decl0_i64 TCGv_i64 retval +#define dh_retvar_decl0_ptr TCGv_ptr retval +#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t)) + +#define dh_retvar_decl_void +#define dh_retvar_decl_i32 TCGv_i32 retval, +#define dh_retvar_decl_i64 TCGv_i64 retval, +#define dh_retvar_decl_ptr TCGv_ptr retval, +#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) + +#define dh_retvar_void TCG_CALL_DUMMY_ARG +#define dh_retvar_i32 GET_TCGV_i32(retval) +#define dh_retvar_i64 GET_TCGV_i64(retval) +#define dh_retvar_ptr GET_TCGV_ptr(retval) +#define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) + +#define dh_is_64bit_void 0 +#define dh_is_64bit_i32 0 +#define dh_is_64bit_i64 1 +#define dh_is_64bit_ptr (TCG_TARGET_REG_BITS == 64) +#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) + +#define dh_is_signed_void 0 +#define dh_is_signed_i32 0 +#define dh_is_signed_s32 1 +#define dh_is_signed_i64 0 +#define dh_is_signed_s64 1 +#define dh_is_signed_f32 0 +#define dh_is_signed_f64 0 +#define dh_is_signed_tl 0 +#define dh_is_signed_int 1 +/* ??? This is highly specific to the host cpu. There are even special + extension instructions that may be required, e.g. ia64's addp4. But + for now we don't support any 64-bit targets with 32-bit pointers. */ +#define dh_is_signed_ptr 0 +#define dh_is_signed_env dh_is_signed_ptr +#define dh_is_signed(t) dh_is_signed_##t + +#define dh_sizemask(t, n) \ + sizemask |= dh_is_64bit(t) << (n*2); \ + sizemask |= dh_is_signed(t) << (n*2+1) + +#define dh_arg(t, n) \ + args[n - 1] = glue(GET_TCGV_, dh_alias(t))(glue(arg, n)); \ + dh_sizemask(t, n) + +#define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) + + +#define DEF_HELPER_0(name, ret) \ + DEF_HELPER_FLAGS_0(name, 0, ret) +#define DEF_HELPER_1(name, ret, t1) \ + DEF_HELPER_FLAGS_1(name, 0, ret, t1) +#define DEF_HELPER_2(name, ret, t1, t2) \ + DEF_HELPER_FLAGS_2(name, 0, ret, t1, t2) +#define DEF_HELPER_3(name, ret, t1, t2, t3) \ + DEF_HELPER_FLAGS_3(name, 0, ret, t1, t2, t3) +#define DEF_HELPER_4(name, ret, t1, t2, t3, t4) \ + DEF_HELPER_FLAGS_4(name, 0, ret, t1, t2, t3, t4) + +#endif /* DEF_HELPER_H */ + +#ifndef GEN_HELPER +/* Function prototypes. 
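+   As an illustration only (the helper name is made up), a target's
+   DEF_HELPER_2(foo, i32, i32, i32) expands in this section to
+       uint32_t helper_foo (uint32_t, uint32_t);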
*/ + +#define DEF_HELPER_FLAGS_0(name, flags, ret) \ +dh_ctype(ret) HELPER(name) (void); + +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1)); + +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); + +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3)); + +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ + dh_ctype(t4)); + +#undef GEN_HELPER +#define GEN_HELPER -1 + +#elif GEN_HELPER == 1 +/* Gen functions. */ + +#define DEF_HELPER_FLAGS_0(name, flags, ret) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \ +{ \ + int sizemask; \ + sizemask = dh_is_64bit(ret); \ + tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 0, NULL); \ +} + +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1)) \ +{ \ + TCGArg args[1]; \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ + dh_arg(t1, 1); \ + tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 1, args); \ +} + +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \ + dh_arg_decl(t2, 2)) \ +{ \ + TCGArg args[2]; \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ + dh_arg(t1, 1); \ + dh_arg(t2, 2); \ + tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 2, args); \ +} + +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \ + dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \ +{ \ + TCGArg args[3]; \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ + dh_arg(t1, 1); \ + dh_arg(t2, 2); \ + dh_arg(t3, 3); \ + tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 3, args); \ +} + +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1), \ + dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \ +{ \ + TCGArg args[4]; \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ + dh_arg(t1, 1); \ + dh_arg(t2, 2); \ + dh_arg(t3, 3); \ + dh_arg(t4, 4); \ + tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 4, args); \ +} + +#undef GEN_HELPER +#define GEN_HELPER -1 + +#elif GEN_HELPER == 2 +/* Register helpers. */ + +#define DEF_HELPER_FLAGS_0(name, flags, ret) \ +tcg_register_helper(HELPER(name), #name); + +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ +DEF_HELPER_FLAGS_0(name, flags, ret) + +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ +DEF_HELPER_FLAGS_0(name, flags, ret) + +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ +DEF_HELPER_FLAGS_0(name, flags, ret) + +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ +DEF_HELPER_FLAGS_0(name, flags, ret) + +#undef GEN_HELPER +#define GEN_HELPER -1 + +#elif GEN_HELPER == -1 +/* Undefine macros. 
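+   These are cleared so that this file can be included again with a
+   different GEN_HELPER setting, as described at the top of the file.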
*/ + +#undef DEF_HELPER_FLAGS_0 +#undef DEF_HELPER_FLAGS_1 +#undef DEF_HELPER_FLAGS_2 +#undef DEF_HELPER_FLAGS_3 +#undef DEF_HELPER_FLAGS_4 +#undef GEN_HELPER + +#endif diff --git a/src/recompiler/disas.h b/src/recompiler/disas.h new file mode 100644 index 00000000..a20df127 --- /dev/null +++ b/src/recompiler/disas.h @@ -0,0 +1,47 @@ +#ifndef _QEMU_DISAS_H +#define _QEMU_DISAS_H + +#include "qemu-common.h" + +#ifdef NEED_CPU_H +/* Disassemble this for me please... (debugging). */ +void disas(FILE *out, void *code, unsigned long size); +void target_disas(FILE *out, target_ulong code, target_ulong size, int flags); + +#ifndef VBOX +/* The usual mess... FIXME: Remove this condition once dyngen-exec.h is gone */ +#ifndef __DYNGEN_EXEC_H__ +void monitor_disas(Monitor *mon, CPUState *env, + target_ulong pc, int nb_insn, int is_physical, int flags); +#endif +#endif /*!VBOX*/ + +/* Look up symbol for debugging purpose. Returns "" if unknown. */ +const char *lookup_symbol(target_ulong orig_addr); +#endif + +struct syminfo; +struct elf32_sym; +struct elf64_sym; + +#if defined(CONFIG_USER_ONLY) +typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr); +#else +typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_phys_addr_t orig_addr); +#endif + +struct syminfo { + lookup_symbol_t lookup_symbol; + unsigned int disas_num_syms; + union { + struct elf32_sym *elf32; + struct elf64_sym *elf64; + } disas_symtab; + const char *disas_strtab; + struct syminfo *next; +}; + +/* Filled in by elfload.c. Simplistic, but will do for now. */ +extern struct syminfo *syminfos; + +#endif /* _QEMU_DISAS_H */ diff --git a/src/recompiler/dyngen-exec.h b/src/recompiler/dyngen-exec.h new file mode 100644 index 00000000..5397a211 --- /dev/null +++ b/src/recompiler/dyngen-exec.h @@ -0,0 +1,131 @@ +/* + * dyngen defines for micro operation code + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if !defined(__DYNGEN_EXEC_H__) +#define __DYNGEN_EXEC_H__ + +#ifndef VBOX +/* prevent Solaris from trying to typedef FILE in gcc's + include/floatingpoint.h which will conflict with the + definition down below */ +#ifdef __sun__ +#define _FILEDEFED +#endif +#endif /* !VBOX */ + +/* NOTE: standard headers should be used with special care at this + point because host CPU registers are used as global variables. 
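+   (AREG0 below names the host register reserved for the CPU state
+   pointer; per-target code is expected to bind it with something like
+   register CPUState *env asm(AREG0); -- given here only as an
+   illustration.)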
Some + host headers do not allow that. */ +#include <stddef.h> +#ifndef VBOX +#include <stdint.h> + +#ifdef __OpenBSD__ +#include <sys/types.h> +#endif + +/* XXX: This may be wrong for 64-bit ILP32 hosts. */ +typedef void * host_reg_t; + +#ifdef CONFIG_BSD +typedef struct __sFILE FILE; +#else +typedef struct FILE FILE; +#endif +extern int fprintf(FILE *, const char *, ...); +extern int fputs(const char *, FILE *); +extern int printf(const char *, ...); + +#else /* VBOX */ + +/* XXX: This may be wrong for 64-bit ILP32 hosts. */ +typedef void * host_reg_t; + +#include <iprt/stdint.h> +#include <stdio.h> + +#endif /* VBOX */ + +#if defined(__i386__) +# ifndef VBOX +#define AREG0 "ebp" +# else /* VBOX - why are we different? frame-pointer optimizations on mac? */ +# define AREG0 "esi" +# endif /* VBOX */ +#elif defined(__x86_64__) +#define AREG0 "r14" +#elif defined(_ARCH_PPC) +#define AREG0 "r27" +#elif defined(__arm__) +#define AREG0 "r7" +#elif defined(__hppa__) +#define AREG0 "r17" +#elif defined(__mips__) +#define AREG0 "s0" +#elif defined(__sparc__) +#ifdef CONFIG_SOLARIS +#define AREG0 "g2" +#else +#ifdef __sparc_v9__ +#define AREG0 "g5" +#else +#define AREG0 "g6" +#endif +#endif +#elif defined(__s390__) +#define AREG0 "r10" +#elif defined(__alpha__) +/* Note $15 is the frame pointer, so anything in op-i386.c that would + require a frame pointer, like alloca, would probably loose. */ +#define AREG0 "$15" +#elif defined(__mc68000) +#define AREG0 "%a5" +#elif defined(__ia64__) +#define AREG0 "r7" +#else +#error unsupported CPU +#endif + +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) +#define stringify(s) tostring(s) +#define tostring(s) #s + +/* The return address may point to the start of the next instruction. + Subtracting one gets us the call instruction itself. */ +#if defined(__s390__) && !defined(__s390x__) +# define GETPC() ((void*)(((uintptr_t)__builtin_return_address(0) & 0x7fffffffUL) - 1)) +#elif defined(__arm__) +/* Thumb return addresses have the low bit set, so we need to subtract two. + This is still safe in ARM mode because instructions are 4 bytes. */ +# define GETPC() ((void *)((uintptr_t)__builtin_return_address(0) - 2)) +#else +# define GETPC() ((void *)((uintptr_t)__builtin_return_address(0) - 1)) +#endif + +#endif /* !defined(__DYNGEN_EXEC_H__) */ diff --git a/src/recompiler/elf.h b/src/recompiler/elf.h new file mode 100644 index 00000000..eb9e3bec --- /dev/null +++ b/src/recompiler/elf.h @@ -0,0 +1,1196 @@ +#ifndef _QEMU_ELF_H +#define _QEMU_ELF_H + +#include <inttypes.h> + +/* 32-bit ELF base types. */ +typedef uint32_t Elf32_Addr; +typedef uint16_t Elf32_Half; +typedef uint32_t Elf32_Off; +typedef int32_t Elf32_Sword; +typedef uint32_t Elf32_Word; + +/* 64-bit ELF base types. */ +typedef uint64_t Elf64_Addr; +typedef uint16_t Elf64_Half; +typedef int16_t Elf64_SHalf; +typedef uint64_t Elf64_Off; +typedef int32_t Elf64_Sword; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Xword; +typedef int64_t Elf64_Sxword; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_MIPS_REGINFO 0x70000000 +#define PT_MIPS_OPTIONS 0x70000001 + +/* Flags in the e_flags field of the header */ +/* MIPS architecture level. */ +#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ +#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. 
*/ +#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ +#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */ +#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */ +#define EF_MIPS_ARCH_32 0x50000000 /* MIPS32 code. */ +#define EF_MIPS_ARCH_64 0x60000000 /* MIPS64 code. */ + +/* The ABI of a file. */ +#define EF_MIPS_ABI_O32 0x00001000 /* O32 ABI. */ +#define EF_MIPS_ABI_O64 0x00002000 /* O32 extended for 64 bit. */ + +#define EF_MIPS_NOREORDER 0x00000001 +#define EF_MIPS_PIC 0x00000002 +#define EF_MIPS_CPIC 0x00000004 +#define EF_MIPS_ABI2 0x00000020 +#define EF_MIPS_OPTIONS_FIRST 0x00000080 +#define EF_MIPS_32BITMODE 0x00000100 +#define EF_MIPS_ABI 0x0000f000 +#define EF_MIPS_ARCH 0xf0000000 + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* These constants define the various ELF target machines */ +#define EM_NONE 0 +#define EM_M32 1 +#define EM_SPARC 2 +#define EM_386 3 +#define EM_68K 4 +#define EM_88K 5 +#define EM_486 6 /* Perhaps disused */ +#define EM_860 7 + +#define EM_MIPS 8 /* MIPS R3000 (officially, big-endian only) */ + +#define EM_MIPS_RS4_BE 10 /* MIPS R4000 big-endian */ + +#define EM_PARISC 15 /* HPPA */ + +#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ + +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC64 */ + +#define EM_ARM 40 /* ARM */ + +#define EM_SH 42 /* SuperH */ + +#define EM_SPARCV9 43 /* SPARC v9 64-bit */ + +#define EM_IA_64 50 /* HP/Intel IA-64 */ + +#define EM_X86_64 62 /* AMD x86-64 */ + +#define EM_S390 22 /* IBM S/390 */ + +#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ + +#define EM_V850 87 /* NEC v850 */ + +#define EM_H8_300H 47 /* Hitachi H8/300H */ +#define EM_H8S 48 /* Hitachi H8S */ + +/* + * This is an interim value that we will use until the committee comes + * up with a final number. + */ +#define EM_ALPHA 0x9026 + +/* Bogus old v850 magic number, used by old tools. 
*/ +#define EM_CYGNUS_V850 0x9080 + +/* + * This is the old interim value for S/390 architecture + */ +#define EM_S390_OLD 0xA390 + +#define EM_MICROBLAZE 189 +#define EM_MICROBLAZE_OLD 0xBAAB + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff +#define DT_MIPS_RLD_VERSION 0x70000001 +#define DT_MIPS_TIME_STAMP 0x70000002 +#define DT_MIPS_ICHECKSUM 0x70000003 +#define DT_MIPS_IVERSION 0x70000004 +#define DT_MIPS_FLAGS 0x70000005 + #define RHF_NONE 0 + #define RHF_HARDWAY 1 + #define RHF_NOTPOT 2 +#define DT_MIPS_BASE_ADDRESS 0x70000006 +#define DT_MIPS_CONFLICT 0x70000008 +#define DT_MIPS_LIBLIST 0x70000009 +#define DT_MIPS_LOCAL_GOTNO 0x7000000a +#define DT_MIPS_CONFLICTNO 0x7000000b +#define DT_MIPS_LIBLISTNO 0x70000010 +#define DT_MIPS_SYMTABNO 0x70000011 +#define DT_MIPS_UNREFEXTNO 0x70000012 +#define DT_MIPS_GOTSYM 0x70000013 +#define DT_MIPS_HIPAGENO 0x70000014 +#define DT_MIPS_RLD_MAP 0x70000016 + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +/* Symbolic values for the entries in the auxiliary table + put on the initial stack */ +#define AT_NULL 0 /* end of vector */ +#define AT_IGNORE 1 /* entry should be ignored */ +#define AT_EXECFD 2 /* file descriptor of program */ +#define AT_PHDR 3 /* program headers for program */ +#define AT_PHENT 4 /* size of program header entry */ +#define AT_PHNUM 5 /* number of program headers */ +#define AT_PAGESZ 6 /* system page size */ +#define AT_BASE 7 /* base address of interpreter */ +#define AT_FLAGS 8 /* flags */ +#define AT_ENTRY 9 /* entry point of program */ +#define AT_NOTELF 10 /* program is not ELF */ +#define AT_UID 11 /* real uid */ +#define AT_EUID 12 /* effective uid */ +#define AT_GID 13 /* real gid */ +#define AT_EGID 14 /* effective gid */ +#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_CLKTCK 17 /* frequency at which times() increments */ + +typedef struct dynamic{ + Elf32_Sword d_tag; + union{ + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) +#define ELF64_R_TYPE_DATA(i) (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000) + +#define R_386_NONE 0 +#define 
R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 +/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */ +#define R_386_PC8 23 + +#define R_MIPS_NONE 0 +#define R_MIPS_16 1 +#define R_MIPS_32 2 +#define R_MIPS_REL32 3 +#define R_MIPS_26 4 +#define R_MIPS_HI16 5 +#define R_MIPS_LO16 6 +#define R_MIPS_GPREL16 7 +#define R_MIPS_LITERAL 8 +#define R_MIPS_GOT16 9 +#define R_MIPS_PC16 10 +#define R_MIPS_CALL16 11 +#define R_MIPS_GPREL32 12 +/* The remaining relocs are defined on Irix, although they are not + in the MIPS ELF ABI. */ +#define R_MIPS_UNUSED1 13 +#define R_MIPS_UNUSED2 14 +#define R_MIPS_UNUSED3 15 +#define R_MIPS_SHIFT5 16 +#define R_MIPS_SHIFT6 17 +#define R_MIPS_64 18 +#define R_MIPS_GOT_DISP 19 +#define R_MIPS_GOT_PAGE 20 +#define R_MIPS_GOT_OFST 21 +/* + * The following two relocation types are specified in the MIPS ABI + * conformance guide version 1.2 but not yet in the psABI. + */ +#define R_MIPS_GOTHI16 22 +#define R_MIPS_GOTLO16 23 +#define R_MIPS_SUB 24 +#define R_MIPS_INSERT_A 25 +#define R_MIPS_INSERT_B 26 +#define R_MIPS_DELETE 27 +#define R_MIPS_HIGHER 28 +#define R_MIPS_HIGHEST 29 +/* + * The following two relocation types are specified in the MIPS ABI + * conformance guide version 1.2 but not yet in the psABI. + */ +#define R_MIPS_CALLHI16 30 +#define R_MIPS_CALLLO16 31 +/* + * This range is reserved for vendor specific relocations. + */ +#define R_MIPS_LOVENDOR 100 +#define R_MIPS_HIVENDOR 127 + + +/* + * Sparc ELF relocation types + */ +#define R_SPARC_NONE 0 +#define R_SPARC_8 1 +#define R_SPARC_16 2 +#define R_SPARC_32 3 +#define R_SPARC_DISP8 4 +#define R_SPARC_DISP16 5 +#define R_SPARC_DISP32 6 +#define R_SPARC_WDISP30 7 +#define R_SPARC_WDISP22 8 +#define R_SPARC_HI22 9 +#define R_SPARC_22 10 +#define R_SPARC_13 11 +#define R_SPARC_LO10 12 +#define R_SPARC_GOT10 13 +#define R_SPARC_GOT13 14 +#define R_SPARC_GOT22 15 +#define R_SPARC_PC10 16 +#define R_SPARC_PC22 17 +#define R_SPARC_WPLT30 18 +#define R_SPARC_COPY 19 +#define R_SPARC_GLOB_DAT 20 +#define R_SPARC_JMP_SLOT 21 +#define R_SPARC_RELATIVE 22 +#define R_SPARC_UA32 23 +#define R_SPARC_PLT32 24 +#define R_SPARC_HIPLT22 25 +#define R_SPARC_LOPLT10 26 +#define R_SPARC_PCPLT32 27 +#define R_SPARC_PCPLT22 28 +#define R_SPARC_PCPLT10 29 +#define R_SPARC_10 30 +#define R_SPARC_11 31 +#define R_SPARC_64 32 +#define R_SPARC_OLO10 33 +#define R_SPARC_HH22 34 +#define R_SPARC_HM10 35 +#define R_SPARC_LM22 36 +#define R_SPARC_WDISP16 40 +#define R_SPARC_WDISP19 41 +#define R_SPARC_7 43 +#define R_SPARC_5 44 +#define R_SPARC_6 45 + +/* Bits present in AT_HWCAP, primarily for Sparc32. */ + +#define HWCAP_SPARC_FLUSH 1 /* CPU supports flush instruction. 
*/ +#define HWCAP_SPARC_STBAR 2 +#define HWCAP_SPARC_SWAP 4 +#define HWCAP_SPARC_MULDIV 8 +#define HWCAP_SPARC_V9 16 +#define HWCAP_SPARC_ULTRA3 32 + +/* + * 68k ELF relocation types + */ +#define R_68K_NONE 0 +#define R_68K_32 1 +#define R_68K_16 2 +#define R_68K_8 3 +#define R_68K_PC32 4 +#define R_68K_PC16 5 +#define R_68K_PC8 6 +#define R_68K_GOT32 7 +#define R_68K_GOT16 8 +#define R_68K_GOT8 9 +#define R_68K_GOT32O 10 +#define R_68K_GOT16O 11 +#define R_68K_GOT8O 12 +#define R_68K_PLT32 13 +#define R_68K_PLT16 14 +#define R_68K_PLT8 15 +#define R_68K_PLT32O 16 +#define R_68K_PLT16O 17 +#define R_68K_PLT8O 18 +#define R_68K_COPY 19 +#define R_68K_GLOB_DAT 20 +#define R_68K_JMP_SLOT 21 +#define R_68K_RELATIVE 22 + +/* + * Alpha ELF relocation types + */ +#define R_ALPHA_NONE 0 /* No reloc */ +#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ +#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ +#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ +#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ +#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ +#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ +#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ +#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ +#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ +#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ +#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ +#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ +#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ +#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ +#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ +#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ +#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ +#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ +#define R_ALPHA_BRSGP 28 +#define R_ALPHA_TLSGD 29 +#define R_ALPHA_TLS_LDM 30 +#define R_ALPHA_DTPMOD64 31 +#define R_ALPHA_GOTDTPREL 32 +#define R_ALPHA_DTPREL64 33 +#define R_ALPHA_DTPRELHI 34 +#define R_ALPHA_DTPRELLO 35 +#define R_ALPHA_DTPREL16 36 +#define R_ALPHA_GOTTPREL 37 +#define R_ALPHA_TPREL64 38 +#define R_ALPHA_TPRELHI 39 +#define R_ALPHA_TPRELLO 40 +#define R_ALPHA_TPREL16 41 + +#define SHF_ALPHA_GPREL 0x10000000 + + +/* PowerPC relocations defined by the ABIs */ +#define R_PPC_NONE 0 +#define R_PPC_ADDR32 1 /* 32bit absolute address */ +#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. 
*/ +#define R_PPC_ADDR16 3 /* 16bit absolute address */ +#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */ +#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */ +#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */ +#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */ +#define R_PPC_ADDR14_BRTAKEN 8 +#define R_PPC_ADDR14_BRNTAKEN 9 +#define R_PPC_REL24 10 /* PC relative 26 bit */ +#define R_PPC_REL14 11 /* PC relative 16 bit */ +#define R_PPC_REL14_BRTAKEN 12 +#define R_PPC_REL14_BRNTAKEN 13 +#define R_PPC_GOT16 14 +#define R_PPC_GOT16_LO 15 +#define R_PPC_GOT16_HI 16 +#define R_PPC_GOT16_HA 17 +#define R_PPC_PLTREL24 18 +#define R_PPC_COPY 19 +#define R_PPC_GLOB_DAT 20 +#define R_PPC_JMP_SLOT 21 +#define R_PPC_RELATIVE 22 +#define R_PPC_LOCAL24PC 23 +#define R_PPC_UADDR32 24 +#define R_PPC_UADDR16 25 +#define R_PPC_REL32 26 +#define R_PPC_PLT32 27 +#define R_PPC_PLTREL32 28 +#define R_PPC_PLT16_LO 29 +#define R_PPC_PLT16_HI 30 +#define R_PPC_PLT16_HA 31 +#define R_PPC_SDAREL16 32 +#define R_PPC_SECTOFF 33 +#define R_PPC_SECTOFF_LO 34 +#define R_PPC_SECTOFF_HI 35 +#define R_PPC_SECTOFF_HA 36 +/* Keep this the last entry. */ +#ifndef R_PPC_NUM +#define R_PPC_NUM 37 +#endif + +/* ARM specific declarations */ + +/* Processor specific flags for the ELF header e_flags field. */ +#define EF_ARM_RELEXEC 0x01 +#define EF_ARM_HASENTRY 0x02 +#define EF_ARM_INTERWORK 0x04 +#define EF_ARM_APCS_26 0x08 +#define EF_ARM_APCS_FLOAT 0x10 +#define EF_ARM_PIC 0x20 +#define EF_ALIGN8 0x40 /* 8-bit structure alignment is in use */ +#define EF_NEW_ABI 0x80 +#define EF_OLD_ABI 0x100 + +/* Additional symbol types for Thumb */ +#define STT_ARM_TFUNC 0xd + +/* ARM-specific values for sh_flags */ +#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */ +#define SHF_ARM_COMDEF 0x80000000 /* Section may be multiply defined + in the input to a link step */ + +/* ARM-specific program header flags */ +#define PF_ARM_SB 0x10000000 /* Segment contains the location + addressed by the static base */ + +/* ARM relocs. */ +#define R_ARM_NONE 0 /* No reloc */ +#define R_ARM_PC24 1 /* PC relative 26 bit branch */ +#define R_ARM_ABS32 2 /* Direct 32 bit */ +#define R_ARM_REL32 3 /* PC relative 32 bit */ +#define R_ARM_PC13 4 +#define R_ARM_ABS16 5 /* Direct 16 bit */ +#define R_ARM_ABS12 6 /* Direct 12 bit */ +#define R_ARM_THM_ABS5 7 +#define R_ARM_ABS8 8 /* Direct 8 bit */ +#define R_ARM_SBREL32 9 +#define R_ARM_THM_PC22 10 +#define R_ARM_THM_PC8 11 +#define R_ARM_AMP_VCALL9 12 +#define R_ARM_SWI24 13 +#define R_ARM_THM_SWI8 14 +#define R_ARM_XPC25 15 +#define R_ARM_THM_XPC22 16 +#define R_ARM_COPY 20 /* Copy symbol at runtime */ +#define R_ARM_GLOB_DAT 21 /* Create GOT entry */ +#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */ +#define R_ARM_RELATIVE 23 /* Adjust by program base */ +#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */ +#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */ +#define R_ARM_GOT32 26 /* 32 bit GOT entry */ +#define R_ARM_PLT32 27 /* 32 bit PLT address */ +#define R_ARM_CALL 28 +#define R_ARM_JUMP24 29 +#define R_ARM_GNU_VTENTRY 100 +#define R_ARM_GNU_VTINHERIT 101 +#define R_ARM_THM_PC11 102 /* thumb unconditional branch */ +#define R_ARM_THM_PC9 103 /* thumb conditional branch */ +#define R_ARM_RXPC25 249 +#define R_ARM_RSBREL32 250 +#define R_ARM_THM_RPC22 251 +#define R_ARM_RREL32 252 +#define R_ARM_RABS22 253 +#define R_ARM_RPC24 254 +#define R_ARM_RBASE 255 +/* Keep this the last entry. 
*/ +#define R_ARM_NUM 256 + +/* s390 relocations defined by the ABIs */ +#define R_390_NONE 0 /* No reloc. */ +#define R_390_8 1 /* Direct 8 bit. */ +#define R_390_12 2 /* Direct 12 bit. */ +#define R_390_16 3 /* Direct 16 bit. */ +#define R_390_32 4 /* Direct 32 bit. */ +#define R_390_PC32 5 /* PC relative 32 bit. */ +#define R_390_GOT12 6 /* 12 bit GOT offset. */ +#define R_390_GOT32 7 /* 32 bit GOT offset. */ +#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ +#define R_390_COPY 9 /* Copy symbol at runtime. */ +#define R_390_GLOB_DAT 10 /* Create GOT entry. */ +#define R_390_JMP_SLOT 11 /* Create PLT entry. */ +#define R_390_RELATIVE 12 /* Adjust by program base. */ +#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ +#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */ +#define R_390_GOT16 15 /* 16 bit GOT offset. */ +#define R_390_PC16 16 /* PC relative 16 bit. */ +#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ +#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ +#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ +#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ +#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ +#define R_390_64 22 /* Direct 64 bit. */ +#define R_390_PC64 23 /* PC relative 64 bit. */ +#define R_390_GOT64 24 /* 64 bit GOT offset. */ +#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ +#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ +#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ +#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ +#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ +#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ +#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ +#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ +#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */ +#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ +#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ +#define R_390_PLTOFF64 36 /* 16 bit offset from GOT to PLT. */ +#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ +#define R_390_TLS_GDCALL 38 /* Tag for function call in general + dynamic TLS code. */ +#define R_390_TLS_LDCALL 39 /* Tag for function call in local + dynamic TLS code. */ +#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic + thread local data. */ +#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic + thread local data. */ +#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS + block. 
*/ +#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS + block. */ +#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ +#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ +#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS + block. */ +/* Keep this the last entry. */ +#define R_390_NUM 57 + +/* x86-64 relocation types */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +/* Legal values for e_flags field of Elf64_Ehdr. */ + +#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */ + +/* HPPA specific definitions. */ + +/* Legal values for e_flags field of Elf32_Ehdr. */ + +#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */ +#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */ +#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */ +#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */ +#define EF_PARISC_NO_KABP 0x00100000 /* No kernel assisted branch + prediction. */ +#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */ +#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */ + +/* Defined values for `e_flags & EF_PARISC_ARCH' are: */ + +#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */ +#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */ +#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */ + +/* Additional section indeces. */ + +#define SHN_PARISC_ANSI_COMMON 0xff00 /* Section for tenatively declared + symbols in ANSI C. */ +#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */ + +/* Legal values for sh_type field of Elf32_Shdr. */ + +#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */ +#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */ +#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */ + +/* Legal values for sh_flags field of Elf32_Shdr. */ + +#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */ +#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */ +#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */ + +#define STT_HP_OPAQUE (STT_LOOS + 0x1) +#define STT_HP_STUB (STT_LOOS + 0x2) + +/* HPPA relocs. */ + +#define R_PARISC_NONE 0 /* No reloc. */ +#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */ +#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */ +#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */ +#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */ +#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. 
address. */ +#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */ +#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */ +#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */ +#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */ +#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */ +#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */ +#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */ +#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */ +#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */ +#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */ +#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */ +#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */ +#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */ +#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */ +#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */ +#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */ +#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */ +#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */ +#define R_PARISC_FPTR64 64 /* 64 bits function address. */ +#define R_PARISC_PLABEL32 65 /* 32 bits function address. */ +#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */ +#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */ +#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */ +#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */ +#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */ +#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */ +#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */ +#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */ +#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */ +#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */ +#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */ +#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */ +#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */ +#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */ +#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. */ +#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF16F 117 /* 16 bits LT-rel. address. */ +#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */ +#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */ +#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. 
*/ +#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LORESERVE 128 +#define R_PARISC_COPY 128 /* Copy relocation. */ +#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */ +#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */ +#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */ +#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */ +#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */ +#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */ +#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */ +#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */ +#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_HIRESERVE 255 + +/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ + +#define PT_HP_TLS (PT_LOOS + 0x0) +#define PT_HP_CORE_NONE (PT_LOOS + 0x1) +#define PT_HP_CORE_VERSION (PT_LOOS + 0x2) +#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3) +#define PT_HP_CORE_COMM (PT_LOOS + 0x4) +#define PT_HP_CORE_PROC (PT_LOOS + 0x5) +#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6) +#define PT_HP_CORE_STACK (PT_LOOS + 0x7) +#define PT_HP_CORE_SHM (PT_LOOS + 0x8) +#define PT_HP_CORE_MMF (PT_LOOS + 0x9) +#define PT_HP_PARALLEL (PT_LOOS + 0x10) +#define PT_HP_FASTBIND (PT_LOOS + 0x11) +#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12) +#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13) +#define PT_HP_STACK (PT_LOOS + 0x14) + +#define PT_PARISC_ARCHEXT 0x70000000 +#define PT_PARISC_UNWIND 0x70000001 + +/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */ + +#define PF_PARISC_SBP 0x08000000 + +#define PF_HP_PAGE_SIZE 0x00100000 +#define PF_HP_FAR_SHARED 0x00200000 +#define PF_HP_NEAR_SHARED 0x00400000 +#define PF_HP_CODE 0x01000000 +#define PF_HP_MODIFY 0x02000000 +#define PF_HP_LAZYSWAP 0x04000000 +#define PF_HP_SBP 0x08000000 + +/* IA-64 specific declarations. */ + +/* Processor specific flags for the Ehdr e_flags field. */ +#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */ +#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */ +#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */ + +/* Processor specific values for the Phdr p_type field. */ +#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */ +#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */ + +/* Processor specific flags for the Phdr p_flags field. 
*/ +#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Shdr sh_type field. */ +#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */ +#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */ + +/* Processor specific flags for the Shdr sh_flags field. */ +#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ +#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */ + +/* Processor specific values for the Dyn d_tag field. */ +#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0) +#define DT_IA_64_NUM 1 + +/* IA-64 relocations. */ +#define R_IA64_NONE 0x00 /* none */ +#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ +#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ +#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ +#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ +#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ +#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ +#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ +#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */ +#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */ +#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */ +#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */ +#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */ +#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */ +#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */ +#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */ +#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */ +#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */ +#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */ +#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */ +#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */ +#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */ +#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */ +#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */ +#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */ +#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */ +#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */ +#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */ +#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */ +#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */ +#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ +#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ +#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */ +#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */ +#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */ +#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */ +#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */ +#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */ +#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */ +#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */ +#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */ +#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */ +#define 
R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */ +#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */ +#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ +#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ +#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ +#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ +#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ +#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ +#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ +#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ +#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */ +#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */ +#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */ +#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ +#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ +#define R_IA64_COPY 0x84 /* copy relocation */ +#define R_IA64_SUB 0x85 /* Addend and symbol difference */ +#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ +#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ +#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */ +#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */ +#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */ +#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */ +#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm2 */ +#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */ +#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */ +#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */ +#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */ +#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */ +#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */ +#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */ +#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */ +#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ +#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela{ + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym{ + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +typedef struct elf32_hdr{ + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half 
e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[16]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr{ + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff +#define SHT_MIPS_LIST 0x70000000 +#define SHT_MIPS_CONFLICT 0x70000002 +#define SHT_MIPS_GPTAB 0x70000003 +#define SHT_MIPS_UCODE 0x70000004 + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_MASKPROC 0xf0000000 +#define SHF_MIPS_GPREL 0x10000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff +#define SHN_MIPS_ACCOMON 0xff00 + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define 
EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFOSABI_NONE 0 /* UNIX System V ABI */ +#define ELFOSABI_SYSV 0 /* Alias. */ +#define ELFOSABI_HPUX 1 /* HP-UX */ +#define ELFOSABI_NETBSD 2 /* NetBSD. */ +#define ELFOSABI_LINUX 3 /* Linux. */ +#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */ +#define ELFOSABI_AIX 7 /* IBM AIX. */ +#define ELFOSABI_IRIX 8 /* SGI Irix. */ +#define ELFOSABI_FREEBSD 9 /* FreeBSD. */ +#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */ +#define ELFOSABI_MODESTO 11 /* Novell Modesto. */ +#define ELFOSABI_OPENBSD 12 /* OpenBSD. */ +#define ELFOSABI_ARM 97 /* ARM */ +#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +/* Notes used in ET_CORE */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ + + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + +#ifdef ELF_CLASS +#if ELF_CLASS == ELFCLASS32 + +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_shdr elf32_shdr +#define elf_sym elf32_sym +#define elf_addr_t Elf32_Off + +#ifdef ELF_USES_RELOCA +# define ELF_RELOC Elf32_Rela +#else +# define ELF_RELOC Elf32_Rel +#endif + +#else + +#define elfhdr elf64_hdr +#define elf_phdr elf64_phdr +#define elf_note elf64_note +#define elf_shdr elf64_shdr +#define elf_sym elf64_sym +#define elf_addr_t Elf64_Off + +#ifdef ELF_USES_RELOCA +# define ELF_RELOC Elf64_Rela +#else +# define ELF_RELOC Elf64_Rel +#endif + +#endif /* ELF_CLASS */ + +#ifndef ElfW +# if ELF_CLASS == ELFCLASS32 +# define ElfW(x) Elf32_ ## x +# define ELFW(x) ELF32_ ## x +# else +# define ElfW(x) Elf64_ ## x +# define ELFW(x) ELF64_ ## x +# endif +#endif + +#endif /* ELF_CLASS */ + + +#endif /* _QEMU_ELF_H */ diff --git a/src/recompiler/exec-all.h b/src/recompiler/exec-all.h new file mode 100644 index 00000000..52145e3e --- /dev/null +++ b/src/recompiler/exec-all.h @@ -0,0 +1,400 @@ +/* + * internal execution defines for qemu + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
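As a rough illustration (not part of these sources) of how the e_ident indexes and magic bytes defined in elf.h above are typically combined to sanity-check an image; elf_ident_ok() is a hypothetical helper and <string.h> is assumed for memcmp():

    /* Hypothetical helper: validate the identification bytes of an ELF image
       using ELFMAG/SELFMAG and the EI_* indexes defined above. */
    static int elf_ident_ok(const unsigned char *e_ident)
    {
        if (memcmp(e_ident, ELFMAG, SELFMAG) != 0)                /* "\177ELF" */
            return 0;
        if (e_ident[EI_CLASS] != ELFCLASS32 && e_ident[EI_CLASS] != ELFCLASS64)
            return 0;
        if (e_ident[EI_DATA] != ELFDATA2LSB && e_ident[EI_DATA] != ELFDATA2MSB)
            return 0;
        return e_ident[EI_VERSION] == EV_CURRENT;
    }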
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#ifndef _EXEC_ALL_H_ +#define _EXEC_ALL_H_ + +#include "qemu-common.h" +#ifdef VBOX +# include <VBox/vmm/tm.h> +# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */ +# include <VBox/vmm/em.h> /* EMRemIsLockOwner */ +# ifndef LOG_GROUP +# define LOG_GROUP LOG_GROUP_REM +# endif +# include <VBox/log.h> +# include "REMInternal.h" +# include <VBox/vmm/vm.h> +#endif /* VBOX */ + +/* allow to see translation results - the slowdown should be negligible, so we leave it */ +#ifndef VBOX +#define DEBUG_DISAS +#endif /* !VBOX */ + +/* Page tracking code uses ram addresses in system mode, and virtual + addresses in userspace mode. Define tb_page_addr_t to be an appropriate + type. */ +#if defined(CONFIG_USER_ONLY) +typedef abi_ulong tb_page_addr_t; +#else +typedef ram_addr_t tb_page_addr_t; +#endif + +/* is_jmp field values */ +#define DISAS_NEXT 0 /* next instruction can be analyzed */ +#define DISAS_JUMP 1 /* only pc was modified dynamically */ +#define DISAS_UPDATE 2 /* cpu state was modified dynamically */ +#define DISAS_TB_JUMP 3 /* only pc was modified statically */ + +typedef struct TranslationBlock TranslationBlock; + +/* XXX: make safe guess about sizes */ +#define MAX_OP_PER_INSTR 266 + +#if HOST_LONG_BITS == 32 +#define MAX_OPC_PARAM_PER_ARG 2 +#else +#define MAX_OPC_PARAM_PER_ARG 1 +#endif +#define MAX_OPC_PARAM_IARGS 4 +#define MAX_OPC_PARAM_OARGS 1 +#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) + +/* A Call op needs up to 4 + 2N parameters on 32-bit archs, + * and up to 4 + N parameters on 64-bit archs + * (N = number of input arguments + output arguments). */ +#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) +#define OPC_BUF_SIZE 640 +#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR) + +/* Maximum size a TCG op can expand to. This is complicated because a + single op may require several host instructions and register reloads. + For now take a wild guess at 192 bytes, which should allow at least + a couple of fixup instructions per argument. 
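Plugging in the constants just defined, as a quick sanity check (not text from the sources): MAX_OPC_PARAM_ARGS = 4 + 1 = 5, so MAX_OPC_PARAM works out to 4 + 2 * 5 = 14 parameters on a 32-bit host and 4 + 1 * 5 = 9 on a 64-bit host; OPC_MAX_SIZE = 640 - 266 = 374 ops, and OPPARAM_BUF_SIZE provides 640 * MAX_OPC_PARAM parameter slots.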
*/ +#define TCG_MAX_OP_SIZE 192 + +#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM) + +extern target_ulong gen_opc_pc[OPC_BUF_SIZE]; +extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; +extern uint16_t gen_opc_icount[OPC_BUF_SIZE]; + +#include "qemu-log.h" + +void gen_intermediate_code(CPUState *env, struct TranslationBlock *tb); +void gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb); +void gen_pc_load(CPUState *env, struct TranslationBlock *tb, + uintptr_t searched_pc, int pc_pos, void *puc); + +void cpu_gen_init(void); +int cpu_gen_code(CPUState *env, struct TranslationBlock *tb, + int *gen_code_size_ptr); +int cpu_restore_state(struct TranslationBlock *tb, + CPUState *env, uintptr_t searched_pc, + void *puc); +void cpu_resume_from_signal(CPUState *env1, void *puc); +void cpu_io_recompile(CPUState *env, void *retaddr); +TranslationBlock *tb_gen_code(CPUState *env, + target_ulong pc, target_ulong cs_base, int flags, + int cflags); +void cpu_exec_init(CPUState *env); +void QEMU_NORETURN cpu_loop_exit(void); +int page_unprotect(target_ulong address, uintptr_t pc, void *puc); +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access); +void tb_invalidate_page_range(target_ulong start, target_ulong end); +void tlb_flush_page(CPUState *env, target_ulong addr); +void tlb_flush(CPUState *env, int flush_global); +#if !defined(CONFIG_USER_ONLY) +void tlb_set_page(CPUState *env, target_ulong vaddr, + target_phys_addr_t paddr, int prot, + int mmu_idx, target_ulong size); +#endif + +#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ + +#define CODE_GEN_PHYS_HASH_BITS 15 +#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS) + +#define MIN_CODE_GEN_BUFFER_SIZE (1024 * 1024) + +/* estimated block size for TB allocation */ +/* XXX: use a per code average code fragment size and modulate it + according to the host CPU */ +#if defined(CONFIG_SOFTMMU) +#define CODE_GEN_AVG_BLOCK_SIZE 128 +#else +#define CODE_GEN_AVG_BLOCK_SIZE 64 +#endif + +#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__) +#define USE_DIRECT_JUMP +#endif + +#ifdef VBOX /* bird: not safe in next step because of threading & cpu_interrupt. */ +# undef USE_DIRECT_JUMP +#endif /* VBOX */ + +struct TranslationBlock { + target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ + target_ulong cs_base; /* CS base for this block */ + uint64_t flags; /* flags defining in which context the code was generated */ + uint16_t size; /* size of target code for this block (1 <= + size <= TARGET_PAGE_SIZE) */ + uint16_t cflags; /* compile flags */ +#define CF_COUNT_MASK 0x7fff +#define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */ +#ifdef VBOX +# define CF_RAW_MODE 0x0010 /* block was generated in raw mode */ +#endif + + uint8_t *tc_ptr; /* pointer to the translated code */ + /* next matching tb for physical address. */ + struct TranslationBlock *phys_hash_next; + /* first and second physical page containing code. The lower bit + of the pointer tells the index in page_next[] */ + struct TranslationBlock *page_next[2]; + tb_page_addr_t page_addr[2]; + + /* the following data are used to directly call another TB from + the code of this one. 
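The low tag bits mentioned in the comments above are decoded with the same two-line pattern throughout this code (see tb_page_remove() and tb_jmp_remove() in exec.c further down); a minimal sketch, where tb_untag_ptr() is a hypothetical helper, not a function from these sources:

    /* Strip the tag bits from a page_next[]/jmp list pointer. */
    static inline TranslationBlock *tb_untag_ptr(TranslationBlock *tagged, int *pn)
    {
        *pn = (int)((intptr_t)tagged & 3);   /* 0/1 = slot index; 2 = end-of-list marker in the jmp chain */
        return (TranslationBlock *)((intptr_t)tagged & ~3);
    }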
*/ + uint16_t tb_next_offset[2]; /* offset of original jump target */ +#ifdef USE_DIRECT_JUMP + uint16_t tb_jmp_offset[2]; /* offset of jump instruction */ +#else + uintptr_t tb_next[2]; /* address of jump generated code */ +#endif + /* list of TBs jumping to this one. This is a circular list using + the two least significant bits of the pointers to tell what is + the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 = + jmp_first */ + struct TranslationBlock *jmp_next[2]; + struct TranslationBlock *jmp_first; + uint32_t icount; +}; + +static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc) +{ + target_ulong tmp; + tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); + return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK; +} + +static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) +{ + target_ulong tmp; + tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)); + return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK) + | (tmp & TB_JMP_ADDR_MASK)); +} + +static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc) +{ + return pc & (CODE_GEN_PHYS_HASH_SIZE - 1); +} + +TranslationBlock *tb_alloc(target_ulong pc); +void tb_free(TranslationBlock *tb); +void tb_flush(CPUState *env); +void tb_link_page(TranslationBlock *tb, + tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); + +extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; + +#if defined(USE_DIRECT_JUMP) + +#if defined(_ARCH_PPC) +extern void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); +#define tb_set_jmp_target1 ppc_tb_set_jmp_target +#elif defined(__i386__) || defined(__x86_64__) +static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + /* patch the branch destination */ + *(uint32_t *)jmp_addr = addr - (jmp_addr + 4); + /* no need to flush icache explicitly */ +} +#elif defined(__arm__) +static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ +#if QEMU_GNUC_PREREQ(4, 1) + void __clear_cache(char *beg, char *end); +#else + register unsigned long _beg __asm ("a1"); + register unsigned long _end __asm ("a2"); + register unsigned long _flg __asm ("a3"); +#endif + + /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */ + *(uint32_t *)jmp_addr = + (*(uint32_t *)jmp_addr & ~0xffffff) + | (((addr - (jmp_addr + 8)) >> 2) & 0xffffff); + +#if QEMU_GNUC_PREREQ(4, 1) + __clear_cache((char *) jmp_addr, (char *) jmp_addr + 4); +#else + /* flush icache */ + _beg = jmp_addr; + _end = jmp_addr + 4; + _flg = 0; + __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); +#endif +} +#endif + +static inline void tb_set_jmp_target(TranslationBlock *tb, + int n, uintptr_t addr) +{ + uintptr_t offset; + + offset = tb->tb_jmp_offset[n]; + tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr); +} + +#else + +/* set the jump target */ +static inline void tb_set_jmp_target(TranslationBlock *tb, + int n, uintptr_t addr) +{ + tb->tb_next[n] = addr; +} + +#endif + +static inline void tb_add_jump(TranslationBlock *tb, int n, + TranslationBlock *tb_next) +{ + /* NOTE: this test is only needed for thread safety */ + if (!tb->jmp_next[n]) { + /* patch the native jump address */ + tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr); + + /* add in TB jmp circular list */ + tb->jmp_next[n] = tb_next->jmp_first; + tb_next->jmp_first = (TranslationBlock *)((intptr_t)(tb) | (n)); + } +} + +TranslationBlock 
*tb_find_pc(uintptr_t pc_ptr); + +#include "qemu-lock.h" + +extern spinlock_t tb_lock; + +extern int tb_invalidated_flag; + +#if !defined(CONFIG_USER_ONLY) + +extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4]; +extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4]; +extern void *io_mem_opaque[IO_MEM_NB_ENTRIES]; + +void tlb_fill(target_ulong addr, int is_write, int mmu_idx, + void *retaddr); + +#include "softmmu_defs.h" + +#define ACCESS_TYPE (NB_MMU_MODES + 1) +#define MEMSUFFIX _code +#define env cpu_single_env + +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" + +#undef ACCESS_TYPE +#undef MEMSUFFIX +#undef env + +#endif + +#if defined(CONFIG_USER_ONLY) +static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr) +{ + return addr; +} +#else +# ifdef VBOX +target_ulong remR3PhysGetPhysicalAddressCode(CPUState *env, target_ulong addr, CPUTLBEntry *pTLBEntry, target_phys_addr_t ioTLBEntry); +# endif +/* NOTE: this function can trigger an exception */ +/* NOTE2: the returned address is not exactly the physical address: it + is the offset relative to phys_ram_base */ +static inline tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr) +{ + int mmu_idx, page_index, pd; +# ifndef VBOX + void *p; +# endif + + page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + mmu_idx = cpu_mmu_index(env1); + if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code != + (addr & TARGET_PAGE_MASK))) { + ldub_code(addr); + } + pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK; + if (pd > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) { +# ifdef VBOX + /* deal with non-MMIO access handlers. */ + return remR3PhysGetPhysicalAddressCode(env1, addr, + &env1->tlb_table[mmu_idx][page_index], + env1->iotlb[mmu_idx][page_index]); +# elif defined(TARGET_SPARC) || defined(TARGET_MIPS) + do_unassigned_access(addr, 0, 1, 0, 4); +#else + cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr); +#endif + } +# if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + return addr + env1->tlb_table[mmu_idx][page_index].addend; +# elif defined(VBOX) + Assert(env1->phys_addends[mmu_idx][page_index] != -1); + return addr + env1->phys_addends[mmu_idx][page_index]; +# else + p = (void *)(uintptr_t)addr + + env1->tlb_table[mmu_idx][page_index].addend; + return qemu_ram_addr_from_host(p); +# endif +} +#endif + +typedef void (CPUDebugExcpHandler)(CPUState *env); + +CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler); + +#ifndef VBOX +/* vl.c */ +extern int singlestep; + +/* cpu-exec.c */ +extern volatile sig_atomic_t exit_request; +#endif /*!VBOX*/ + + +#endif diff --git a/src/recompiler/exec.c b/src/recompiler/exec.c new file mode 100644 index 00000000..ca632372 --- /dev/null +++ b/src/recompiler/exec.c @@ -0,0 +1,4586 @@ +/* + * virtual page mapping and translated block handling + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include "config.h" +#ifndef VBOX +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/types.h> +#include <sys/mman.h> +#endif +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <inttypes.h> +#else /* VBOX */ +# include <stdlib.h> +# include <stdio.h> +# include <iprt/alloc.h> +# include <iprt/string.h> +# include <iprt/param.h> +# include <VBox/vmm/pgm.h> /* PGM_DYNAMIC_RAM_ALLOC */ +# include <iprt/errcore.h> +#endif /* VBOX */ + +#include "cpu.h" +#include "exec-all.h" +#include "qemu-common.h" +#include "tcg.h" +#ifndef VBOX +#include "hw/hw.h" +#include "hw/qdev.h" +#endif /* !VBOX */ +#include "osdep.h" +#include "kvm.h" +#include "qemu-timer.h" +#if defined(CONFIG_USER_ONLY) +#include <qemu.h> +#include <signal.h> +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <sys/param.h> +#if __FreeBSD_version >= 700104 +#define HAVE_KINFO_GETVMMAP +#define sigqueue sigqueue_freebsd /* avoid redefinition */ +#include <sys/time.h> +#include <sys/proc.h> +#include <machine/profile.h> +#define _KERNEL +#include <sys/user.h> +#undef _KERNEL +#undef sigqueue +#include <libutil.h> +#endif +#endif +#endif + +//#define DEBUG_TB_INVALIDATE +//#define DEBUG_FLUSH +//#define DEBUG_TLB +//#define DEBUG_UNASSIGNED + +/* make various TB consistency checks */ +//#define DEBUG_TB_CHECK +//#define DEBUG_TLB_CHECK + +//#define DEBUG_IOPORT +//#define DEBUG_SUBPAGE + +#if !defined(CONFIG_USER_ONLY) +/* TB consistency checks only implemented for usermode emulation. */ +#undef DEBUG_TB_CHECK +#endif + +#define SMC_BITMAP_USE_THRESHOLD 10 + +static TranslationBlock *tbs; +static int code_gen_max_blocks; +TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; +static int nb_tbs; +/* any access to the tbs or the page table must use this lock */ +spinlock_t tb_lock = SPIN_LOCK_UNLOCKED; + +#ifndef VBOX +#if defined(__arm__) || defined(__sparc_v9__) +/* The prologue must be reachable with a direct jump. ARM and Sparc64 + have limited branch ranges (possibly also PPC) so place it in a + section close to code segment. */ +#define code_gen_section \ + __attribute__((__section__(".gen_code"))) \ + __attribute__((aligned (32))) +#elif defined(_WIN32) +/* Maximum alignment for Win32 is 16. 
*/ +#define code_gen_section \ + __attribute__((aligned (16))) +#else +#define code_gen_section \ + __attribute__((aligned (32))) +#endif + +uint8_t code_gen_prologue[1024] code_gen_section; +#else /* VBOX */ +extern uint8_t *code_gen_prologue; +#endif /* VBOX */ +static uint8_t *code_gen_buffer; +static size_t code_gen_buffer_size; +/* threshold to flush the translated code buffer */ +static size_t code_gen_buffer_max_size; +static uint8_t *code_gen_ptr; + +#if !defined(CONFIG_USER_ONLY) +# ifndef VBOX +int phys_ram_fd; +static int in_migration; +# endif /* !VBOX */ + +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) }; +#endif + +CPUState *first_cpu; +/* current CPU in the current thread. It is only valid inside + cpu_exec() */ +CPUState *cpu_single_env; +/* 0 = Do not count executed instructions. + 1 = Precise instruction counting. + 2 = Adaptive rate instruction counting. */ +int use_icount = 0; +/* Current instruction counter. While executing translated code this may + include some instructions that have not yet been executed. */ +int64_t qemu_icount; + +typedef struct PageDesc { + /* list of TBs intersecting this ram page */ + TranslationBlock *first_tb; + /* in order to optimize self modifying code, we count the number + of lookups we do to a given page to use a bitmap */ + unsigned int code_write_count; + uint8_t *code_bitmap; +#if defined(CONFIG_USER_ONLY) + unsigned long flags; +#endif +} PageDesc; + +/* In system mode we want L1_MAP to be based on ram offsets, + while in user mode we want it to be based on virtual addresses. */ +#if !defined(CONFIG_USER_ONLY) +#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS +# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS +#else +# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS +#endif +#else +# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS +#endif + +/* Size of the L2 (and L3, etc) page tables. */ +#define L2_BITS 10 +#define L2_SIZE (1 << L2_BITS) + +/* The bits remaining after N lower levels of page tables. */ +#define P_L1_BITS_REM \ + ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS) +#define V_L1_BITS_REM \ + ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS) + +/* Size of the L1 page table. Avoid silly small sizes. */ +#if P_L1_BITS_REM < 4 +#define P_L1_BITS (P_L1_BITS_REM + L2_BITS) +#else +#define P_L1_BITS P_L1_BITS_REM +#endif + +#if V_L1_BITS_REM < 4 +#define V_L1_BITS (V_L1_BITS_REM + L2_BITS) +#else +#define V_L1_BITS V_L1_BITS_REM +#endif + +#define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS) +#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS) + +#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS) +#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) + +size_t qemu_real_host_page_size; +size_t qemu_host_page_bits; +size_t qemu_host_page_size; +uintptr_t qemu_host_page_mask; + +/* This is a multi-level map on the virtual address space. + The bottom level has pointers to PageDesc. */ +static void *l1_map[V_L1_SIZE]; + +#if !defined(CONFIG_USER_ONLY) +typedef struct PhysPageDesc { + /* offset in host memory of the page + io_index in the low bits */ + ram_addr_t phys_offset; + ram_addr_t region_offset; +} PhysPageDesc; + +/* This is a multi-level map on the physical address space. + The bottom level has pointers to PhysPageDesc. 
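Taking a 32-bit guest virtual address space with 12-bit target pages as a worked example (both values are assumptions, not fixed here): V_L1_BITS_REM = (32 - 12) % 10 = 0, which is below 4, so V_L1_BITS = 0 + 10 = 10, V_L1_SIZE = 1024 and V_L1_SHIFT = 32 - 12 - 10 = 10. The virtual map then has exactly two levels: the top 10 bits of the page index select one of the 1024 l1_map slots, and the low 10 bits select a PageDesc inside a 1024-entry leaf array, which is precisely the walk performed by page_find_alloc() further down.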
*/ +static void *l1_phys_map[P_L1_SIZE]; + +static void io_mem_init(void); + +/* io memory support */ +CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4]; +CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4]; +void *io_mem_opaque[IO_MEM_NB_ENTRIES]; +static char io_mem_used[IO_MEM_NB_ENTRIES]; +static int io_mem_watch; +#endif + +#ifndef VBOX +/* log support */ +#ifdef WIN32 +static const char *logfilename = "qemu.log"; +#else +static const char *logfilename = "/tmp/qemu.log"; +#endif +#endif /* !VBOX */ +FILE *logfile; +int loglevel; +#ifndef VBOX +static int log_append = 0; +#endif /* !VBOX */ + +/* statistics */ +#ifndef VBOX +#if !defined(CONFIG_USER_ONLY) +static int tlb_flush_count; +#endif +static int tb_flush_count; +static int tb_phys_invalidate_count; +#else /* VBOX - Resettable U32 stats, see VBoxRecompiler.c. */ +uint32_t tlb_flush_count; +uint32_t tb_flush_count; +uint32_t tb_phys_invalidate_count; +#endif /* VBOX */ + +#ifndef VBOX +#ifdef _WIN32 +static void map_exec(void *addr, size_t size) +{ + DWORD old_protect; + VirtualProtect(addr, size, + PAGE_EXECUTE_READWRITE, &old_protect); + +} +#else +static void map_exec(void *addr, size_t size) +{ + uintptr_t start, end, page_size; + + page_size = getpagesize(); + start = (uintptr_t)addr; + start &= ~(page_size - 1); + + end = (uintptr_t)addr + size; + end += page_size - 1; + end &= ~(page_size - 1); + + mprotect((void *)start, end - start, + PROT_READ | PROT_WRITE | PROT_EXEC); +} +#endif +#else /* VBOX */ +static void map_exec(void *addr, size_t size) +{ + RTMemProtect(addr, size, + RTMEM_PROT_EXEC | RTMEM_PROT_READ | RTMEM_PROT_WRITE); +} +#endif /* VBOX */ + +static void page_init(void) +{ + /* NOTE: we can always suppose that qemu_host_page_size >= + TARGET_PAGE_SIZE */ +#ifdef VBOX + RTMemProtect(code_gen_buffer, code_gen_buffer_size, + RTMEM_PROT_EXEC | RTMEM_PROT_READ | RTMEM_PROT_WRITE); + qemu_real_host_page_size = PAGE_SIZE; +#else /* !VBOX */ +#ifdef _WIN32 + { + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + qemu_real_host_page_size = system_info.dwPageSize; + } +#else + qemu_real_host_page_size = getpagesize(); +#endif +#endif /* !VBOX */ + if (qemu_host_page_size == 0) + qemu_host_page_size = qemu_real_host_page_size; + if (qemu_host_page_size < TARGET_PAGE_SIZE) + qemu_host_page_size = TARGET_PAGE_SIZE; + qemu_host_page_bits = 0; + while ((1 << qemu_host_page_bits) < VBOX_ONLY((int))qemu_host_page_size) + qemu_host_page_bits++; + qemu_host_page_mask = ~(qemu_host_page_size - 1); + +#ifndef VBOX /* We use other means to set reserved bit on our pages */ +#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) + { +#ifdef HAVE_KINFO_GETVMMAP + struct kinfo_vmentry *freep; + int i, cnt; + + freep = kinfo_getvmmap(getpid(), &cnt); + if (freep) { + mmap_lock(); + for (i = 0; i < cnt; i++) { + uintptr_t startaddr, endaddr; + + startaddr = freep[i].kve_start; + endaddr = freep[i].kve_end; + if (h2g_valid(startaddr)) { + startaddr = h2g(startaddr) & TARGET_PAGE_MASK; + + if (h2g_valid(endaddr)) { + endaddr = h2g(endaddr); + page_set_flags(startaddr, endaddr, PAGE_RESERVED); + } else { +#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS + endaddr = ~0ul; + page_set_flags(startaddr, endaddr, PAGE_RESERVED); +#endif + } + } + } + free(freep); + mmap_unlock(); + } +#else + FILE *f; + + last_brk = (uintptr_t)sbrk(0); + + f = fopen("/compat/linux/proc/self/maps", "r"); + if (f) { + mmap_lock(); + + do { + uintptr_t startaddr, endaddr; + int n; + + n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr); + + if (n 
== 2 && h2g_valid(startaddr)) { + startaddr = h2g(startaddr) & TARGET_PAGE_MASK; + + if (h2g_valid(endaddr)) { + endaddr = h2g(endaddr); + } else { + endaddr = ~0ul; + } + page_set_flags(startaddr, endaddr, PAGE_RESERVED); + } + } while (!feof(f)); + + fclose(f); + mmap_unlock(); + } +#endif + } +#endif +#endif /* !VBOX */ +} + +static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) +{ + PageDesc *pd; + void **lp; + int i; + +#if defined(CONFIG_USER_ONLY) + /* We can't use qemu_malloc because it may recurse into a locked mutex. */ +# define ALLOC(P, SIZE) \ + do { \ + P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \ + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \ + } while (0) +#else +# define ALLOC(P, SIZE) \ + do { P = qemu_mallocz(SIZE); } while (0) +#endif + + /* Level 1. Always allocated. */ + lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1)); + + /* Level 2..N-1. */ + for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) { + void **p = *lp; + + if (p == NULL) { + if (!alloc) { + return NULL; + } + ALLOC(p, sizeof(void *) * L2_SIZE); + *lp = p; + } + + lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1)); + } + + pd = *lp; + if (pd == NULL) { + if (!alloc) { + return NULL; + } + ALLOC(pd, sizeof(PageDesc) * L2_SIZE); + *lp = pd; + } + +#undef ALLOC + + return pd + (index & (L2_SIZE - 1)); +} + +static inline PageDesc *page_find(tb_page_addr_t index) +{ + return page_find_alloc(index, 0); +} + +#if !defined(CONFIG_USER_ONLY) +static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc) +{ + PhysPageDesc *pd; + void **lp; + int i; + + /* Level 1. Always allocated. */ + lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1)); + + /* Level 2..N-1. */ + for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) { + void **p = *lp; + if (p == NULL) { + if (!alloc) { + return NULL; + } + *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE); + } + lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1)); + } + + pd = *lp; + if (pd == NULL) { + int i; + + if (!alloc) { + return NULL; + } + + *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE); + + for (i = 0; i < L2_SIZE; i++) { + pd[i].phys_offset = IO_MEM_UNASSIGNED; + pd[i].region_offset = (index + i) << TARGET_PAGE_BITS; + } + } + + return pd + (index & (L2_SIZE - 1)); +} + +static inline PhysPageDesc *phys_page_find(target_phys_addr_t index) +{ + return phys_page_find_alloc(index, 0); +} + +static void tlb_protect_code(ram_addr_t ram_addr); +static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr, + target_ulong vaddr); +#define mmap_lock() do { } while(0) +#define mmap_unlock() do { } while(0) +#endif + +#ifdef VBOX /* We don't need such huge codegen buffer size, as execute + most of the code in raw or hm mode. */ +#define DEFAULT_CODE_GEN_BUFFER_SIZE (8 * 1024 * 1024) +#else /* !VBOX */ +#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024) +#endif /* !VBOX */ + +#if defined(CONFIG_USER_ONLY) +/* Currently it is not recommended to allocate big chunks of data in + user mode. 
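A typical consumer of the two-level walk implemented by page_find_alloc() above looks like the sketch below; page_has_tbs() is a hypothetical helper (tb_phys_invalidate() later in this file performs the same lookup), and page_find() returns NULL for pages no TB has ever touched:

    /* Does any translated block intersect the guest page containing 'addr'? */
    static int page_has_tbs(tb_page_addr_t addr)
    {
        PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
        return p != NULL && p->first_tb != NULL;
    }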
It will change when a dedicated libc will be used */ +#define USE_STATIC_CODE_GEN_BUFFER +#endif + +#if defined(VBOX) && defined(USE_STATIC_CODE_GEN_BUFFER) +# error "VBox allocates codegen buffer dynamically" +#endif + +#ifdef USE_STATIC_CODE_GEN_BUFFER +static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] + __attribute__((aligned (CODE_GEN_ALIGN))); +#endif + +static void code_gen_alloc(uintptr_t tb_size) +{ +#ifdef USE_STATIC_CODE_GEN_BUFFER + code_gen_buffer = static_code_gen_buffer; + code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; + map_exec(code_gen_buffer, code_gen_buffer_size); +#else +# ifdef VBOX + /* We cannot use phys_ram_size here, as it's 0 now, + * it only gets initialized once RAM registration callback + * (REMR3NotifyPhysRamRegister()) called. + */ + code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; +# else /* !VBOX */ + code_gen_buffer_size = tb_size; + if (code_gen_buffer_size == 0) { +#if defined(CONFIG_USER_ONLY) + /* in user mode, phys_ram_size is not meaningful */ + code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; +#else + /* XXX: needs adjustments */ + code_gen_buffer_size = (uintptr_t)(ram_size / 4); +#endif + } + if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) + code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE; +# endif /* !VBOX */ + /* The code gen buffer location may have constraints depending on + the host cpu and OS */ +# ifdef VBOX + code_gen_buffer = RTMemExecAlloc(code_gen_buffer_size); + + if (!code_gen_buffer) { + LogRel(("REM: failed allocate codegen buffer %lld\n", + code_gen_buffer_size)); + return; + } +# else /* !VBOX */ +#if defined(__linux__) + { + int flags; + void *start = NULL; + + flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if defined(__x86_64__) + flags |= MAP_32BIT; + /* Cannot map more than that */ + if (code_gen_buffer_size > (800 * 1024 * 1024)) + code_gen_buffer_size = (800 * 1024 * 1024); +#elif defined(__sparc_v9__) + // Map the buffer below 2G, so we can use direct calls and branches + flags |= MAP_FIXED; + start = (void *) 0x60000000UL; + if (code_gen_buffer_size > (512 * 1024 * 1024)) + code_gen_buffer_size = (512 * 1024 * 1024); +#elif defined(__arm__) + /* Map the buffer below 32M, so we can use direct calls and branches */ + flags |= MAP_FIXED; + start = (void *) 0x01000000UL; + if (code_gen_buffer_size > 16 * 1024 * 1024) + code_gen_buffer_size = 16 * 1024 * 1024; +#elif defined(__s390x__) + /* Map the buffer so that we can use direct calls and branches. */ + /* We have a +- 4GB range on the branches; leave some slop. 
*/ + if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) { + code_gen_buffer_size = 3ul * 1024 * 1024 * 1024; + } + start = (void *)0x90000000UL; +#endif + code_gen_buffer = mmap(start, code_gen_buffer_size, + PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (code_gen_buffer == MAP_FAILED) { + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); + exit(1); + } + } +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) + { + int flags; + void *addr = NULL; + flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if defined(__x86_64__) + /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume + * 0x40000000 is free */ + flags |= MAP_FIXED; + addr = (void *)0x40000000; + /* Cannot map more than that */ + if (code_gen_buffer_size > (800 * 1024 * 1024)) + code_gen_buffer_size = (800 * 1024 * 1024); +#endif + code_gen_buffer = mmap(addr, code_gen_buffer_size, + PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (code_gen_buffer == MAP_FAILED) { + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); + exit(1); + } + } +#else + code_gen_buffer = qemu_malloc(code_gen_buffer_size); + map_exec(code_gen_buffer, code_gen_buffer_size); +#endif +# endif /* !VBOX */ +#endif /* !USE_STATIC_CODE_GEN_BUFFER */ +#ifndef VBOX /** @todo r=bird: why are we different? */ + map_exec(code_gen_prologue, sizeof(code_gen_prologue)); +#else + map_exec(code_gen_prologue, _1K); +#endif + code_gen_buffer_max_size = code_gen_buffer_size - + (TCG_MAX_OP_SIZE * OPC_MAX_SIZE); + code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; + tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock)); +} + +/* Must be called before using the QEMU cpus. 'tb_size' is the size + (in bytes) allocated to the translation buffer. Zero means default + size. */ +void cpu_exec_init_all(uintptr_t tb_size) +{ + cpu_gen_init(); + code_gen_alloc(tb_size); + code_gen_ptr = code_gen_buffer; + page_init(); +#if !defined(CONFIG_USER_ONLY) + io_mem_init(); +#endif +#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) + /* There's no guest base to take into account, so go ahead and + initialize the prologue now. */ + tcg_prologue_init(&tcg_ctx); +#endif +} + +#ifndef VBOX +#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY) + +static int cpu_common_post_load(void *opaque, int version_id) +{ + CPUState *env = opaque; + + /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the + version_id is increased. 
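Plugging in the constants above as a rough sanity check (not a computation taken from the sources): with the VBox DEFAULT_CODE_GEN_BUFFER_SIZE of 8 MiB, code_gen_buffer_max_size reserves TCG_MAX_OP_SIZE * OPC_MAX_SIZE = 192 * 374 = 71808 bytes (about 70 KiB) of headroom at the end of the buffer, and with the CONFIG_SOFTMMU value of CODE_GEN_AVG_BLOCK_SIZE, code_gen_max_blocks = 8388608 / 128 = 65536 TranslationBlock descriptors are allocated for the tbs array.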
*/ + env->interrupt_request &= ~0x01; + tlb_flush(env, 1); + + return 0; +} + +static const VMStateDescription vmstate_cpu_common = { + .name = "cpu_common", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .post_load = cpu_common_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT32(halted, CPUState), + VMSTATE_UINT32(interrupt_request, CPUState), + VMSTATE_END_OF_LIST() + } +}; +#endif + +CPUState *qemu_get_cpu(int cpu) +{ + CPUState *env = first_cpu; + + while (env) { + if (env->cpu_index == cpu) + break; + env = env->next_cpu; + } + + return env; +} + +#endif /* !VBOX */ + +void cpu_exec_init(CPUState *env) +{ + CPUState **penv; + int cpu_index; + +#if defined(CONFIG_USER_ONLY) + cpu_list_lock(); +#endif + env->next_cpu = NULL; + penv = &first_cpu; + cpu_index = 0; + while (*penv != NULL) { + penv = &(*penv)->next_cpu; + cpu_index++; + } + env->cpu_index = cpu_index; + env->numa_node = 0; + QTAILQ_INIT(&env->breakpoints); + QTAILQ_INIT(&env->watchpoints); + *penv = env; +#ifndef VBOX +#if defined(CONFIG_USER_ONLY) + cpu_list_unlock(); +#endif +#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY) + vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env); + register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION, + cpu_save, cpu_load, env); +#endif +#endif /* !VBOX */ +} + +static inline void invalidate_page_bitmap(PageDesc *p) +{ + if (p->code_bitmap) { + qemu_free(p->code_bitmap); + p->code_bitmap = NULL; + } + p->code_write_count = 0; +} + +/* Set to NULL all the 'first_tb' fields in all PageDescs. */ + +static void page_flush_tb_1 (int level, void **lp) +{ + int i; + + if (*lp == NULL) { + return; + } + if (level == 0) { + PageDesc *pd = *lp; + for (i = 0; i < L2_SIZE; ++i) { + pd[i].first_tb = NULL; + invalidate_page_bitmap(pd + i); + } + } else { + void **pp = *lp; + for (i = 0; i < L2_SIZE; ++i) { + page_flush_tb_1 (level - 1, pp + i); + } + } +} + +static void page_flush_tb(void) +{ + int i; + for (i = 0; i < V_L1_SIZE; i++) { + page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i); + } +} + +/* flush all the translation blocks */ +/* XXX: tb_flush is currently not thread safe */ +void tb_flush(CPUState *env1) +{ + CPUState *env; +#ifdef VBOX + STAM_PROFILE_START(&env1->StatTbFlush, a); +#endif +#if defined(DEBUG_FLUSH) + printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", + (unsigned long)(code_gen_ptr - code_gen_buffer), + nb_tbs, nb_tbs > 0 ? 
+ ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0); +#endif + if ((uintptr_t)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size) + cpu_abort(env1, "Internal error: code buffer overflow\n"); + + nb_tbs = 0; + + for(env = first_cpu; env != NULL; env = env->next_cpu) { + memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *)); + } + + memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *)); + page_flush_tb(); + + code_gen_ptr = code_gen_buffer; + /* XXX: flush processor icache at this point if cache flush is + expensive */ + tb_flush_count++; +#ifdef VBOX + STAM_PROFILE_STOP(&env1->StatTbFlush, a); +#endif +} + +#ifdef DEBUG_TB_CHECK + +static void tb_invalidate_check(target_ulong address) +{ + TranslationBlock *tb; + int i; + address &= TARGET_PAGE_MASK; + for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + if (!(address + TARGET_PAGE_SIZE <= tb->pc || + address >= tb->pc + tb->size)) { + printf("ERROR invalidate: address=" TARGET_FMT_lx + " PC=%08lx size=%04x\n", + address, (long)tb->pc, tb->size); + } + } + } +} + +/* verify that all the pages have correct rights for code */ +static void tb_page_check(void) +{ + TranslationBlock *tb; + int i, flags1, flags2; + + for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + flags1 = page_get_flags(tb->pc); + flags2 = page_get_flags(tb->pc + tb->size - 1); + if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { + printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", + (long)tb->pc, tb->size, flags1, flags2); + } + } + } +} + +#endif + +/* invalidate one TB */ +static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, + int next_offset) +{ + TranslationBlock *tb1; + for(;;) { + tb1 = *ptb; + if (tb1 == tb) { + *ptb = *(TranslationBlock **)((char *)tb1 + next_offset); + break; + } + ptb = (TranslationBlock **)((char *)tb1 + next_offset); + } +} + +static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) +{ + TranslationBlock *tb1; + unsigned int n1; + + for(;;) { + tb1 = *ptb; + n1 = (intptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3); + if (tb1 == tb) { + *ptb = tb1->page_next[n1]; + break; + } + ptb = &tb1->page_next[n1]; + } +} + +static inline void tb_jmp_remove(TranslationBlock *tb, int n) +{ + TranslationBlock *tb1, **ptb; + unsigned int n1; + + ptb = &tb->jmp_next[n]; + tb1 = *ptb; + if (tb1) { + /* find tb(n) in circular list */ + for(;;) { + tb1 = *ptb; + n1 = (intptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3); + if (n1 == n && tb1 == tb) + break; + if (n1 == 2) { + ptb = &tb1->jmp_first; + } else { + ptb = &tb1->jmp_next[n1]; + } + } + /* now we can suppress tb(n) from the list */ + *ptb = tb->jmp_next[n]; + + tb->jmp_next[n] = NULL; + } +} + +/* reset the jump entry 'n' of a TB so that it is not chained to + another TB */ +static inline void tb_reset_jump(TranslationBlock *tb, int n) +{ + tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); +} + +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) +{ + CPUState *env; + PageDesc *p; + unsigned int h, n1; + tb_page_addr_t phys_pc; + TranslationBlock *tb1, *tb2; + + /* remove the TB from the hash list */ + phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); + h = tb_phys_hash_func(phys_pc); + tb_remove(&tb_phys_hash[h], tb, + offsetof(TranslationBlock, phys_hash_next)); + + /* remove the TB from the 
page list */ + if (tb->page_addr[0] != page_addr) { + p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); + tb_page_remove(&p->first_tb, tb); + invalidate_page_bitmap(p); + } + if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { + p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); + tb_page_remove(&p->first_tb, tb); + invalidate_page_bitmap(p); + } + + tb_invalidated_flag = 1; + + /* remove the TB from the hash list */ + h = tb_jmp_cache_hash_func(tb->pc); + for(env = first_cpu; env != NULL; env = env->next_cpu) { + if (env->tb_jmp_cache[h] == tb) + env->tb_jmp_cache[h] = NULL; + } + + /* suppress this TB from the two jump lists */ + tb_jmp_remove(tb, 0); + tb_jmp_remove(tb, 1); + + /* suppress any remaining jumps to this TB */ + tb1 = tb->jmp_first; + for(;;) { + n1 = (intptr_t)tb1 & 3; + if (n1 == 2) + break; + tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3); + tb2 = tb1->jmp_next[n1]; + tb_reset_jump(tb1, n1); + tb1->jmp_next[n1] = NULL; + tb1 = tb2; + } + tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2); /* fail safe */ + + tb_phys_invalidate_count++; +} + +#ifdef VBOX + +void tb_invalidate_virt(CPUState *env, uint32_t eip) +{ +# if 1 + tb_flush(env); +# else + uint8_t *cs_base, *pc; + unsigned int flags, h, phys_pc; + TranslationBlock *tb, **ptb; + + flags = env->hflags; + flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)); + cs_base = env->segs[R_CS].base; + pc = cs_base + eip; + + tb = tb_find(&ptb, (uintptr_t)pc, (uintptr_t)cs_base, + flags); + + if(tb) + { +# ifdef DEBUG + printf("invalidating TB (%08X) at %08X\n", tb, eip); +# endif + tb_invalidate(tb); + //Note: this will leak TBs, but the whole cache will be flushed + // when it happens too often + tb->pc = 0; + tb->cs_base = 0; + tb->flags = 0; + } +# endif +} + +# ifdef VBOX_STRICT +/** + * Gets the page offset. + */ +ram_addr_t get_phys_page_offset(target_ulong addr) +{ + PhysPageDesc *p = phys_page_find(addr >> TARGET_PAGE_BITS); + return p ? 
p->phys_offset : 0; +} +# endif /* VBOX_STRICT */ + +#endif /* VBOX */ + +static inline void set_bits(uint8_t *tab, int start, int len) +{ + int end, mask, end1; + + end = start + len; + tab += start >> 3; + mask = 0xff << (start & 7); + if ((start & ~7) == (end & ~7)) { + if (start < end) { + mask &= ~(0xff << (end & 7)); + *tab |= mask; + } + } else { + *tab++ |= mask; + start = (start + 8) & ~7; + end1 = end & ~7; + while (start < end1) { + *tab++ = 0xff; + start += 8; + } + if (start < end) { + mask = ~(0xff << (end & 7)); + *tab |= mask; + } + } +} + +static void build_page_bitmap(PageDesc *p) +{ + int n, tb_start, tb_end; + TranslationBlock *tb; + + p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8); + + tb = p->first_tb; + while (tb != NULL) { + n = (intptr_t)tb & 3; + tb = (TranslationBlock *)((intptr_t)tb & ~3); + /* NOTE: this is subtle as a TB may span two physical pages */ + if (n == 0) { + /* NOTE: tb_end may be after the end of the page, but + it is not a problem */ + tb_start = tb->pc & ~TARGET_PAGE_MASK; + tb_end = tb_start + tb->size; + if (tb_end > TARGET_PAGE_SIZE) + tb_end = TARGET_PAGE_SIZE; + } else { + tb_start = 0; + tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); + } + set_bits(p->code_bitmap, tb_start, tb_end - tb_start); + tb = tb->page_next[n]; + } +} + +TranslationBlock *tb_gen_code(CPUState *env, + target_ulong pc, target_ulong cs_base, + int flags, int cflags) +{ + TranslationBlock *tb; + uint8_t *tc_ptr; + tb_page_addr_t phys_pc, phys_page2; + target_ulong virt_page2; + int code_gen_size; + + phys_pc = get_page_addr_code(env, pc); + tb = tb_alloc(pc); + if (!tb) { + /* flush must be done */ + tb_flush(env); + /* cannot fail at this point */ + tb = tb_alloc(pc); + /* Don't forget to invalidate previous TB info. */ + tb_invalidated_flag = 1; + } + tc_ptr = code_gen_ptr; + tb->tc_ptr = tc_ptr; + tb->cs_base = cs_base; + tb->flags = flags; + tb->cflags = cflags; + cpu_gen_code(env, tb, &code_gen_size); + code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + + /* check next page if needed */ + virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; + phys_page2 = -1; + if ((pc & TARGET_PAGE_MASK) != virt_page2) { + phys_page2 = get_page_addr_code(env, virt_page2); + } + tb_link_page(tb, phys_pc, phys_page2); + return tb; +} + +/* invalidate all TBs which intersect with the target physical page + starting in range [start;end[. NOTE: start and end must refer to + the same physical page. 'is_cpu_write_access' should be true if called + from a real cpu write access: the virtual CPU will exit the current + TB if code is modified inside this TB. 
*/ +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access) +{ + TranslationBlock *tb, *tb_next, *saved_tb; + CPUState *env = cpu_single_env; + tb_page_addr_t tb_start, tb_end; + PageDesc *p; + int n; +#ifdef TARGET_HAS_PRECISE_SMC + int current_tb_not_found = is_cpu_write_access; + TranslationBlock *current_tb = NULL; + int current_tb_modified = 0; + target_ulong current_pc = 0; + target_ulong current_cs_base = 0; + int current_flags = 0; +#endif /* TARGET_HAS_PRECISE_SMC */ + + p = page_find(start >> TARGET_PAGE_BITS); + if (!p) + return; + if (!p->code_bitmap && + ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD && + is_cpu_write_access) { + /* build code bitmap */ + build_page_bitmap(p); + } + + /* we remove all the TBs in the range [start, end[ */ + /* XXX: see if in some cases it could be faster to invalidate all the code */ + tb = p->first_tb; + while (tb != NULL) { + n = (intptr_t)tb & 3; + tb = (TranslationBlock *)((intptr_t)tb & ~3); + tb_next = tb->page_next[n]; + /* NOTE: this is subtle as a TB may span two physical pages */ + if (n == 0) { + /* NOTE: tb_end may be after the end of the page, but + it is not a problem */ + tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); + tb_end = tb_start + tb->size; + } else { + tb_start = tb->page_addr[1]; + tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); + } + if (!(tb_end <= start || tb_start >= end)) { +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_not_found) { + current_tb_not_found = 0; + current_tb = NULL; + if (env->mem_io_pc) { + /* now we have a real cpu fault */ + current_tb = tb_find_pc(env->mem_io_pc); + } + } + if (current_tb == tb && + (current_tb->cflags & CF_COUNT_MASK) != 1) { + /* If we are modifying the current TB, we must stop + its execution. We could be more precise by checking + that the modification is after the current PC, but it + would require a specialized function to partially + restore the CPU state */ + + current_tb_modified = 1; + cpu_restore_state(current_tb, env, + env->mem_io_pc, NULL); + cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base, + &current_flags); + } +#endif /* TARGET_HAS_PRECISE_SMC */ + /* we need to do that to handle the case where a signal + occurs while doing tb_phys_invalidate() */ + saved_tb = NULL; + if (env) { + saved_tb = env->current_tb; + env->current_tb = NULL; + } + tb_phys_invalidate(tb, -1); + if (env) { + env->current_tb = saved_tb; + if (env->interrupt_request && env->current_tb) + cpu_interrupt(env, env->interrupt_request); + } + } + tb = tb_next; + } +#if !defined(CONFIG_USER_ONLY) + /* if no code remaining, no need to continue to use slow writes */ + if (!p->first_tb) { + invalidate_page_bitmap(p); + if (is_cpu_write_access) { + tlb_unprotect_code_phys(env, start, env->mem_io_vaddr); + } + } +#endif +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_modified) { + /* we generate a block containing just the instruction + modifying the memory. 
It will ensure that it cannot modify + itself */ + env->current_tb = NULL; + tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); + cpu_resume_from_signal(env, NULL); + } +#endif +} + +/* len must be <= 8 and start must be a multiple of len */ +static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) +{ + PageDesc *p; + int offset, b; +#if 0 + if (1) { + qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n", + cpu_single_env->mem_io_vaddr, len, + cpu_single_env->eip, + cpu_single_env->eip + (intptr_t)cpu_single_env->segs[R_CS].base); + } +#endif + p = page_find(start >> TARGET_PAGE_BITS); + if (!p) + return; + if (p->code_bitmap) { + offset = start & ~TARGET_PAGE_MASK; + b = p->code_bitmap[offset >> 3] >> (offset & 7); + if (b & ((1 << len) - 1)) + goto do_invalidate; + } else { + do_invalidate: + tb_invalidate_phys_page_range(start, start + len, 1); + } +} + +#if !defined(CONFIG_SOFTMMU) +static void tb_invalidate_phys_page(tb_page_addr_t addr, + uintptr_t pc, void *puc) +{ + TranslationBlock *tb; + PageDesc *p; + int n; +#ifdef TARGET_HAS_PRECISE_SMC + TranslationBlock *current_tb = NULL; + CPUState *env = cpu_single_env; + int current_tb_modified = 0; + target_ulong current_pc = 0; + target_ulong current_cs_base = 0; + int current_flags = 0; +#endif + + addr &= TARGET_PAGE_MASK; + p = page_find(addr >> TARGET_PAGE_BITS); + if (!p) + return; + tb = p->first_tb; +#ifdef TARGET_HAS_PRECISE_SMC + if (tb && pc != 0) { + current_tb = tb_find_pc(pc); + } +#endif + while (tb != NULL) { + n = (intptr_t)tb & 3; + tb = (TranslationBlock *)((intptr_t)tb & ~3); +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb == tb && + (current_tb->cflags & CF_COUNT_MASK) != 1) { + /* If we are modifying the current TB, we must stop + its execution. We could be more precise by checking + that the modification is after the current PC, but it + would require a specialized function to partially + restore the CPU state */ + + current_tb_modified = 1; + cpu_restore_state(current_tb, env, pc, puc); + cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base, + &current_flags); + } +#endif /* TARGET_HAS_PRECISE_SMC */ + tb_phys_invalidate(tb, addr); + tb = tb->page_next[n]; + } + p->first_tb = NULL; +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_modified) { + /* we generate a block containing just the instruction + modifying the memory. 
It will ensure that it cannot modify + itself */ + env->current_tb = NULL; + tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); + cpu_resume_from_signal(env, puc); + } +#endif +} +#endif + +/* add the tb in the target page and protect it if necessary */ +static inline void tb_alloc_page(TranslationBlock *tb, + unsigned int n, tb_page_addr_t page_addr) +{ + PageDesc *p; + TranslationBlock *last_first_tb; + + tb->page_addr[n] = page_addr; + p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); + tb->page_next[n] = p->first_tb; + last_first_tb = p->first_tb; + p->first_tb = (TranslationBlock *)((intptr_t)tb | n); + invalidate_page_bitmap(p); + +#if defined(TARGET_HAS_SMC) || 1 + +#if defined(CONFIG_USER_ONLY) + if (p->flags & PAGE_WRITE) { + target_ulong addr; + PageDesc *p2; + int prot; + + /* force the host page as non writable (writes will have a + page fault + mprotect overhead) */ + page_addr &= qemu_host_page_mask; + prot = 0; + for(addr = page_addr; addr < page_addr + qemu_host_page_size; + addr += TARGET_PAGE_SIZE) { + + p2 = page_find (addr >> TARGET_PAGE_BITS); + if (!p2) + continue; + prot |= p2->flags; + p2->flags &= ~PAGE_WRITE; + } + mprotect(g2h(page_addr), qemu_host_page_size, + (prot & PAGE_BITS) & ~PAGE_WRITE); +#ifdef DEBUG_TB_INVALIDATE + printf("protecting code page: 0x" TARGET_FMT_lx "\n", + page_addr); +#endif + } +#else + /* if some code is already present, then the pages are already + protected. So we handle the case where only the first TB is + allocated in a physical page */ + if (!last_first_tb) { + tlb_protect_code(page_addr); + } +#endif + +#endif /* TARGET_HAS_SMC */ +} + +/* Allocate a new translation block. Flush the translation buffer if + too many translation blocks or too much generated code. */ +TranslationBlock *tb_alloc(target_ulong pc) +{ + TranslationBlock *tb; + + if (nb_tbs >= code_gen_max_blocks || + (code_gen_ptr - code_gen_buffer) >= VBOX_ONLY((uintptr_t))code_gen_buffer_max_size) + return NULL; + tb = &tbs[nb_tbs++]; + tb->pc = pc; + tb->cflags = 0; + return tb; +} + +void tb_free(TranslationBlock *tb) +{ + /* In practice this is mostly used for single use temporary TB + Ignore the hard cases and just back up if this TB happens to + be the last one generated. */ + if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) { + code_gen_ptr = tb->tc_ptr; + nb_tbs--; + } +} + +/* add a new TB and link it to the physical page tables. phys_page2 is + (-1) to indicate that only one page contains the TB. */ +void tb_link_page(TranslationBlock *tb, + tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) +{ + unsigned int h; + TranslationBlock **ptb; + + /* Grab the mmap lock to stop another thread invalidating this TB + before we are done. */ + mmap_lock(); + /* add in the physical hash table */ + h = tb_phys_hash_func(phys_pc); + ptb = &tb_phys_hash[h]; + tb->phys_hash_next = *ptb; + *ptb = tb; + + /* add in the page list */ + tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); + if (phys_page2 != -1) + tb_alloc_page(tb, 1, phys_page2); + else + tb->page_addr[1] = -1; + + tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2); + tb->jmp_next[0] = NULL; + tb->jmp_next[1] = NULL; + + /* init original jump addresses */ + if (tb->tb_next_offset[0] != 0xffff) + tb_reset_jump(tb, 0); + if (tb->tb_next_offset[1] != 0xffff) + tb_reset_jump(tb, 1); + +#ifdef DEBUG_TB_CHECK + tb_page_check(); +#endif + mmap_unlock(); +} + +/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < + tb[1].tc_ptr. 
Return NULL if not found */ +TranslationBlock *tb_find_pc(uintptr_t tc_ptr) +{ + int m_min, m_max, m; + uintptr_t v; + TranslationBlock *tb; + + if (nb_tbs <= 0) + return NULL; + if (tc_ptr < (uintptr_t)code_gen_buffer || + tc_ptr >= (uintptr_t)code_gen_ptr) + return NULL; + /* binary search (cf Knuth) */ + m_min = 0; + m_max = nb_tbs - 1; + while (m_min <= m_max) { + m = (m_min + m_max) >> 1; + tb = &tbs[m]; + v = (uintptr_t)tb->tc_ptr; + if (v == tc_ptr) + return tb; + else if (tc_ptr < v) { + m_max = m - 1; + } else { + m_min = m + 1; + } + } + return &tbs[m_max]; +} + +static void tb_reset_jump_recursive(TranslationBlock *tb); + +static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n) +{ + TranslationBlock *tb1, *tb_next, **ptb; + unsigned int n1; + + tb1 = tb->jmp_next[n]; + if (tb1 != NULL) { + /* find head of list */ + for(;;) { + n1 = (intptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3); + if (n1 == 2) + break; + tb1 = tb1->jmp_next[n1]; + } + /* we are now sure now that tb jumps to tb1 */ + tb_next = tb1; + + /* remove tb from the jmp_first list */ + ptb = &tb_next->jmp_first; + for(;;) { + tb1 = *ptb; + n1 = (intptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((intptr_t)tb1 & ~3); + if (n1 == n && tb1 == tb) + break; + ptb = &tb1->jmp_next[n1]; + } + *ptb = tb->jmp_next[n]; + tb->jmp_next[n] = NULL; + + /* suppress the jump to next tb in generated code */ + tb_reset_jump(tb, n); + + /* suppress jumps in the tb on which we could have jumped */ + tb_reset_jump_recursive(tb_next); + } +} + +static void tb_reset_jump_recursive(TranslationBlock *tb) +{ + tb_reset_jump_recursive2(tb, 0); + tb_reset_jump_recursive2(tb, 1); +} + +#if defined(TARGET_HAS_ICE) +#if defined(CONFIG_USER_ONLY) +static void breakpoint_invalidate(CPUState *env, target_ulong pc) +{ + tb_invalidate_phys_page_range(pc, pc + 1, 0); +} +#else +static void breakpoint_invalidate(CPUState *env, target_ulong pc) +{ + target_phys_addr_t addr; + target_ulong pd; + ram_addr_t ram_addr; + PhysPageDesc *p; + + addr = cpu_get_phys_page_debug(env, pc); + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK); + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); +} +#endif +#endif /* TARGET_HAS_ICE */ + +#if defined(CONFIG_USER_ONLY) +void cpu_watchpoint_remove_all(CPUState *env, int mask) + +{ +} + +int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len, + int flags, CPUWatchpoint **watchpoint) +{ + return -ENOSYS; +} +#else +/* Add a watchpoint. 
*/ +int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len, + int flags, CPUWatchpoint **watchpoint) +{ + target_ulong len_mask = ~(len - 1); + CPUWatchpoint *wp; + + /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */ + if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) { + fprintf(stderr, "qemu: tried to set invalid watchpoint at " + TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len); +#ifndef VBOX + return -EINVAL; +#else + return VERR_INVALID_PARAMETER; +#endif + } + wp = qemu_malloc(sizeof(*wp)); + + wp->vaddr = addr; + wp->len_mask = len_mask; + wp->flags = flags; + + /* keep all GDB-injected watchpoints in front */ + if (flags & BP_GDB) + QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry); + else + QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry); + + tlb_flush_page(env, addr); + + if (watchpoint) + *watchpoint = wp; + return 0; +} + +/* Remove a specific watchpoint. */ +int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len, + int flags) +{ + target_ulong len_mask = ~(len - 1); + CPUWatchpoint *wp; + + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { + if (addr == wp->vaddr && len_mask == wp->len_mask + && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) { + cpu_watchpoint_remove_by_ref(env, wp); + return 0; + } + } +#ifndef VBOX + return -ENOENT; +#else + return VERR_NOT_FOUND; +#endif +} + +/* Remove a specific watchpoint by reference. */ +void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint) +{ + QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry); + + tlb_flush_page(env, watchpoint->vaddr); + + qemu_free(watchpoint); +} + +/* Remove all matching watchpoints. */ +void cpu_watchpoint_remove_all(CPUState *env, int mask) +{ + CPUWatchpoint *wp, *next; + + QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) { + if (wp->flags & mask) + cpu_watchpoint_remove_by_ref(env, wp); + } +} +#endif + +/* Add a breakpoint. */ +int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags, + CPUBreakpoint **breakpoint) +{ +#if defined(TARGET_HAS_ICE) + CPUBreakpoint *bp; + + bp = qemu_malloc(sizeof(*bp)); + + bp->pc = pc; + bp->flags = flags; + + /* keep all GDB-injected breakpoints in front */ + if (flags & BP_GDB) + QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry); + else + QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry); + + breakpoint_invalidate(env, pc); + + if (breakpoint) + *breakpoint = bp; + return 0; +#else + return -ENOSYS; +#endif +} + +/* Remove a specific breakpoint. */ +int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags) +{ +#if defined(TARGET_HAS_ICE) + CPUBreakpoint *bp; + + QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (bp->pc == pc && bp->flags == flags) { + cpu_breakpoint_remove_by_ref(env, bp); + return 0; + } + } +# ifndef VBOX + return -ENOENT; +# else + return VERR_NOT_FOUND; +# endif +#else + return -ENOSYS; +#endif +} + +/* Remove a specific breakpoint by reference. */ +void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint) +{ +#if defined(TARGET_HAS_ICE) + QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry); + + breakpoint_invalidate(env, breakpoint->pc); + + qemu_free(breakpoint); +#endif +} + +/* Remove all matching breakpoints. 
*/ +void cpu_breakpoint_remove_all(CPUState *env, int mask) +{ +#if defined(TARGET_HAS_ICE) + CPUBreakpoint *bp, *next; + + QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) { + if (bp->flags & mask) + cpu_breakpoint_remove_by_ref(env, bp); + } +#endif +} + +/* enable or disable single step mode. EXCP_DEBUG is returned by the + CPU loop after each instruction */ +void cpu_single_step(CPUState *env, int enabled) +{ +#if defined(TARGET_HAS_ICE) + if (env->singlestep_enabled != enabled) { + env->singlestep_enabled = enabled; + if (kvm_enabled()) + kvm_update_guest_debug(env, 0); + else { + /* must flush all the translated code to avoid inconsistencies */ + /* XXX: only flush what is necessary */ + tb_flush(env); + } + } +#endif +} + +#ifndef VBOX + +/* enable or disable low levels log */ +void cpu_set_log(int log_flags) +{ + loglevel = log_flags; + if (loglevel && !logfile) { + logfile = fopen(logfilename, log_append ? "a" : "w"); + if (!logfile) { + perror(logfilename); + _exit(1); + } +#if !defined(CONFIG_SOFTMMU) + /* must avoid mmap() usage of glibc by setting a buffer "by hand" */ + { + static char logfile_buf[4096]; + setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf)); + } +#elif !defined(_WIN32) + /* Win32 doesn't support line-buffering and requires size >= 2 */ + setvbuf(logfile, NULL, _IOLBF, 0); +#endif + log_append = 1; + } + if (!loglevel && logfile) { + fclose(logfile); + logfile = NULL; + } +} + +void cpu_set_log_filename(const char *filename) +{ + logfilename = strdup(filename); + if (logfile) { + fclose(logfile); + logfile = NULL; + } + cpu_set_log(loglevel); +} + +#endif /* !VBOX */ + +static void cpu_unlink_tb(CPUState *env) +{ + /* FIXME: TB unchaining isn't SMP safe. For now just ignore the + problem and hope the cpu will stop of its own accord. For userspace + emulation this often isn't actually as bad as it sounds. Often + signals are used primarily to interrupt blocking syscalls. */ + TranslationBlock *tb; + static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED; + + spin_lock(&interrupt_lock); + tb = env->current_tb; + /* if the cpu is currently executing code, we must unlink it and + all the potentially executing TB */ + if (tb) { + env->current_tb = NULL; + tb_reset_jump_recursive(tb); + } + spin_unlock(&interrupt_lock); +} + +/* mask must never be zero, except for A20 change call */ +void cpu_interrupt(CPUState *env, int mask) +{ + int old_mask; + + old_mask = env->interrupt_request; +#ifndef VBOX + env->interrupt_request |= mask; +#else /* VBOX */ + VM_ASSERT_EMT(env->pVM); + ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, mask); +#endif /* VBOX */ + +#ifndef VBOX +#ifndef CONFIG_USER_ONLY + /* + * If called from iothread context, wake the target cpu in + * case its halted. + */ + if (!qemu_cpu_self(env)) { + qemu_cpu_kick(env); + return; + } +#endif +#endif /* !VBOX */ + + if (use_icount) { + env->icount_decr.u16.high = 0xffff; +#ifndef CONFIG_USER_ONLY + if (!can_do_io(env) + && (mask & ~old_mask) != 0) { + cpu_abort(env, "Raised interrupt while not in I/O function"); + } +#endif + } else { + cpu_unlink_tb(env); + } +} + +void cpu_reset_interrupt(CPUState *env, int mask) +{ +#ifdef VBOX + /* + * Note: the current implementation can be executed by another thread without problems; make sure this remains true + * for future changes! 
+ */ + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~mask); +#else /* !VBOX */ + env->interrupt_request &= ~mask; +#endif /* !VBOX */ +} + +void cpu_exit(CPUState *env) +{ + env->exit_request = 1; + cpu_unlink_tb(env); +} + +#ifndef VBOX +const CPULogItem cpu_log_items[] = { + { CPU_LOG_TB_OUT_ASM, "out_asm", + "show generated host assembly code for each compiled TB" }, + { CPU_LOG_TB_IN_ASM, "in_asm", + "show target assembly code for each compiled TB" }, + { CPU_LOG_TB_OP, "op", + "show micro ops for each compiled TB" }, + { CPU_LOG_TB_OP_OPT, "op_opt", + "show micro ops " +#ifdef TARGET_I386 + "before eflags optimization and " +#endif + "after liveness analysis" }, + { CPU_LOG_INT, "int", + "show interrupts/exceptions in short format" }, + { CPU_LOG_EXEC, "exec", + "show trace before each executed TB (lots of logs)" }, + { CPU_LOG_TB_CPU, "cpu", + "show CPU state before block translation" }, +#ifdef TARGET_I386 + { CPU_LOG_PCALL, "pcall", + "show protected mode far calls/returns/exceptions" }, + { CPU_LOG_RESET, "cpu_reset", + "show CPU state before CPU resets" }, +#endif +#ifdef DEBUG_IOPORT + { CPU_LOG_IOPORT, "ioport", + "show all i/o ports accesses" }, +#endif + { 0, NULL, NULL }, +}; + +#ifndef CONFIG_USER_ONLY +static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list + = QLIST_HEAD_INITIALIZER(memory_client_list); + +static void cpu_notify_set_memory(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset) +{ + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + client->set_memory(client, start_addr, size, phys_offset); + } +} + +static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start, + target_phys_addr_t end) +{ + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + int r = client->sync_dirty_bitmap(client, start, end); + if (r < 0) + return r; + } + return 0; +} + +static int cpu_notify_migration_log(int enable) +{ + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + int r = client->migration_log(client, enable); + if (r < 0) + return r; + } + return 0; +} + +static void phys_page_for_each_1(CPUPhysMemoryClient *client, + int level, void **lp) +{ + int i; + + if (*lp == NULL) { + return; + } + if (level == 0) { + PhysPageDesc *pd = *lp; + for (i = 0; i < L2_SIZE; ++i) { + if (pd[i].phys_offset != IO_MEM_UNASSIGNED) { + client->set_memory(client, pd[i].region_offset, + TARGET_PAGE_SIZE, pd[i].phys_offset); + } + } + } else { + void **pp = *lp; + for (i = 0; i < L2_SIZE; ++i) { + phys_page_for_each_1(client, level - 1, pp + i); + } + } +} + +static void phys_page_for_each(CPUPhysMemoryClient *client) +{ + int i; + for (i = 0; i < P_L1_SIZE; ++i) { + phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1, + l1_phys_map + 1); + } +} + +void cpu_register_phys_memory_client(CPUPhysMemoryClient *client) +{ + QLIST_INSERT_HEAD(&memory_client_list, client, list); + phys_page_for_each(client); +} + +void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client) +{ + QLIST_REMOVE(client, list); +} +#endif + +static int cmp1(const char *s1, int n, const char *s2) +{ + if (strlen(s2) != n) + return 0; + return memcmp(s1, s2, n) == 0; +} + +/* takes a comma separated list of log masks. Return 0 if error. 
*/ +int cpu_str_to_log_mask(const char *str) +{ + const CPULogItem *item; + int mask; + const char *p, *p1; + + p = str; + mask = 0; + for(;;) { + p1 = strchr(p, ','); + if (!p1) + p1 = p + strlen(p); + if(cmp1(p,p1-p,"all")) { + for(item = cpu_log_items; item->mask != 0; item++) { + mask |= item->mask; + } + } else { + for(item = cpu_log_items; item->mask != 0; item++) { + if (cmp1(p, p1 - p, item->name)) + goto found; + } + return 0; + } + found: + mask |= item->mask; + if (*p1 != ',') + break; + p = p1 + 1; + } + return mask; +} + +void cpu_abort(CPUState *env, const char *fmt, ...) +{ + va_list ap; + va_list ap2; + + va_start(ap, fmt); + va_copy(ap2, ap); + fprintf(stderr, "qemu: fatal: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); +#ifdef TARGET_I386 + cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP); +#else + cpu_dump_state(env, stderr, fprintf, 0); +#endif + if (qemu_log_enabled()) { + qemu_log("qemu: fatal: "); + qemu_log_vprintf(fmt, ap2); + qemu_log("\n"); +#ifdef TARGET_I386 + log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP); +#else + log_cpu_state(env, 0); +#endif + qemu_log_flush(); + qemu_log_close(); + } + va_end(ap2); + va_end(ap); +#if defined(CONFIG_USER_ONLY) + { + struct sigaction act; + sigfillset(&act.sa_mask); + act.sa_handler = SIG_DFL; + sigaction(SIGABRT, &act, NULL); + } +#endif + abort(); +} + +CPUState *cpu_copy(CPUState *env) +{ + CPUState *new_env = cpu_init(env->cpu_model_str); + CPUState *next_cpu = new_env->next_cpu; + int cpu_index = new_env->cpu_index; +#if defined(TARGET_HAS_ICE) + CPUBreakpoint *bp; + CPUWatchpoint *wp; +#endif + + memcpy(new_env, env, sizeof(CPUState)); + + /* Preserve chaining and index. */ + new_env->next_cpu = next_cpu; + new_env->cpu_index = cpu_index; + + /* Clone all break/watchpoints. + Note: Once we support ptrace with hw-debug register access, make sure + BP_CPU break/watchpoints are handled correctly on clone. */ + QTAILQ_INIT(&env->breakpoints); + QTAILQ_INIT(&env->watchpoints); +#if defined(TARGET_HAS_ICE) + QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL); + } + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { + cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1, + wp->flags, NULL); + } +#endif + + return new_env; +} + +#endif /* !VBOX */ +#if !defined(CONFIG_USER_ONLY) + +static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr) +{ + unsigned int i; + + /* Discard jump cache entries for any tb which might potentially + overlap the flushed page. 
*/ + i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); + memset (&env->tb_jmp_cache[i], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + + i = tb_jmp_cache_hash_page(addr); + memset (&env->tb_jmp_cache[i], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); +#ifdef VBOX + + /* inform raw mode about TLB page flush */ + remR3FlushPage(env, addr); +#endif /* VBOX */ +} + +static CPUTLBEntry s_cputlb_empty_entry = { + .addr_read = -1, + .addr_write = -1, + .addr_code = -1, + .addend = -1, +}; + +/* NOTE: if flush_global is true, also flush global entries (not + implemented yet) */ +void tlb_flush(CPUState *env, int flush_global) +{ + int i; + +#ifdef VBOX + Assert(EMRemIsLockOwner(env->pVM)); + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_EXTERNAL_FLUSH_TLB); +#endif + +#if defined(DEBUG_TLB) + printf("tlb_flush:\n"); +#endif + /* must reset current TB so that interrupts cannot modify the + links while we are modifying them */ + env->current_tb = NULL; + + for(i = 0; i < CPU_TLB_SIZE; i++) { + int mmu_idx; + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry; + } + } + + memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *)); + + env->tlb_flush_addr = -1; + env->tlb_flush_mask = 0; + tlb_flush_count++; +#ifdef VBOX + + /* inform raw mode about TLB flush */ + remR3FlushTLB(env, flush_global); +#endif /* VBOX */ +} + +static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) +{ + if (addr == (tlb_entry->addr_read & + (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || + addr == (tlb_entry->addr_write & + (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || + addr == (tlb_entry->addr_code & + (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + *tlb_entry = s_cputlb_empty_entry; + } +} + +void tlb_flush_page(CPUState *env, target_ulong addr) +{ + int i; + int mmu_idx; + + Assert(EMRemIsLockOwner(env->pVM)); +#if defined(DEBUG_TLB) + printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr); +#endif + /* Check if we need to flush due to large pages. */ + if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) { +#if defined(DEBUG_TLB) + printf("tlb_flush_page: forced full flush (" + TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", + env->tlb_flush_addr, env->tlb_flush_mask); +#endif + tlb_flush(env, 1); + return; + } + /* must reset current TB so that interrupts cannot modify the + links while we are modifying them */ + env->current_tb = NULL; + + addr &= TARGET_PAGE_MASK; + i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) + tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); + + tlb_flush_jmp_cache(env, addr); +} + +/* update the TLBs so that writes to code in the virtual page 'addr' + can be detected */ +static void tlb_protect_code(ram_addr_t ram_addr) +{ + cpu_physical_memory_reset_dirty(ram_addr, + ram_addr + TARGET_PAGE_SIZE, + CODE_DIRTY_FLAG); +#if defined(VBOX) && defined(REM_MONITOR_CODE_PAGES) + /** @todo Retest this? This function has changed... 
*/ + remR3ProtectCode(cpu_single_env, ram_addr); +#endif /* VBOX */ +} + +/* update the TLB so that writes in physical page 'phys_addr' are no longer + tested for self modifying code */ +static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr, + target_ulong vaddr) +{ + cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG); +} + +static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, + uintptr_t start, uintptr_t length) +{ + uintptr_t addr; +#ifdef VBOX + + if (start & 3) + return; +#endif /* VBOX */ + if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) { + addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend; + if ((addr - start) < length) { + tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY; + } + } +} + +/* Note: start and end must be within the same ram block. */ +void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, + int dirty_flags) +{ + CPUState *env; + uintptr_t length, start1; + int i; + + start &= TARGET_PAGE_MASK; + end = TARGET_PAGE_ALIGN(end); + + length = end - start; + if (length == 0) + return; + cpu_physical_memory_mask_dirty_range(start, length, dirty_flags); + + /* we modify the TLB cache so that the dirty bit will be set again + when accessing the range */ +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + start1 = start; +#elif !defined(VBOX) + start1 = (uintptr_t)qemu_get_ram_ptr(start); + /* Chek that we don't span multiple blocks - this breaks the + address comparisons below. */ + if ((uintptr_t)qemu_get_ram_ptr(end - 1) - start1 + != (end - 1) - start) { + abort(); + } +#else + start1 = (uintptr_t)remR3TlbGCPhys2Ptr(first_cpu, start, 1 /*fWritable*/); /** @todo page replacing (sharing or read only) may cause trouble, fix interface/whatever. 
*/ +#endif + + for(env = first_cpu; env != NULL; env = env->next_cpu) { + int mmu_idx; + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + for(i = 0; i < CPU_TLB_SIZE; i++) + tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i], + start1, length); + } + } +} + +#ifndef VBOX + +int cpu_physical_memory_set_dirty_tracking(int enable) +{ + int ret = 0; + in_migration = enable; + ret = cpu_notify_migration_log(!!enable); + return ret; +} + +int cpu_physical_memory_get_dirty_tracking(void) +{ + return in_migration; +} + +#endif /* !VBOX */ + +int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, + target_phys_addr_t end_addr) +{ +#ifndef VBOX + int ret; + + ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr); + return ret; +#else /* VBOX */ + return 0; +#endif /* VBOX */ +} + +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) +DECLINLINE(void) tlb_update_dirty(CPUTLBEntry *tlb_entry, target_phys_addr_t phys_addend) +#else +static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry) +#endif +{ + ram_addr_t ram_addr; +#ifndef VBOX + void *p; +#endif + + if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) { +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend; +#elif !defined(VBOX) + p = (void *)(uintptr_t)((tlb_entry->addr_write & TARGET_PAGE_MASK) + + tlb_entry->addend); + ram_addr = qemu_ram_addr_from_host(p); +#else + Assert(phys_addend != -1); + ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + phys_addend; +#endif + if (!cpu_physical_memory_is_dirty(ram_addr)) { + tlb_entry->addr_write |= TLB_NOTDIRTY; + } + } +} + +/* update the TLB according to the current state of the dirty bits */ +void cpu_tlb_update_dirty(CPUState *env) +{ + int i; + int mmu_idx; + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + for(i = 0; i < CPU_TLB_SIZE; i++) +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + tlb_update_dirty(&env->tlb_table[mmu_idx][i], env->phys_addends[mmu_idx][i]); +#else + tlb_update_dirty(&env->tlb_table[mmu_idx][i]); +#endif + } +} + +static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +{ + if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) + tlb_entry->addr_write = vaddr; +} + +/* update the TLB corresponding to virtual page vaddr + so that it is no longer dirty */ +static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr) +{ + int i; + int mmu_idx; + + vaddr &= TARGET_PAGE_MASK; + i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) + tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); +} + +/* Our TLB does not support large pages, so remember the area covered by + large pages and trigger a full TLB flush if these are invalidated. */ +static void tlb_add_large_page(CPUState *env, target_ulong vaddr, + target_ulong size) +{ + target_ulong mask = ~(size - 1); + + if (env->tlb_flush_addr == (target_ulong)-1) { + env->tlb_flush_addr = vaddr & mask; + env->tlb_flush_mask = mask; + return; + } + /* Extend the existing region to include the new page. + This is a compromise between unnecessary flushes and the cost + of maintaining a full variable size TLB. */ + mask &= env->tlb_flush_mask; + while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) { + mask <<= 1; + } + env->tlb_flush_addr &= mask; + env->tlb_flush_mask = mask; +} + +/* Add a new TLB entry. At most one entry for a given virtual address + is permitted. 
Only a single TARGET_PAGE_SIZE region is mapped, the + supplied size is only used by tlb_flush_page. */ +void tlb_set_page(CPUState *env, target_ulong vaddr, + target_phys_addr_t paddr, int prot, + int mmu_idx, target_ulong size) +{ + PhysPageDesc *p; + ram_addr_t pd; + unsigned int index; + target_ulong address; + target_ulong code_address; + uintptr_t addend; + CPUTLBEntry *te; + CPUWatchpoint *wp; + target_phys_addr_t iotlb; +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + int read_mods = 0, write_mods = 0, code_mods = 0; +#endif + + assert(size >= TARGET_PAGE_SIZE); + if (size != TARGET_PAGE_SIZE) { + tlb_add_large_page(env, vaddr, size); + } + p = phys_page_find(paddr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } +#if defined(DEBUG_TLB) + printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d size=" TARGET_FMT_lx " pd=0x%08lx\n", + vaddr, (int)paddr, prot, mmu_idx, size, (long)pd); +#endif + + address = vaddr; + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) { + /* IO memory case (romd handled later) */ + address |= TLB_MMIO; + } +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + addend = pd & TARGET_PAGE_MASK; +#elif !defined(VBOX) + addend = (uintptr_t)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK); +#else + /** @todo this is racing the phys_page_find call above since it may register + * a new chunk of memory... */ + addend = (uintptr_t)remR3TlbGCPhys2Ptr(env, pd & TARGET_PAGE_MASK, !!(prot & PAGE_WRITE)); +#endif + + if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) { + /* Normal RAM. */ + iotlb = pd & TARGET_PAGE_MASK; + if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM) + iotlb |= IO_MEM_NOTDIRTY; + else + iotlb |= IO_MEM_ROM; + } else { + /* IO handlers are currently passed a physical address. + It would be nice to pass an offset from the base address + of that region. This would avoid having to special case RAM, + and avoid full address decoding in every device. + We can't use the high bits of pd for this because + IO_MEM_ROMD uses these as a ram address. */ + iotlb = (pd & ~TARGET_PAGE_MASK); + if (p) { + iotlb += p->region_offset; + } else { + iotlb += paddr; + } + } + + code_address = address; +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + + if (addend & 0x3) + { + if (addend & 0x2) + { + /* catch write */ + if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) + write_mods |= TLB_MMIO; + } + else if (addend & 0x1) + { + /* catch all */ + if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) + { + read_mods |= TLB_MMIO; + write_mods |= TLB_MMIO; + code_mods |= TLB_MMIO; + } + } + if ((iotlb & ~TARGET_PAGE_MASK) == 0) + iotlb = env->pVM->rem.s.iHandlerMemType + paddr; + addend &= ~(target_ulong)0x3; + } + +#endif + /* Make accesses to pages with watchpoints go via the + watchpoint trap routines. */ + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { + if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) { + /* Avoid trapping reads of pages with a write breakpoint. 
*/ + if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) { + iotlb = io_mem_watch + paddr; + address |= TLB_MMIO; + break; + } + } + } + + index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + env->iotlb[mmu_idx][index] = iotlb - vaddr; + te = &env->tlb_table[mmu_idx][index]; + te->addend = addend - vaddr; + if (prot & PAGE_READ) { + te->addr_read = address; + } else { + te->addr_read = -1; + } + + if (prot & PAGE_EXEC) { + te->addr_code = code_address; + } else { + te->addr_code = -1; + } + if (prot & PAGE_WRITE) { + if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM || + (pd & IO_MEM_ROMD)) { + /* Write access calls the I/O callback. */ + te->addr_write = address | TLB_MMIO; + } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM && + !cpu_physical_memory_is_dirty(pd)) { + te->addr_write = address | TLB_NOTDIRTY; + } else { + te->addr_write = address; + } + } else { + te->addr_write = -1; + } + +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + if (prot & PAGE_READ) + te->addr_read |= read_mods; + if (prot & PAGE_EXEC) + te->addr_code |= code_mods; + if (prot & PAGE_WRITE) + te->addr_write |= write_mods; + + env->phys_addends[mmu_idx][index] = (pd & TARGET_PAGE_MASK)- vaddr; +#endif + +#ifdef VBOX + /* inform raw mode about TLB page change */ + remR3FlushPage(env, vaddr); +#endif +} + +#else + +void tlb_flush(CPUState *env, int flush_global) +{ +} + +void tlb_flush_page(CPUState *env, target_ulong addr) +{ +} + +/* + * Walks guest process memory "regions" one by one + * and calls callback function 'fn' for each region. + */ + +struct walk_memory_regions_data +{ + walk_memory_regions_fn fn; + void *priv; + uintptr_t start; + int prot; +}; + +static int walk_memory_regions_end(struct walk_memory_regions_data *data, + abi_ulong end, int new_prot) +{ + if (data->start != -1ul) { + int rc = data->fn(data->priv, data->start, end, data->prot); + if (rc != 0) { + return rc; + } + } + + data->start = (new_prot ? end : -1ul); + data->prot = new_prot; + + return 0; +} + +static int walk_memory_regions_1(struct walk_memory_regions_data *data, + abi_ulong base, int level, void **lp) +{ + abi_ulong pa; + int i, rc; + + if (*lp == NULL) { + return walk_memory_regions_end(data, base, 0); + } + + if (level == 0) { + PageDesc *pd = *lp; + for (i = 0; i < L2_SIZE; ++i) { + int prot = pd[i].flags; + + pa = base | (i << TARGET_PAGE_BITS); + if (prot != data->prot) { + rc = walk_memory_regions_end(data, pa, prot); + if (rc != 0) { + return rc; + } + } + } + } else { + void **pp = *lp; + for (i = 0; i < L2_SIZE; ++i) { + pa = base | ((abi_ulong)i << + (TARGET_PAGE_BITS + L2_BITS * level)); + rc = walk_memory_regions_1(data, pa, level - 1, pp + i); + if (rc != 0) { + return rc; + } + } + } + + return 0; +} + +int walk_memory_regions(void *priv, walk_memory_regions_fn fn) +{ + struct walk_memory_regions_data data; + target_ulong i; + + data.fn = fn; + data.priv = priv; + data.start = -1ul; + data.prot = 0; + + for (i = 0; i < V_L1_SIZE; i++) { + int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT, + V_L1_SHIFT / L2_BITS - 1, l1_map + i); + if (rc != 0) { + return rc; + } + } + + return walk_memory_regions_end(&data, 0, 0); +} + +static int dump_region(void *priv, abi_ulong start, + abi_ulong end, unsigned long prot) +{ + FILE *f = (FILE *)priv; + + (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx + " "TARGET_ABI_FMT_lx" %c%c%c\n", + start, end, end - start, + ((prot & PAGE_READ) ? 'r' : '-'), + ((prot & PAGE_WRITE) ? 'w' : '-'), + ((prot & PAGE_EXEC) ? 
'x' : '-')); + + return (0); +} + +/* dump memory mappings */ +void page_dump(FILE *f) +{ + (void) fprintf(f, "%-8s %-8s %-8s %s\n", + "start", "end", "size", "prot"); + walk_memory_regions(f, dump_region); +} + +int page_get_flags(target_ulong address) +{ + PageDesc *p; + + p = page_find(address >> TARGET_PAGE_BITS); + if (!p) + return 0; + return p->flags; +} + +/* Modify the flags of a page and invalidate the code if necessary. + The flag PAGE_WRITE_ORG is positioned automatically depending + on PAGE_WRITE. The mmap_lock should already be held. */ +void page_set_flags(target_ulong start, target_ulong end, int flags) +{ + target_ulong addr, len; + + /* This function should never be called with addresses outside the + guest address space. If this assert fires, it probably indicates + a missing call to h2g_valid. */ +#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS + assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); +#endif + assert(start < end); + + start = start & TARGET_PAGE_MASK; + end = TARGET_PAGE_ALIGN(end); + + if (flags & PAGE_WRITE) { + flags |= PAGE_WRITE_ORG; + } + +#ifdef VBOX + AssertMsgFailed(("We shouldn't be here, and if we should, we must have an env to do the proper locking!\n")); +#endif + for (addr = start, len = end - start; + len != 0; + len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { + PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1); + + /* If the write protection bit is set, then we invalidate + the code inside. */ + if (!(p->flags & PAGE_WRITE) && + (flags & PAGE_WRITE) && + p->first_tb) { + tb_invalidate_phys_page(addr, 0, NULL); + } + p->flags = flags; + } +} + +int page_check_range(target_ulong start, target_ulong len, int flags) +{ + PageDesc *p; + target_ulong end; + target_ulong addr; + + /* This function should never be called with addresses outside the + guest address space. If this assert fires, it probably indicates + a missing call to h2g_valid. */ +#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS + assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); +#endif + + if (len == 0) { + return 0; + } + if (start + len - 1 < start) { + /* We've wrapped around. */ + return -1; + } + + end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */ + start = start & TARGET_PAGE_MASK; + + for (addr = start, len = end - start; + len != 0; + len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { + p = page_find(addr >> TARGET_PAGE_BITS); + if( !p ) + return -1; + if( !(p->flags & PAGE_VALID) ) + return -1; + + if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) + return -1; + if (flags & PAGE_WRITE) { + if (!(p->flags & PAGE_WRITE_ORG)) + return -1; + /* unprotect the page if it was put read-only because it + contains translated code */ + if (!(p->flags & PAGE_WRITE)) { + if (!page_unprotect(addr, 0, NULL)) + return -1; + } + return 0; + } + } + return 0; +} + +/* called from signal handler: invalidate the code and unprotect the + page. Return TRUE if the fault was successfully handled. */ +int page_unprotect(target_ulong address, uintptr_t pc, void *puc) +{ + unsigned int prot; + PageDesc *p; + target_ulong host_start, host_end, addr; + + /* Technically this isn't safe inside a signal handler. However we + know this only ever happens in a synchronous SEGV handler, so in + practice it seems to be ok. 
*/ + mmap_lock(); + + p = page_find(address >> TARGET_PAGE_BITS); + if (!p) { + mmap_unlock(); + return 0; + } + + /* if the page was really writable, then we change its + protection back to writable */ + if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) { + host_start = address & qemu_host_page_mask; + host_end = host_start + qemu_host_page_size; + + prot = 0; + for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) { + p = page_find(addr >> TARGET_PAGE_BITS); + p->flags |= PAGE_WRITE; + prot |= p->flags; + + /* and since the content will be modified, we must invalidate + the corresponding translated code. */ + tb_invalidate_phys_page(addr, pc, puc); +#ifdef DEBUG_TB_CHECK + tb_invalidate_check(addr); +#endif + } + mprotect((void *)g2h(host_start), qemu_host_page_size, + prot & PAGE_BITS); + + mmap_unlock(); + return 1; + } + mmap_unlock(); + return 0; +} + +static inline void tlb_set_dirty(CPUState *env, + uintptr_t addr, target_ulong vaddr) +{ +} +#endif /* defined(CONFIG_USER_ONLY) */ + +#if !defined(CONFIG_USER_ONLY) + +#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK) +typedef struct subpage_t { + target_phys_addr_t base; + ram_addr_t sub_io_index[TARGET_PAGE_SIZE]; + ram_addr_t region_offset[TARGET_PAGE_SIZE]; +} subpage_t; + +static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, + ram_addr_t memory, ram_addr_t region_offset); +static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys, + ram_addr_t orig_memory, + ram_addr_t region_offset); +#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \ + need_subpage) \ + do { \ + if (addr > start_addr) \ + start_addr2 = 0; \ + else { \ + start_addr2 = start_addr & ~TARGET_PAGE_MASK; \ + if (start_addr2 > 0) \ + need_subpage = 1; \ + } \ + \ + if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \ + end_addr2 = TARGET_PAGE_SIZE - 1; \ + else { \ + end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \ + if (end_addr2 < TARGET_PAGE_SIZE - 1) \ + need_subpage = 1; \ + } \ + } while (0) + +/* register physical memory. + For RAM, 'size' must be a multiple of the target page size. + If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an + io memory page. The address used when calling the IO function is + the offset from the start of the region, plus region_offset. Both + start_addr and region_offset are rounded down to a page boundary + before calculating this offset. This should not be a problem unless + the low bits of start_addr and region_offset differ. 
*/ +void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset) +{ + target_phys_addr_t addr, end_addr; + PhysPageDesc *p; + CPUState *env; + ram_addr_t orig_size = size; + subpage_t *subpage; + +#ifndef VBOX + cpu_notify_set_memory(start_addr, size, phys_offset); +#endif /* !VBOX */ + + if (phys_offset == IO_MEM_UNASSIGNED) { + region_offset = start_addr; + } + region_offset &= TARGET_PAGE_MASK; + size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK; + end_addr = start_addr + (target_phys_addr_t)size; + for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) { + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (p && p->phys_offset != IO_MEM_UNASSIGNED) { + ram_addr_t orig_memory = p->phys_offset; + target_phys_addr_t start_addr2, end_addr2; + int need_subpage = 0; + + CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, + need_subpage); + if (need_subpage) { + if (!(orig_memory & IO_MEM_SUBPAGE)) { + subpage = subpage_init((addr & TARGET_PAGE_MASK), + &p->phys_offset, orig_memory, + p->region_offset); + } else { + subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK) + >> IO_MEM_SHIFT]; + } + subpage_register(subpage, start_addr2, end_addr2, phys_offset, + region_offset); + p->region_offset = 0; + } else { + p->phys_offset = phys_offset; + if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM || + (phys_offset & IO_MEM_ROMD)) + phys_offset += TARGET_PAGE_SIZE; + } + } else { + p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1); + p->phys_offset = phys_offset; + p->region_offset = region_offset; + if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM || + (phys_offset & IO_MEM_ROMD)) { + phys_offset += TARGET_PAGE_SIZE; + } else { + target_phys_addr_t start_addr2, end_addr2; + int need_subpage = 0; + + CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, + end_addr2, need_subpage); + + if (need_subpage) { + subpage = subpage_init((addr & TARGET_PAGE_MASK), + &p->phys_offset, IO_MEM_UNASSIGNED, + addr & TARGET_PAGE_MASK); + subpage_register(subpage, start_addr2, end_addr2, + phys_offset, region_offset); + p->region_offset = 0; + } + } + } + region_offset += TARGET_PAGE_SIZE; + } + + /* since each CPU stores ram addresses in its TLB cache, we must + reset the modified entries */ +#ifndef VBOX + /* XXX: slow ! */ + for(env = first_cpu; env != NULL; env = env->next_cpu) { + tlb_flush(env, 1); + } +#else + /* We have one thread per CPU, so, one of the other EMTs might be executing + code right now and flushing the TLB may crash it. 
*/ + env = first_cpu; + if (EMRemIsLockOwner(env->pVM)) + tlb_flush(env, 1); + else + ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, + CPU_INTERRUPT_EXTERNAL_FLUSH_TLB); +#endif +} + +/* XXX: temporary until new memory mapping API */ +ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr) +{ + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) + return IO_MEM_UNASSIGNED; + return p->phys_offset; +} + +#ifndef VBOX + +void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size) +{ + if (kvm_enabled()) + kvm_coalesce_mmio_region(addr, size); +} + +void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size) +{ + if (kvm_enabled()) + kvm_uncoalesce_mmio_region(addr, size); +} + +void qemu_flush_coalesced_mmio_buffer(void) +{ + if (kvm_enabled()) + kvm_flush_coalesced_mmio_buffer(); +} + +#if defined(__linux__) && !defined(TARGET_S390X) + +#include <sys/vfs.h> + +#define HUGETLBFS_MAGIC 0x958458f6 + +static size_t gethugepagesize(const char *path) +{ + struct statfs fs; + int ret; + + do { + ret = statfs(path, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret != 0) { + perror(path); + return 0; + } + + if (fs.f_type != HUGETLBFS_MAGIC) + fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + + return (size_t)fs.f_bsize; +} + +static void *file_ram_alloc(RAMBlock *block, + ram_addr_t memory, + const char *path) +{ + char *filename; + void *area; + int fd; +#ifdef MAP_POPULATE + int flags; +#endif + size_t hpagesize; + + hpagesize = gethugepagesize(path); + if (!hpagesize) { + return NULL; + } + + if (memory < hpagesize) { + return NULL; + } + + if (kvm_enabled() && !kvm_has_sync_mmu()) { + fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n"); + return NULL; + } + + if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) { + return NULL; + } + + fd = mkstemp(filename); + if (fd < 0) { + perror("unable to create backing store for hugepages"); + free(filename); + return NULL; + } + unlink(filename); + free(filename); + + memory = (memory+hpagesize-1) & ~(hpagesize-1); + + /* + * ftruncate is not supported by hugetlbfs in older + * hosts, so don't bother bailing out on errors. + * If anything goes wrong with it under other filesystems, + * mmap will fail. + */ + if (ftruncate(fd, memory)) + perror("ftruncate"); + +#ifdef MAP_POPULATE + /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED + * to sidestep this quirk. + */ + flags = mem_prealloc ? 
MAP_POPULATE | MAP_SHARED : MAP_PRIVATE; + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0); +#else + area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); +#endif + if (area == MAP_FAILED) { + perror("file_ram_alloc: can't mmap RAM pages"); + close(fd); + return (NULL); + } + block->fd = fd; + return area; +} +#endif + +static ram_addr_t find_ram_offset(ram_addr_t size) +{ + RAMBlock *block, *next_block; + ram_addr_t offset = 0, mingap = ULONG_MAX; + + if (QLIST_EMPTY(&ram_list.blocks)) + return 0; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + ram_addr_t end, next = ULONG_MAX; + + end = block->offset + block->length; + + QLIST_FOREACH(next_block, &ram_list.blocks, next) { + if (next_block->offset >= end) { + next = MIN(next, next_block->offset); + } + } + if (next - end >= size && next - end < mingap) { + offset = end; + mingap = next - end; + } + } + return offset; +} + +static ram_addr_t last_ram_offset(void) +{ + RAMBlock *block; + ram_addr_t last = 0; + + QLIST_FOREACH(block, &ram_list.blocks, next) + last = MAX(last, block->offset + block->length); + + return last; +} + +ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, + ram_addr_t size, void *host) +{ + RAMBlock *new_block, *block; + + size = TARGET_PAGE_ALIGN(size); + new_block = qemu_mallocz(sizeof(*new_block)); + + if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) { + char *id = dev->parent_bus->info->get_dev_path(dev); + if (id) { + snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id); + qemu_free(id); + } + } + pstrcat(new_block->idstr, sizeof(new_block->idstr), name); + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (!strcmp(block->idstr, new_block->idstr)) { + fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", + new_block->idstr); + abort(); + } + } + + new_block->host = host; + + new_block->offset = find_ram_offset(size); + new_block->length = size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + last_ram_offset() >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, size >> TARGET_PAGE_BITS); + + if (kvm_enabled()) + kvm_setup_guest_memory(new_block->host, size); + + return new_block->offset; +} + +ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) +{ + RAMBlock *new_block, *block; + + size = TARGET_PAGE_ALIGN(size); + new_block = qemu_mallocz(sizeof(*new_block)); + + if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) { + char *id = dev->parent_bus->info->get_dev_path(dev); + if (id) { + snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id); + qemu_free(id); + } + } + pstrcat(new_block->idstr, sizeof(new_block->idstr), name); + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (!strcmp(block->idstr, new_block->idstr)) { + fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", + new_block->idstr); + abort(); + } + } + + if (mem_path) { +#if defined (__linux__) && !defined(TARGET_S390X) + new_block->host = file_ram_alloc(new_block, size, mem_path); + if (!new_block->host) { + new_block->host = qemu_vmalloc(size); +#ifdef MADV_MERGEABLE + madvise(new_block->host, size, MADV_MERGEABLE); +#endif + } +#else + fprintf(stderr, "-mem-path option unsupported\n"); + exit(1); +#endif + } else { +#if defined(TARGET_S390X) && defined(CONFIG_KVM) + /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */ + new_block->host = 
mmap((void*)0x1000000, size, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); +#else + new_block->host = qemu_vmalloc(size); +#endif +#ifdef MADV_MERGEABLE + madvise(new_block->host, size, MADV_MERGEABLE); +#endif + } + new_block->offset = find_ram_offset(size); + new_block->length = size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + last_ram_offset() >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, size >> TARGET_PAGE_BITS); + + if (kvm_enabled()) + kvm_setup_guest_memory(new_block->host, size); + + return new_block->offset; +} + +void qemu_ram_free(ram_addr_t addr) +{ + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr == block->offset) { + QLIST_REMOVE(block, next); + if (mem_path) { +#if defined (__linux__) && !defined(TARGET_S390X) + if (block->fd) { + munmap(block->host, block->length); + close(block->fd); + } else { + qemu_vfree(block->host); + } +#endif + } else { +#if defined(TARGET_S390X) && defined(CONFIG_KVM) + munmap(block->host, block->length); +#else + qemu_vfree(block->host); +#endif + } + qemu_free(block); + return; + } + } + +} + +/* Return a host pointer to ram allocated with qemu_ram_alloc. + With the exception of the softmmu code in this file, this should + only be used for local memory (e.g. video ram) that the device owns, + and knows it isn't going to access beyond the end of the block. + + It should not be used for general purpose DMA. + Use cpu_physical_memory_map/cpu_physical_memory_rw instead. + */ +void *qemu_get_ram_ptr(ram_addr_t addr) +{ + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr - block->offset < block->length) { + QLIST_REMOVE(block, next); + QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + return block->host + (addr - block->offset); + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + + return NULL; +} + +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. 
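+   The lookup below walks the RAM block list linearly and aborts if the
+   pointer does not fall inside any registered block.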
*/ +ram_addr_t qemu_ram_addr_from_host(void *ptr) +{ + RAMBlock *block; + uint8_t *host = ptr; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (host - block->host < block->length) { + return block->offset + (host - block->host); + } + } + + fprintf(stderr, "Bad ram pointer %p\n", ptr); + abort(); + + return 0; +} + +#endif /* !VBOX */ + +static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 0, 0, 0, 1); +#endif + return 0; +} + +static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 0, 0, 0, 2); +#endif + return 0; +} + +static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 0, 0, 0, 4); +#endif + return 0; +} + +static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 1, 0, 0, 1); +#endif +} + +static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 1, 0, 0, 2); +#endif +} + +static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); +#endif +#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + do_unassigned_access(addr, 1, 0, 0, 4); +#endif +} + +static CPUReadMemoryFunc * const unassigned_mem_read[3] = { + unassigned_mem_readb, + unassigned_mem_readw, + unassigned_mem_readl, +}; + +static CPUWriteMemoryFunc * const unassigned_mem_write[3] = { + unassigned_mem_writeb, + unassigned_mem_writew, + unassigned_mem_writel, +}; + +static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr, + uint32_t val) +{ + int dirty_flags; + dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); + if (!(dirty_flags & CODE_DIRTY_FLAG)) { +#if !defined(CONFIG_USER_ONLY) + tb_invalidate_phys_page_fast(ram_addr, 1); + dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); +#endif + } +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + remR3PhysWriteU8(ram_addr, val); +#else + stb_p(qemu_get_ram_ptr(ram_addr), val); +#endif + dirty_flags |= (0xff & ~CODE_DIRTY_FLAG); + cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); + /* we remove the notdirty callback only if the code has been + flushed */ + if (dirty_flags == 0xff) + tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr); +} + +static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr, + uint32_t val) +{ + int dirty_flags; + dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); + if (!(dirty_flags & CODE_DIRTY_FLAG)) { +#if !defined(CONFIG_USER_ONLY) + tb_invalidate_phys_page_fast(ram_addr, 2); + dirty_flags = 
cpu_physical_memory_get_dirty_flags(ram_addr); +#endif + } +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + remR3PhysWriteU16(ram_addr, val); +#else + stw_p(qemu_get_ram_ptr(ram_addr), val); +#endif + dirty_flags |= (0xff & ~CODE_DIRTY_FLAG); + cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); + /* we remove the notdirty callback only if the code has been + flushed */ + if (dirty_flags == 0xff) + tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr); +} + +static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, + uint32_t val) +{ + int dirty_flags; + dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); + if (!(dirty_flags & CODE_DIRTY_FLAG)) { +#if !defined(CONFIG_USER_ONLY) + tb_invalidate_phys_page_fast(ram_addr, 4); + dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); +#endif + } +#if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) + remR3PhysWriteU32(ram_addr, val); +#else + stl_p(qemu_get_ram_ptr(ram_addr), val); +#endif + dirty_flags |= (0xff & ~CODE_DIRTY_FLAG); + cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); + /* we remove the notdirty callback only if the code has been + flushed */ + if (dirty_flags == 0xff) + tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr); +} + +static CPUReadMemoryFunc * const error_mem_read[3] = { + NULL, /* never used */ + NULL, /* never used */ + NULL, /* never used */ +}; + +static CPUWriteMemoryFunc * const notdirty_mem_write[3] = { + notdirty_mem_writeb, + notdirty_mem_writew, + notdirty_mem_writel, +}; + +/* Generate a debug exception if a watchpoint has been hit. */ +static void check_watchpoint(int offset, int len_mask, int flags) +{ + CPUState *env = cpu_single_env; + target_ulong pc, cs_base; + TranslationBlock *tb; + target_ulong vaddr; + CPUWatchpoint *wp; + int cpu_flags; + + if (env->watchpoint_hit) { + /* We re-entered the check after replacing the TB. Now raise + * the debug interrupt so that is will trigger after the + * current instruction. */ + cpu_interrupt(env, CPU_INTERRUPT_DEBUG); + return; + } + vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset; + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { + if ((vaddr == (wp->vaddr & len_mask) || + (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) { + wp->flags |= BP_WATCHPOINT_HIT; + if (!env->watchpoint_hit) { + env->watchpoint_hit = wp; + tb = tb_find_pc(env->mem_io_pc); + if (!tb) { + cpu_abort(env, "check_watchpoint: could not find TB for " + "pc=%p", (void *)env->mem_io_pc); + } + cpu_restore_state(tb, env, env->mem_io_pc, NULL); + tb_phys_invalidate(tb, -1); + if (wp->flags & BP_STOP_BEFORE_ACCESS) { + env->exception_index = EXCP_DEBUG; + } else { + cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags); + tb_gen_code(env, pc, cs_base, cpu_flags, 1); + } + cpu_resume_from_signal(env, NULL); + } + } else { + wp->flags &= ~BP_WATCHPOINT_HIT; + } + } +} + +/* Watchpoint access routines. Watchpoints are inserted using TLB tricks, + so these check for a hit then pass through to the normal out-of-line + phys routines. 
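+   The len_mask passed to check_watchpoint() encodes the access size:
+   ~0x0 for 8-bit, ~0x1 for 16-bit and ~0x3 for 32-bit accesses, matching
+   the handlers below.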
*/ +static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ); + return ldub_phys(addr); +} + +static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ); + return lduw_phys(addr); +} + +static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ); + return ldl_phys(addr); +} + +static void watch_mem_writeb(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE); + stb_phys(addr, val); +} + +static void watch_mem_writew(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE); + stw_phys(addr, val); +} + +static void watch_mem_writel(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE); + stl_phys(addr, val); +} + +static CPUReadMemoryFunc * const watch_mem_read[3] = { + watch_mem_readb, + watch_mem_readw, + watch_mem_readl, +}; + +static CPUWriteMemoryFunc * const watch_mem_write[3] = { + watch_mem_writeb, + watch_mem_writew, + watch_mem_writel, +}; + +static inline uint32_t subpage_readlen (subpage_t *mmio, + target_phys_addr_t addr, + unsigned int len) +{ + unsigned int idx = SUBPAGE_IDX(addr); +#if defined(DEBUG_SUBPAGE) + printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__, + mmio, len, addr, idx); +#endif + + addr += mmio->region_offset[idx]; + idx = mmio->sub_io_index[idx]; + return io_mem_read[idx][len](io_mem_opaque[idx], addr); +} + +static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr, + uint32_t value, unsigned int len) +{ + unsigned int idx = SUBPAGE_IDX(addr); +#if defined(DEBUG_SUBPAGE) + printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n", + __func__, mmio, len, addr, idx, value); +#endif + + addr += mmio->region_offset[idx]; + idx = mmio->sub_io_index[idx]; + io_mem_write[idx][len](io_mem_opaque[idx], addr, value); +} + +static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr) +{ + return subpage_readlen(opaque, addr, 0); +} + +static void subpage_writeb (void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + subpage_writelen(opaque, addr, value, 0); +} + +static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr) +{ + return subpage_readlen(opaque, addr, 1); +} + +static void subpage_writew (void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + subpage_writelen(opaque, addr, value, 1); +} + +static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr) +{ + return subpage_readlen(opaque, addr, 2); +} + +static void subpage_writel (void *opaque, target_phys_addr_t addr, + uint32_t value) +{ + subpage_writelen(opaque, addr, value, 2); +} + +static CPUReadMemoryFunc * const subpage_read[] = { + &subpage_readb, + &subpage_readw, + &subpage_readl, +}; + +static CPUWriteMemoryFunc * const subpage_write[] = { + &subpage_writeb, + &subpage_writew, + &subpage_writel, +}; + +static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, + ram_addr_t memory, ram_addr_t region_offset) +{ + int idx, eidx; + + if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) + return -1; + idx = SUBPAGE_IDX(start); + eidx = SUBPAGE_IDX(end); +#if defined(DEBUG_SUBPAGE) + printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", 
__func__, + mmio, start, end, idx, eidx, memory); +#endif + memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + for (; idx <= eidx; idx++) { + mmio->sub_io_index[idx] = memory; + mmio->region_offset[idx] = region_offset; + } + + return 0; +} + +static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys, + ram_addr_t orig_memory, + ram_addr_t region_offset) +{ + subpage_t *mmio; + int subpage_memory; + + mmio = qemu_mallocz(sizeof(subpage_t)); + + mmio->base = base; + subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio); +#if defined(DEBUG_SUBPAGE) + printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__, + mmio, base, TARGET_PAGE_SIZE, subpage_memory); +#endif + *phys = subpage_memory | IO_MEM_SUBPAGE; + subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset); + + return mmio; +} + +static int get_free_io_mem_idx(void) +{ + int i; + + for (i = 0; i<IO_MEM_NB_ENTRIES; i++) + if (!io_mem_used[i]) { + io_mem_used[i] = 1; + return i; + } + fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES); + return -1; +} + +/* mem_read and mem_write are arrays of functions containing the + function to access byte (index 0), word (index 1) and dword (index + 2). Functions can be omitted with a NULL function pointer. + If io_index is non zero, the corresponding io zone is + modified. If it is zero, a new io zone is allocated. The return + value can be used with cpu_register_physical_memory(). (-1) is + returned if error. */ +static int cpu_register_io_memory_fixed(int io_index, + CPUReadMemoryFunc * const *mem_read, + CPUWriteMemoryFunc * const *mem_write, + void *opaque) +{ + int i; + + if (io_index <= 0) { + io_index = get_free_io_mem_idx(); + if (io_index == -1) + return io_index; + } else { + io_index >>= IO_MEM_SHIFT; + if (io_index >= IO_MEM_NB_ENTRIES) + return -1; + } + + for (i = 0; i < 3; ++i) { + io_mem_read[io_index][i] + = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]); + } + for (i = 0; i < 3; ++i) { + io_mem_write[io_index][i] + = (mem_write[i] ? 
mem_write[i] : unassigned_mem_write[i]); + } + io_mem_opaque[io_index] = opaque; + + return (io_index << IO_MEM_SHIFT); +} + +int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, + CPUWriteMemoryFunc * const *mem_write, + void *opaque) +{ + return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque); +} + +void cpu_unregister_io_memory(int io_table_address) +{ + int i; + int io_index = io_table_address >> IO_MEM_SHIFT; + + for (i=0;i < 3; i++) { + io_mem_read[io_index][i] = unassigned_mem_read[i]; + io_mem_write[io_index][i] = unassigned_mem_write[i]; + } + io_mem_opaque[io_index] = NULL; + io_mem_used[io_index] = 0; +} + +static void io_mem_init(void) +{ + int i; + + cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL); + cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL); + cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL); + for (i=0; i<5; i++) + io_mem_used[i] = 1; + + io_mem_watch = cpu_register_io_memory(watch_mem_read, + watch_mem_write, NULL); +} + +#endif /* !defined(CONFIG_USER_ONLY) */ + +/* physical memory access (slow version, mainly for debug) */ +#if defined(CONFIG_USER_ONLY) +int cpu_memory_rw_debug(CPUState *env, target_ulong addr, + uint8_t *buf, int len, int is_write) +{ + int l, flags; + target_ulong page; + void * p; + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + flags = page_get_flags(page); + if (!(flags & PAGE_VALID)) + return -1; + if (is_write) { + if (!(flags & PAGE_WRITE)) + return -1; + /* XXX: this code should not depend on lock_user */ + if (!(p = lock_user(VERIFY_WRITE, addr, l, 0))) + return -1; + memcpy(p, buf, l); + unlock_user(p, addr, l); + } else { + if (!(flags & PAGE_READ)) + return -1; + /* XXX: this code should not depend on lock_user */ + if (!(p = lock_user(VERIFY_READ, addr, l, 1))) + return -1; + memcpy(buf, p, l); + unlock_user(p, addr, 0); + } + len -= l; + buf += l; + addr += l; + } + return 0; +} + +#else +void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, + int len, int is_write) +{ + int l, io_index; + uint8_t *ptr; + uint32_t val; + target_phys_addr_t page; + ram_addr_t pd; + PhysPageDesc *p; + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + p = phys_page_find(page >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if (is_write) { + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + target_phys_addr_t addr1 = addr; + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + /* XXX: could force cpu_single_env to NULL to avoid + potential bugs */ + if (l >= 4 && ((addr1 & 3) == 0)) { + /* 32 bit write access */ +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + val = ldl_p(buf); +#else + val = *(const uint32_t *)buf; +#endif + io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val); + l = 4; + } else if (l >= 2 && ((addr1 & 1) == 0)) { + /* 16 bit write access */ +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + val = lduw_p(buf); +#else + val = *(const uint16_t *)buf; +#endif + io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val); + l = 2; + } else { + /* 8 bit write access */ +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + val = ldub_p(buf); +#else + val = *(const uint8_t *)buf; +#endif + 
io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val); + l = 1; + } + } else { + ram_addr_t addr1; + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + /* RAM case */ +#ifdef VBOX + remR3PhysWrite(addr1, buf, l); NOREF(ptr); +#else + ptr = qemu_get_ram_ptr(addr1); + memcpy(ptr, buf, l); +#endif + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + l, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags( + addr1, (0xff & ~CODE_DIRTY_FLAG)); + } + } + } else { + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + target_phys_addr_t addr1 = addr; + /* I/O case */ + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + if (l >= 4 && ((addr1 & 3) == 0)) { + /* 32 bit read access */ + val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1); +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + stl_p(buf, val); +#else + *(uint32_t *)buf = val; +#endif + l = 4; + } else if (l >= 2 && ((addr1 & 1) == 0)) { + /* 16 bit read access */ + val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1); +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + stw_p(buf, val); +#else + *(uint16_t *)buf = val; +#endif + l = 2; + } else { + /* 8 bit read access */ + val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1); +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + stb_p(buf, val); +#else + *(uint8_t *)buf = val; +#endif + l = 1; + } + } else { + /* RAM case */ +#ifdef VBOX + remR3PhysRead((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), buf, l); NOREF(ptr); +#else + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + memcpy(buf, ptr, l); +#endif + } + } + len -= l; + buf += l; + addr += l; + } +} + +#ifndef VBOX + +/* used for ROM loading : can write in RAM and ROM */ +void cpu_physical_memory_write_rom(target_phys_addr_t addr, + const uint8_t *buf, int len) +{ + int l; + uint8_t *ptr; + target_phys_addr_t page; + ram_addr_t pd; + PhysPageDesc *p; + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + p = phys_page_find(page >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM && + (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + /* do nothing */ + } else { + ram_addr_t addr1; + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + /* ROM/RAM case */ + ptr = qemu_get_ram_ptr(addr1); + memcpy(ptr, buf, l); + } + len -= l; + buf += l; + addr += l; + } +} + +typedef struct { + void *buffer; + target_phys_addr_t addr; + target_phys_addr_t len; +} BounceBuffer; + +static BounceBuffer bounce; + +typedef struct MapClient { + void *opaque; + void (*callback)(void *opaque); + QLIST_ENTRY(MapClient) link; +} MapClient; + +static QLIST_HEAD(map_client_list, MapClient) map_client_list + = QLIST_HEAD_INITIALIZER(map_client_list); + +void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque)) +{ + MapClient *client = qemu_malloc(sizeof(*client)); + + client->opaque = opaque; + client->callback = callback; + QLIST_INSERT_HEAD(&map_client_list, client, link); + return client; +} + +void cpu_unregister_map_client(void *_client) +{ + MapClient *client = (MapClient *)_client; + + QLIST_REMOVE(client, link); + qemu_free(client); +} + +static void cpu_notify_map_clients(void) +{ 
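+    /* Drain the list of registered map clients: each callback is invoked once
+       and the client is then unregistered, so a callback will typically retry
+       the cpu_physical_memory_map() call that previously failed. */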
+ MapClient *client; + + while (!QLIST_EMPTY(&map_client_list)) { + client = QLIST_FIRST(&map_client_list); + client->callback(client->opaque); + cpu_unregister_map_client(client); + } +} + +/* Map a physical memory region into a host virtual address. + * May map a subset of the requested range, given by and returned in *plen. + * May return NULL if resources needed to perform the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * Use cpu_register_map_client() to know when retrying the map operation is + * likely to succeed. + */ +void *cpu_physical_memory_map(target_phys_addr_t addr, + target_phys_addr_t *plen, + int is_write) +{ + target_phys_addr_t len = *plen; + target_phys_addr_t done = 0; + int l; + uint8_t *ret = NULL; + uint8_t *ptr; + target_phys_addr_t page; + ram_addr_t pd; + PhysPageDesc *p; + ram_addr_t addr1; + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + p = phys_page_find(page >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + if (done || bounce.buffer) { + break; + } + bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); + bounce.addr = addr; + bounce.len = l; + if (!is_write) { + cpu_physical_memory_rw(addr, bounce.buffer, l, 0); + } + ptr = bounce.buffer; + } else { + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + ptr = qemu_get_ram_ptr(addr1); + } + if (!done) { + ret = ptr; + } else if (ret + done != ptr) { + break; + } + + len -= l; + addr += l; + done += l; + } + *plen = done; + return ret; +} + +/* Unmaps a memory region previously mapped by cpu_physical_memory_map(). + * Will also mark the memory as dirty if is_write == 1. access_len gives + * the amount of memory that was actually read or written by the caller. 
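+ * For bounce-buffer mappings, a write mapping is flushed back to guest
+ * memory with cpu_physical_memory_write() before the buffer is freed and
+ * any waiting map clients are notified.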
+ */ +void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, + int is_write, target_phys_addr_t access_len) +{ + if (buffer != bounce.buffer) { + if (is_write) { + ram_addr_t addr1 = qemu_ram_addr_from_host(buffer); + while (access_len) { + unsigned l; + l = TARGET_PAGE_SIZE; + if (l > access_len) + l = access_len; + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + l, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags( + addr1, (0xff & ~CODE_DIRTY_FLAG)); + } + addr1 += l; + access_len -= l; + } + } + return; + } + if (is_write) { + cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len); + } + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + cpu_notify_map_clients(); +} + +#endif /* !VBOX */ + +/* warning: addr must be aligned */ +uint32_t ldl_phys(target_phys_addr_t addr) +{ + int io_index; + uint8_t *ptr; + uint32_t val; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + /* I/O case */ + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr); + } else { + /* RAM case */ +#ifndef VBOX + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + val = ldl_p(ptr); +#else + val = remR3PhysReadU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr); +#endif + } + return val; +} + +/* warning: addr must be aligned */ +uint64_t ldq_phys(target_phys_addr_t addr) +{ + int io_index; + uint8_t *ptr; + uint64_t val; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + /* I/O case */ + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; +#ifdef TARGET_WORDS_BIGENDIAN + val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32; + val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4); +#else + val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr); + val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32; +#endif + } else { + /* RAM case */ +#ifndef VBOX + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + val = ldq_p(ptr); +#else + val = remR3PhysReadU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr); +#endif + } + return val; +} + +/* XXX: optimize */ +uint32_t ldub_phys(target_phys_addr_t addr) +{ + uint8_t val; + cpu_physical_memory_read(addr, &val, 1); + return val; +} + +/* warning: addr must be aligned */ +uint32_t lduw_phys(target_phys_addr_t addr) +{ + int io_index; +#ifndef VBOX + uint8_t *ptr; +#endif + uint64_t val; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && + !(pd & IO_MEM_ROMD)) { + /* I/O case */ + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr); + } else { + /* RAM 
case */ +#ifndef VBOX + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + val = lduw_p(ptr); +#else + val = remR3PhysReadU16((pd & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK)); +#endif + } + return val; +} + +/* warning: addr must be aligned. The ram page is not masked as dirty + and the code inside is not invalidated. It is useful if the dirty + bits are used to track modified PTEs */ +void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val) +{ + int io_index; + uint8_t *ptr; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val); + } else { +#ifndef VBOX + ram_addr_t addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + ptr = qemu_get_ram_ptr(addr1); + stl_p(ptr, val); +#else + remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr); +#endif + +#ifndef VBOX + if (unlikely(in_migration)) { + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + 4, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags( + addr1, (0xff & ~CODE_DIRTY_FLAG)); + } + } +#endif /* !VBOX */ + } +} + +void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val) +{ + int io_index; + uint8_t *ptr; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; +#ifdef TARGET_WORDS_BIGENDIAN + io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32); + io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val); +#else + io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val); + io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32); +#endif + } else { +#ifndef VBOX + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + + (addr & ~TARGET_PAGE_MASK); + stq_p(ptr, val); +#else + remR3PhysWriteU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr); +#endif + } +} + +/* warning: addr must be aligned */ +void stl_phys(target_phys_addr_t addr, uint32_t val) +{ + int io_index; + uint8_t *ptr; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val); + } else { + ram_addr_t addr1; + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + /* RAM case */ +#ifndef VBOX + ptr = qemu_get_ram_ptr(addr1); + stl_p(ptr, val); +#else + remR3PhysWriteU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), val); NOREF(ptr); +#endif + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + 4, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags(addr1, + (0xff & ~CODE_DIRTY_FLAG)); + } + } +} + +/* XXX: 
optimize */ +void stb_phys(target_phys_addr_t addr, uint32_t val) +{ + uint8_t v = val; + cpu_physical_memory_write(addr, &v, 1); +} + +/* warning: addr must be aligned */ +void stw_phys(target_phys_addr_t addr, uint32_t val) +{ + int io_index; + uint8_t *ptr; + ram_addr_t pd; + PhysPageDesc *p; + + p = phys_page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + pd = IO_MEM_UNASSIGNED; + } else { + pd = p->phys_offset; + } + + if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + if (p) + addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); + } else { + ram_addr_t addr1; + addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); + /* RAM case */ +#ifndef VBOX + ptr = qemu_get_ram_ptr(addr1); + stw_p(ptr, val); +#else + remR3PhysWriteU16(addr1, val); NOREF(ptr); +#endif + if (!cpu_physical_memory_is_dirty(addr1)) { + /* invalidate code */ + tb_invalidate_phys_page_range(addr1, addr1 + 2, 0); + /* set dirty bit */ + cpu_physical_memory_set_dirty_flags(addr1, + (0xff & ~CODE_DIRTY_FLAG)); + } + } +} + +/* XXX: optimize */ +void stq_phys(target_phys_addr_t addr, uint64_t val) +{ + val = tswap64(val); + cpu_physical_memory_write(addr, (const uint8_t *)&val, 8); +} + +#ifndef VBOX +/* virtual memory access for debug (includes writing to ROM) */ +int cpu_memory_rw_debug(CPUState *env, target_ulong addr, + uint8_t *buf, int len, int is_write) +{ + int l; + target_phys_addr_t phys_addr; + target_ulong page; + + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + phys_addr = cpu_get_phys_page_debug(env, page); + /* if no physical page mapped, return an error */ + if (phys_addr == -1) + return -1; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + phys_addr += (addr & ~TARGET_PAGE_MASK); + if (is_write) + cpu_physical_memory_write_rom(phys_addr, buf, l); + else + cpu_physical_memory_rw(phys_addr, buf, l, is_write); + len -= l; + buf += l; + addr += l; + } + return 0; +} +#endif /* !VBOX */ +#endif + +/* in deterministic execution mode, instructions doing device I/Os + must be at the end of the TB */ +void cpu_io_recompile(CPUState *env, void *retaddr) +{ + TranslationBlock *tb; + uint32_t n, cflags; + target_ulong pc, cs_base; + uint64_t flags; + + tb = tb_find_pc((uintptr_t)retaddr); + if (!tb) { + cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", + retaddr); + } + n = env->icount_decr.u16.low + tb->icount; + cpu_restore_state(tb, env, (uintptr_t)retaddr, NULL); + /* Calculate how many instructions had been executed before the fault + occurred. */ + n = n - env->icount_decr.u16.low; + /* Generate a new TB ending on the I/O insn. */ + n++; + /* On MIPS and SH, delay slot instructions can only be restarted if + they were already the first instruction in the TB. If this is not + the first instruction in a TB then re-execute the preceding + branch. */ +#if defined(TARGET_MIPS) + if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) { + env->active_tc.PC -= 4; + env->icount_decr.u16.low++; + env->hflags &= ~MIPS_HFLAG_BMASK; + } +#elif defined(TARGET_SH4) + if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 + && n > 1) { + env->pc -= 2; + env->icount_decr.u16.low++; + env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + } +#endif + /* This should never happen. 
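+       (n would not fit into the CF_COUNT_MASK bits of the cflags value
+       built below.)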
*/ + if (n > CF_COUNT_MASK) + cpu_abort(env, "TB too big during recompile"); + + cflags = n | CF_LAST_IO; + pc = tb->pc; + cs_base = tb->cs_base; + flags = tb->flags; + tb_phys_invalidate(tb, -1); + /* FIXME: In theory this could raise an exception. In practice + we have already translated the block once so it's probably ok. */ + tb_gen_code(env, pc, cs_base, flags, cflags); + /** @todo If env->pc != tb->pc (i.e. the faulting instruction was not + the first in the TB) then we end up generating a whole new TB and + repeating the fault, which is horribly inefficient. + Better would be to execute just this insn uncached, or generate a + second new TB. */ + cpu_resume_from_signal(env, NULL); +} + +#if !defined(CONFIG_USER_ONLY) + +#ifndef VBOX +void dump_exec_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ + int i, target_code_size, max_target_code_size; + int direct_jmp_count, direct_jmp2_count, cross_page; + TranslationBlock *tb; + + target_code_size = 0; + max_target_code_size = 0; + cross_page = 0; + direct_jmp_count = 0; + direct_jmp2_count = 0; + for(i = 0; i < nb_tbs; i++) { + tb = &tbs[i]; + target_code_size += tb->size; + if (tb->size > max_target_code_size) + max_target_code_size = tb->size; + if (tb->page_addr[1] != -1) + cross_page++; + if (tb->tb_next_offset[0] != 0xffff) { + direct_jmp_count++; + if (tb->tb_next_offset[1] != 0xffff) { + direct_jmp2_count++; + } + } + } + /* XXX: avoid using doubles ? */ + cpu_fprintf(f, "Translation buffer state:\n"); + cpu_fprintf(f, "gen code size %ld/%ld\n", + code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size); + cpu_fprintf(f, "TB count %d/%d\n", + nb_tbs, code_gen_max_blocks); + cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", + nb_tbs ? target_code_size / nb_tbs : 0, + max_target_code_size); + cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n", + nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0, + target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0); + cpu_fprintf(f, "cross page TB count %d (%d%%)\n", + cross_page, + nb_tbs ? (cross_page * 100) / nb_tbs : 0); + cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", + direct_jmp_count, + nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0, + direct_jmp2_count, + nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0); + cpu_fprintf(f, "\nStatistics:\n"); + cpu_fprintf(f, "TB flush count %d\n", tb_flush_count); + cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count); + cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); + tcg_dump_info(f, cpu_fprintf); +} +#endif /* !VBOX */ + +#define MMUSUFFIX _cmmu +#define GETPC() NULL +#define env cpu_single_env +#define SOFTMMU_CODE_ACCESS + +#define SHIFT 0 +#include "softmmu_template.h" + +#define SHIFT 1 +#include "softmmu_template.h" + +#define SHIFT 2 +#include "softmmu_template.h" + +#define SHIFT 3 +#include "softmmu_template.h" + +#undef env + +#endif diff --git a/src/recompiler/fpu/Makefile.kup b/src/recompiler/fpu/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/recompiler/fpu/Makefile.kup diff --git a/src/recompiler/fpu/softfloat-macros.h b/src/recompiler/fpu/softfloat-macros.h new file mode 100644 index 00000000..78382282 --- /dev/null +++ b/src/recompiler/fpu/softfloat-macros.h @@ -0,0 +1,719 @@ + +/*============================================================================ + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b. 
+ +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. + +=============================================================================*/ + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 32, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +| The result is stored in the location pointed to by `zPtr'. +*----------------------------------------------------------------------------*/ + +INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +{ + bits32 z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 64, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +| The result is stored in the location pointed to by `zPtr'. 
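+| For example, shift64RightJamming( 0x8000000000000001, 1, &z ) stores
+| 0x4000000000000001 in `z': the nonzero bit shifted off is jammed into the
+| least significant bit of the result.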
+*----------------------------------------------------------------------------*/ + +INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +{ + bits64 z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 64 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +| _plus_ the number of bits given in `count'. The shifted result is at most +| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +| bits shifted off form a second 64-bit result as follows: The _last_ bit +| shifted off is the most-significant bit of the extra result, and the other +| 63 bits of the extra result are all zero if and only if _all_but_the_last_ +| bits shifted off were all zero. This extra result is stored in the location +| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. +| (This routine makes more sense if `a0' and `a1' are considered to form +| a fixed-point value with binary point between `a0' and `a1'. This fixed- +| point value is shifted right by the number of bits given in `count', and +| the integer part of the result is returned at the location pointed to by +| `z0Ptr'. The fractional part of the result may be slightly corrupted as +| described above, and is returned at the location pointed to by `z1Ptr'.) +*----------------------------------------------------------------------------*/ + +INLINE void + shift64ExtraRightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1 != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' can be arbitrarily large; in particular, if `count' is greater +| than 128, the result will be 0. The result is broken into two 64-bit pieces +| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + shift128Right( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. If any nonzero bits are shifted off, they +| are ``jammed'' into the least significant bit of the result by setting the +| least significant bit to 1. The value of `count' can be arbitrarily large; +| in particular, if `count' is greater than 128, the result will be either +| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or +| nonzero. 
The result is broken into two 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + shift128RightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 128 ) { + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right +| by 64 _plus_ the number of bits given in `count'. The shifted result is +| at most 128 nonzero bits; these are broken into two 64-bit pieces which are +| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted +| off form a third 64-bit result as follows: The _last_ bit shifted off is +| the most-significant bit of the extra result, and the other 63 bits of the +| extra result are all zero if and only if _all_but_the_last_ bits shifted off +| were all zero. This extra result is stored in the location pointed to by +| `z2Ptr'. The value of `count' can be arbitrarily large. +| (This routine makes more sense if `a0', `a1', and `a2' are considered +| to form a fixed-point value with binary point between `a1' and `a2'. This +| fixed-point value is shifted right by the number of bits given in `count', +| and the integer part of the result is returned at the locations pointed to +| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly +| corrupted as described above, and is returned at the location pointed to by +| `z2Ptr'.) +*----------------------------------------------------------------------------*/ + +INLINE void + shift128ExtraRightJamming( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z2 = a2; + z1 = a1; + z0 = a0; + } + else { + if ( count < 64 ) { + z2 = a1<<negCount; + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 128 ) { + z2 = a0<<negCount; + z1 = a0>>( count & 63 ); + } + else { + z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' must be less than 64. The result is broken into two 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + shortShift128Left( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1<<count; + *z0Ptr = + ( count == 0 ) ? 
a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); + +} + +/*---------------------------------------------------------------------------- +| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left +| by the number of bits given in `count'. Any bits shifted off are lost. +| The value of `count' must be less than 64. The result is broken into three +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', +| `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + shortShift192Left( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount; + + z2 = a2<<count; + z1 = a1<<count; + z0 = a0<<count; + if ( 0 < count ) { + negCount = ( ( - count ) & 63 ); + z1 |= a2>>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +| any carry out is lost. The result is broken into two 64-bit pieces which +| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + add128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/*---------------------------------------------------------------------------- +| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +| modulo 2^192, so any carry out is lost. The result is broken into three +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', +| `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + add192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +| 2^128, so any borrow out (carry out) is lost. The result is broken into two +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +| `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + sub128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/*---------------------------------------------------------------------------- +| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +| from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. 
The +| result is broken into three 64-bit pieces which are stored at the locations +| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + sub192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +| into two 64-bit pieces which are stored at the locations pointed to by +| `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits32 aHigh, aLow, bHigh, bLow; + bits64 z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>32; + bLow = b; + bHigh = b>>32; + z1 = ( (bits64) aLow ) * bLow; + zMiddleA = ( (bits64) aLow ) * bHigh; + zMiddleB = ( (bits64) aHigh ) * bLow; + z0 = ( (bits64) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by +| `b' to obtain a 192-bit product. The product is broken into three 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +| `z2Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + mul128By64To192( + bits64 a0, + bits64 a1, + bits64 b, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2, more1; + + mul64To128( a1, b, &z1, &z2 ); + mul64To128( a0, b, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +| product. The product is broken into four 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +*----------------------------------------------------------------------------*/ + +INLINE void + mul128To256( + bits64 a0, + bits64 a1, + bits64 b0, + bits64 b1, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr, + bits64 *z3Ptr + ) +{ + bits64 z0, z1, z2, z3; + bits64 more1, more2; + + mul64To128( a1, b1, &z2, &z3 ); + mul64To128( a1, b0, &z1, &more2 ); + add128( z1, more2, 0, z2, &z1, &z2 ); + mul64To128( a0, b0, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + mul64To128( a0, b1, &more1, &more2 ); + add128( more1, more2, 0, z2, &more1, &z2 ); + add128( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/*---------------------------------------------------------------------------- +| Returns an approximation to the 64-bit integer quotient obtained by dividing +| `b' into the 128-bit value formed by concatenating `a0' and `a1'. 
The +| divisor `b' must be at least 2^63. If q is the exact quotient truncated +| toward zero, the approximation returned lies between q and q + 2 inclusive. +| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +| unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +{ + bits64 b0, b1; + bits64 rem0, rem1, term0, term1; + bits64 z; + + if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); + b0 = b>>32; + z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; + mul64To128( b, z, &term0, &term1 ); + sub128( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits64) rem0 ) < 0 ) { + z -= LIT64( 0x100000000 ); + b1 = b<<32; + add128( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<32 ) | ( rem1>>32 ); + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns an approximation to the square root of the 32-bit significand given +| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +| `aExp' (the least significant bit) is 1, the integer returned approximates +| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +| case, the approximation returned lies strictly within +/-2 of the exact +| value. +*----------------------------------------------------------------------------*/ + +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 index; + bits32 z; + + index = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + } + return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + +} + +/*---------------------------------------------------------------------------- +| Returns the number of leading 0 bits before the most-significant 1 bit of +| `a'. If `a' is zero, 32 is returned. 
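+| For example, countLeadingZeros32( 0x00FFFFFF ) returns 8, and
+| countLeadingZeros32( 1 ) returns 31.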
+*----------------------------------------------------------------------------*/ + +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/*---------------------------------------------------------------------------- +| Returns the number of leading 0 bits before the most-significant 1 bit of +| `a'. If `a' is zero, 64 is returned. +*----------------------------------------------------------------------------*/ + +static int8 countLeadingZeros64( bits64 a ) +{ + int8 shiftCount; + + shiftCount = 0; + if ( a < ( (bits64) 1 )<<32 ) { + shiftCount += 32; + } + else { + a >>= 32; + } + shiftCount += countLeadingZeros32( a ); + return shiftCount; + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +| is equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +| returns 0. +*----------------------------------------------------------------------------*/ + +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +| not equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. 
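+| This is simply the logical negation of `eq128'.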
+*----------------------------------------------------------------------------*/ + +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} diff --git a/src/recompiler/fpu/softfloat-native.c b/src/recompiler/fpu/softfloat-native.c new file mode 100644 index 00000000..7c7820ab --- /dev/null +++ b/src/recompiler/fpu/softfloat-native.c @@ -0,0 +1,521 @@ +/* Native implementation of soft float functions. Only a single status + context is supported */ +#include "softfloat.h" +#include <math.h> +#if defined(CONFIG_SOLARIS) +#include <fenv.h> +#endif + +void set_float_rounding_mode(int val STATUS_PARAM) +{ + STATUS(float_rounding_mode) = val; +#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) || \ + (defined(CONFIG_SOLARIS) && (CONFIG_SOLARIS_VERSION < 10 || CONFIG_SOLARIS_VERSION == 11)) /* VBOX adds sol 11 */ + fpsetround(val); +#else + fesetround(val); +#endif +} + +#ifdef FLOATX80 +void set_floatx80_rounding_precision(int val STATUS_PARAM) +{ + STATUS(floatx80_rounding_precision) = val; +} +#endif + +#if defined(CONFIG_BSD) || \ + (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) +#define lrint(d) ((int32_t)rint(d)) +#define llrint(d) ((int64_t)rint(d)) +#define lrintf(f) ((int32_t)rint(f)) +#define llrintf(f) ((int64_t)rint(f)) +#define sqrtf(f) ((float)sqrt(f)) +#define remainderf(fa, fb) ((float)remainder(fa, fb)) +#define rintf(f) ((float)rint(f)) +# if defined(VBOX) && defined(HOST_BSD) /* Some defines which only apply to *BSD */ +# define lrintl(f) ((int32_t)rint(f)) +# define llrintl(f) ((int64_t)rint(f)) +# define rintl(d) ((int32_t)rint(d)) +# define sqrtl(f) (sqrt(f)) +# define remainderl(fa, fb) (remainder(fa, fb)) +# endif /* VBOX && _BSD */ +#if !defined(__sparc__) && \ + (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) +extern long double rintl(long double); +extern long double scalbnl(long double, int); + +long long +llrintl(long double x) { + return ((long long) rintl(x)); +} + +long +lrintl(long double x) { + return ((long) rintl(x)); +} + +long double +ldexpl(long double x, int n) { + return (scalbnl(x, n)); +} +#endif +#endif + +#if defined(_ARCH_PPC) + +/* correct (but slow) PowerPC rint() (glibc version is incorrect) */ +static double qemu_rint(double x) +{ + double y = 4503599627370496.0; + if (fabs(x) >= y) + return x; + if (x < 0) + y = -y; + y = (x + y) - y; + if (y == 0.0) + y = copysign(y, x); + return y; +} + +#define rint qemu_rint +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. 
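+| In this native implementation each conversion is a plain C cast, so the
+| host's conversion and rounding behaviour applies.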
+*----------------------------------------------------------------------------*/ +float32 int32_to_float32(int v STATUS_PARAM) +{ + return (float32)v; +} + +float32 uint32_to_float32(unsigned int v STATUS_PARAM) +{ + return (float32)v; +} + +float64 int32_to_float64(int v STATUS_PARAM) +{ + return (float64)v; +} + +float64 uint32_to_float64(unsigned int v STATUS_PARAM) +{ + return (float64)v; +} + +#ifdef FLOATX80 +floatx80 int32_to_floatx80(int v STATUS_PARAM) +{ + return (floatx80)v; +} +#endif +float32 int64_to_float32( int64_t v STATUS_PARAM) +{ + return (float32)v; +} +float32 uint64_to_float32( uint64_t v STATUS_PARAM) +{ + return (float32)v; +} +float64 int64_to_float64( int64_t v STATUS_PARAM) +{ + return (float64)v; +} +float64 uint64_to_float64( uint64_t v STATUS_PARAM) +{ + return (float64)v; +} +#ifdef FLOATX80 +floatx80 int64_to_floatx80( int64_t v STATUS_PARAM) +{ + return (floatx80)v; +} +#endif + +/* XXX: this code implements the x86 behaviour, not the IEEE one. */ +#if HOST_LONG_BITS == 32 +static inline int long_to_int32(long a) +{ + return a; +} +#else +static inline int long_to_int32(long a) +{ + if (a != (int32_t)a) + a = 0x80000000; + return a; +} +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float32_to_int32( float32 a STATUS_PARAM) +{ + return long_to_int32(lrintf(a)); +} +int float32_to_int32_round_to_zero( float32 a STATUS_PARAM) +{ + return (int)a; +} +int64_t float32_to_int64( float32 a STATUS_PARAM) +{ + return llrintf(a); +} + +int64_t float32_to_int64_round_to_zero( float32 a STATUS_PARAM) +{ + return (int64_t)a; +} + +float64 float32_to_float64( float32 a STATUS_PARAM) +{ + return a; +} +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 a STATUS_PARAM) +{ + return a; +} +#endif + +unsigned int float32_to_uint32( float32 a STATUS_PARAM) +{ + int64_t v; + unsigned int res; + + v = llrintf(a); + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + res = v; + } + return res; +} +unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM) +{ + int64_t v; + unsigned int res; + + v = (int64_t)a; + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + res = v; + } + return res; +} + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. 
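+| The operations below are thin wrappers over the host libm (rintf,
+| remainderf, sqrtf); the quiet compares use the C99 isless()/isgreater()
+| macros.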
+*----------------------------------------------------------------------------*/ +float32 float32_round_to_int( float32 a STATUS_PARAM) +{ + return rintf(a); +} + +float32 float32_rem( float32 a, float32 b STATUS_PARAM) +{ + return remainderf(a, b); +} + +float32 float32_sqrt( float32 a STATUS_PARAM) +{ + return sqrtf(a); +} +int float32_compare( float32 a, float32 b STATUS_PARAM ) +{ + if (a < b) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (a > b) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int float32_compare_quiet( float32 a, float32 b STATUS_PARAM ) +{ + if (isless(a, b)) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (isgreater(a, b)) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int float32_is_signaling_nan( float32 a1) +{ + float32u u; + uint32_t a; + u.f = a1; + a = u.i; + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); +} + +int float32_is_nan( float32 a1 ) +{ + float32u u; + uint64_t a; + u.f = a1; + a = u.i; + return ( 0xFF800000 < ( a<<1 ) ); +} + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float64_to_int32( float64 a STATUS_PARAM) +{ + return long_to_int32(lrint(a)); +} +int float64_to_int32_round_to_zero( float64 a STATUS_PARAM) +{ + return (int)a; +} +int64_t float64_to_int64( float64 a STATUS_PARAM) +{ + return llrint(a); +} +int64_t float64_to_int64_round_to_zero( float64 a STATUS_PARAM) +{ + return (int64_t)a; +} +float32 float64_to_float32( float64 a STATUS_PARAM) +{ + return a; +} +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 a STATUS_PARAM) +{ + return a; +} +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 a STATUS_PARAM) +{ + return a; +} +#endif + +unsigned int float64_to_uint32( float64 a STATUS_PARAM) +{ + int64_t v; + unsigned int res; + + v = llrint(a); + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + res = v; + } + return res; +} +unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM) +{ + int64_t v; + unsigned int res; + + v = (int64_t)a; + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + res = v; + } + return res; +} +uint64_t float64_to_uint64 (float64 a STATUS_PARAM) +{ + int64_t v; + + v = llrint(a + (float64)INT64_MIN); + + return v - INT64_MIN; +} +uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) +{ + int64_t v; + + v = (int64_t)(a + (float64)INT64_MIN); + + return v - INT64_MIN; +} + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. +*----------------------------------------------------------------------------*/ +#if defined(__sun__) && \ + (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) +static inline float64 trunc(float64 x) +{ + return x < 0 ? 
-floor(-x) : floor(x); +} +#endif +float64 float64_trunc_to_int( float64 a STATUS_PARAM ) +{ + return trunc(a); +} + +float64 float64_round_to_int( float64 a STATUS_PARAM ) +{ + return rint(a); +} + +float64 float64_rem( float64 a, float64 b STATUS_PARAM) +{ + return remainder(a, b); +} + +float64 float64_sqrt( float64 a STATUS_PARAM) +{ + return sqrt(a); +} +int float64_compare( float64 a, float64 b STATUS_PARAM ) +{ + if (a < b) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (a > b) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int float64_compare_quiet( float64 a, float64 b STATUS_PARAM ) +{ + if (isless(a, b)) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (isgreater(a, b)) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int float64_is_signaling_nan( float64 a1) +{ + float64u u; + uint64_t a; + u.f = a1; + a = u.i; + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +int float64_is_nan( float64 a1 ) +{ + float64u u; + uint64_t a; + u.f = a1; + a = u.i; + + return ( LIT64( 0xFFF0000000000000 ) < (bits64) ( a<<1 ) ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int floatx80_to_int32( floatx80 a STATUS_PARAM) +{ + return long_to_int32(lrintl(a)); +} +int floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM) +{ + return (int)a; +} +int64_t floatx80_to_int64( floatx80 a STATUS_PARAM) +{ + return llrintl(a); +} +int64_t floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM) +{ + return (int64_t)a; +} +float32 floatx80_to_float32( floatx80 a STATUS_PARAM) +{ + return a; +} +float64 floatx80_to_float64( floatx80 a STATUS_PARAM) +{ + return a; +} + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. 
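+| As above, these wrap the host's long double routines: rintl(), remainderl()
+| and sqrtl().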
+*----------------------------------------------------------------------------*/ +floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM) +{ + return rintl(a); +} +floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM) +{ + return remainderl(a, b); +} +floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM) +{ + return sqrtl(a); +} +int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM ) +{ + if (a < b) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (a > b) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM ) +{ + if (isless(a, b)) { + return float_relation_less; + } else if (a == b) { + return float_relation_equal; + } else if (isgreater(a, b)) { + return float_relation_greater; + } else { + return float_relation_unordered; + } +} +int floatx80_is_signaling_nan( floatx80 a1) +{ + floatx80u u; + uint64_t aLow; + u.f = a1; + + aLow = u.i.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( u.i.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( u.i.low == aLow ); +} + +int floatx80_is_nan( floatx80 a1 ) +{ + floatx80u u; + u.f = a1; + return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( u.i.low<<1 ); +} + +#endif diff --git a/src/recompiler/fpu/softfloat-native.h b/src/recompiler/fpu/softfloat-native.h new file mode 100644 index 00000000..13be732a --- /dev/null +++ b/src/recompiler/fpu/softfloat-native.h @@ -0,0 +1,493 @@ +/* Native implementation of soft float functions */ +#define __C99FEATURES__ +#include <math.h> + +#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__) && !defined(__FreeBSD__)) \ + || defined(CONFIG_SOLARIS) /* VBox: Added __FreeBSD__ */ +#include <ieeefp.h> +#define fabsf(f) ((float)fabs(f)) +#else +#include <fenv.h> +#endif + +#if defined(__OpenBSD__) || defined(__NetBSD__) +#include <sys/param.h> +#endif + +/* + * Define some C99-7.12.3 classification macros and + * some C99-.12.4 for Solaris systems OS less than 10, + * or Solaris 10 systems running GCC 3.x or less. + * Solaris 10 with GCC4 does not need these macros as they + * are defined in <iso/math_c99.h> with a compiler directive + */ +#if defined(CONFIG_SOLARIS) && \ + ((CONFIG_SOLARIS_VERSION <= 9 ) || \ + ((CONFIG_SOLARIS_VERSION == 10) && (__GNUC__ < 4))) \ + || (defined(__OpenBSD__) && (OpenBSD < 200811)) +/* + * C99 7.12.3 classification macros + * and + * C99 7.12.14 comparison macros + * + * ... do not work on Solaris 10 using GNU CC 3.4.x. + * Try to workaround the missing / broken C99 math macros. 
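+ * The fallbacks below define the comparison macros in terms of unordered()
+ * and the plain relational operators.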
+ */ +#if defined(__OpenBSD__) +#define unordered(x, y) (isnan(x) || isnan(y)) +#endif + +#ifdef __NetBSD__ +#ifndef isgreater +#define isgreater(x, y) __builtin_isgreater(x, y) +#endif +#ifndef isgreaterequal +#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) +#endif +#ifndef isless +#define isless(x, y) __builtin_isless(x, y) +#endif +#ifndef islessequal +#define islessequal(x, y) __builtin_islessequal(x, y) +#endif +#ifndef isunordered +#define isunordered(x, y) __builtin_isunordered(x, y) +#endif +#endif + + +#define isnormal(x) (fpclass(x) >= FP_NZERO) +#define isgreater(x, y) ((!unordered(x, y)) && ((x) > (y))) +#define isgreaterequal(x, y) ((!unordered(x, y)) && ((x) >= (y))) +#define isless(x, y) ((!unordered(x, y)) && ((x) < (y))) +#define islessequal(x, y) ((!unordered(x, y)) && ((x) <= (y))) +#define isunordered(x,y) unordered(x, y) +#endif + +#if defined(__sun__) && !defined(CONFIG_NEEDS_LIBSUNMATH) + +#ifndef isnan +# define isnan(x) \ + (sizeof (x) == sizeof (long double) ? isnan_ld (x) \ + : sizeof (x) == sizeof (double) ? isnan_d (x) \ + : isnan_f (x)) +static inline int isnan_f (float x) { return x != x; } +static inline int isnan_d (double x) { return x != x; } +static inline int isnan_ld (long double x) { return x != x; } +#endif + +#ifndef isinf +# define isinf(x) \ + (sizeof (x) == sizeof (long double) ? isinf_ld (x) \ + : sizeof (x) == sizeof (double) ? isinf_d (x) \ + : isinf_f (x)) +static inline int isinf_f (float x) { return isnan (x - x); } +static inline int isinf_d (double x) { return isnan (x - x); } +static inline int isinf_ld (long double x) { return isnan (x - x); } +#endif +#endif + +typedef float float32; +typedef double float64; +#ifdef FLOATX80 +typedef long double floatx80; +#endif + +typedef union { + float32 f; + uint32_t i; +} float32u; +typedef union { + float64 f; + uint64_t i; +} float64u; +#ifdef FLOATX80 +typedef union { + floatx80 f; + struct { + uint64_t low; + uint16_t high; + } i; +} floatx80u; +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point rounding mode. +*----------------------------------------------------------------------------*/ +#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) \ + || defined(CONFIG_SOLARIS) +#if defined(__OpenBSD__) +#define FE_RM FP_RM +#define FE_RP FP_RP +#define FE_RZ FP_RZ +#endif +enum { + float_round_nearest_even = FP_RN, + float_round_down = FP_RM, + float_round_up = FP_RP, + float_round_to_zero = FP_RZ +}; +#else +enum { + float_round_nearest_even = FE_TONEAREST, + float_round_down = FE_DOWNWARD, + float_round_up = FE_UPWARD, + float_round_to_zero = FE_TOWARDZERO +}; +#endif + +typedef struct float_status { + int float_rounding_mode; +#ifdef FLOATX80 + int floatx80_rounding_precision; +#endif +} float_status; + +void set_float_rounding_mode(int val STATUS_PARAM); +#ifdef FLOATX80 +void set_floatx80_rounding_precision(int val STATUS_PARAM); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. 
+*----------------------------------------------------------------------------*/ +float32 int32_to_float32( int STATUS_PARAM); +float32 uint32_to_float32( unsigned int STATUS_PARAM); +float64 int32_to_float64( int STATUS_PARAM); +float64 uint32_to_float64( unsigned int STATUS_PARAM); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( int STATUS_PARAM); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( int STATUS_PARAM); +#endif +float32 int64_to_float32( int64_t STATUS_PARAM); +float32 uint64_to_float32( uint64_t STATUS_PARAM); +float64 int64_to_float64( int64_t STATUS_PARAM); +float64 uint64_to_float64( uint64_t v STATUS_PARAM); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( int64_t STATUS_PARAM); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( int64_t STATUS_PARAM); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float32_to_int32( float32 STATUS_PARAM); +int float32_to_int32_round_to_zero( float32 STATUS_PARAM); +unsigned int float32_to_uint32( float32 a STATUS_PARAM); +unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM); +int64_t float32_to_int64( float32 STATUS_PARAM); +int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM); +float64 float32_to_float64( float32 STATUS_PARAM); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 STATUS_PARAM); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 STATUS_PARAM); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 float32_round_to_int( float32 STATUS_PARAM); +INLINE float32 float32_add( float32 a, float32 b STATUS_PARAM) +{ + return a + b; +} +INLINE float32 float32_sub( float32 a, float32 b STATUS_PARAM) +{ + return a - b; +} +INLINE float32 float32_mul( float32 a, float32 b STATUS_PARAM) +{ + return a * b; +} +INLINE float32 float32_div( float32 a, float32 b STATUS_PARAM) +{ + return a / b; +} +float32 float32_rem( float32, float32 STATUS_PARAM); +float32 float32_sqrt( float32 STATUS_PARAM); +INLINE int float32_eq( float32 a, float32 b STATUS_PARAM) +{ + return a == b; +} +INLINE int float32_le( float32 a, float32 b STATUS_PARAM) +{ + return a <= b; +} +INLINE int float32_lt( float32 a, float32 b STATUS_PARAM) +{ + return a < b; +} +INLINE int float32_eq_signaling( float32 a, float32 b STATUS_PARAM) +{ + return a <= b && a >= b; +} +INLINE int float32_le_quiet( float32 a, float32 b STATUS_PARAM) +{ + return islessequal(a, b); +} +INLINE int float32_lt_quiet( float32 a, float32 b STATUS_PARAM) +{ + return isless(a, b); +} +INLINE int float32_unordered( float32 a, float32 b STATUS_PARAM) +{ + return isunordered(a, b); + +} +int float32_compare( float32, float32 STATUS_PARAM ); +int float32_compare_quiet( float32, float32 STATUS_PARAM ); +int float32_is_signaling_nan( float32 ); +int float32_is_nan( float32 ); + +INLINE float32 float32_abs(float32 a) +{ + return fabsf(a); +} + +INLINE float32 float32_chs(float32 a) +{ + return -a; +} + +INLINE float32 float32_is_infinity(float32 a) +{ + return fpclassify(a) == FP_INFINITE; +} + +INLINE float32 float32_is_neg(float32 a) +{ + float32u u; + u.f = a; + return u.i >> 31; +} + +INLINE float32 float32_is_zero(float32 a) +{ + return fpclassify(a) == FP_ZERO; +} + +INLINE float32 
float32_scalbn(float32 a, int n) +{ + return scalbnf(a, n); +} + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float64_to_int32( float64 STATUS_PARAM ); +int float64_to_int32_round_to_zero( float64 STATUS_PARAM ); +unsigned int float64_to_uint32( float64 STATUS_PARAM ); +unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM ); +int64_t float64_to_int64( float64 STATUS_PARAM ); +int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM ); +uint64_t float64_to_uint64( float64 STATUS_PARAM ); +uint64_t float64_to_uint64_round_to_zero( float64 STATUS_PARAM ); +float32 float64_to_float32( float64 STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 STATUS_PARAM ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. +*----------------------------------------------------------------------------*/ +float64 float64_round_to_int( float64 STATUS_PARAM ); +float64 float64_trunc_to_int( float64 STATUS_PARAM ); +INLINE float64 float64_add( float64 a, float64 b STATUS_PARAM) +{ + return a + b; +} +INLINE float64 float64_sub( float64 a, float64 b STATUS_PARAM) +{ + return a - b; +} +INLINE float64 float64_mul( float64 a, float64 b STATUS_PARAM) +{ + return a * b; +} +INLINE float64 float64_div( float64 a, float64 b STATUS_PARAM) +{ + return a / b; +} +float64 float64_rem( float64, float64 STATUS_PARAM ); +float64 float64_sqrt( float64 STATUS_PARAM ); +INLINE int float64_eq( float64 a, float64 b STATUS_PARAM) +{ + return a == b; +} +INLINE int float64_le( float64 a, float64 b STATUS_PARAM) +{ + return a <= b; +} +INLINE int float64_lt( float64 a, float64 b STATUS_PARAM) +{ + return a < b; +} +INLINE int float64_eq_signaling( float64 a, float64 b STATUS_PARAM) +{ + return a <= b && a >= b; +} +INLINE int float64_le_quiet( float64 a, float64 b STATUS_PARAM) +{ + return islessequal(a, b); +} +INLINE int float64_lt_quiet( float64 a, float64 b STATUS_PARAM) +{ + return isless(a, b); + +} +INLINE int float64_unordered( float64 a, float64 b STATUS_PARAM) +{ + return isunordered(a, b); + +} +int float64_compare( float64, float64 STATUS_PARAM ); +int float64_compare_quiet( float64, float64 STATUS_PARAM ); +int float64_is_signaling_nan( float64 ); +int float64_is_nan( float64 ); + +INLINE float64 float64_abs(float64 a) +{ + return fabs(a); +} + +INLINE float64 float64_chs(float64 a) +{ + return -a; +} + +INLINE float64 float64_is_infinity(float64 a) +{ + return fpclassify(a) == FP_INFINITE; +} + +INLINE float64 float64_is_neg(float64 a) +{ + float64u u; + u.f = a; + return u.i >> 63; +} + +INLINE float64 float64_is_zero(float64 a) +{ + return fpclassify(a) == FP_ZERO; +} + +INLINE float64 float64_scalbn(float64 a, int n) +{ + return scalbn(a, n); +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision conversion routines. 
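+| In this header floatx80 is simply the host's long double type.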
+*----------------------------------------------------------------------------*/ +int floatx80_to_int32( floatx80 STATUS_PARAM ); +int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM ); +int64_t floatx80_to_int64( floatx80 STATUS_PARAM); +int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM); +float32 floatx80_to_float32( floatx80 STATUS_PARAM ); +float64 floatx80_to_float64( floatx80 STATUS_PARAM ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. +*----------------------------------------------------------------------------*/ +floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM ); +INLINE floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a + b; +} +INLINE floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a - b; +} +INLINE floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a * b; +} +INLINE floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a / b; +} +floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_sqrt( floatx80 STATUS_PARAM ); +INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a == b; +} +INLINE int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a <= b; +} +INLINE int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a < b; +} +INLINE int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM) +{ + return a <= b && a >= b; +} +INLINE int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM) +{ + return islessequal(a, b); +} +INLINE int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM) +{ + return isless(a, b); + +} +INLINE int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM) +{ + return isunordered(a, b); + +} +int floatx80_compare( floatx80, floatx80 STATUS_PARAM ); +int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM ); +int floatx80_is_signaling_nan( floatx80 ); +int floatx80_is_nan( floatx80 ); + +INLINE floatx80 floatx80_abs(floatx80 a) +{ + return fabsl(a); +} + +INLINE floatx80 floatx80_chs(floatx80 a) +{ + return -a; +} + +INLINE floatx80 floatx80_is_infinity(floatx80 a) +{ + return fpclassify(a) == FP_INFINITE; +} + +INLINE floatx80 floatx80_is_neg(floatx80 a) +{ + floatx80u u; + u.f = a; + return u.i.high >> 15; +} + +INLINE floatx80 floatx80_is_zero(floatx80 a) +{ + return fpclassify(a) == FP_ZERO; +} + +INLINE floatx80 floatx80_scalbn(floatx80 a, int n) +{ + return scalbnl(a, n); +} + +#endif diff --git a/src/recompiler/fpu/softfloat-specialize.h b/src/recompiler/fpu/softfloat-specialize.h new file mode 100644 index 00000000..8e6aceb5 --- /dev/null +++ b/src/recompiler/fpu/softfloat-specialize.h @@ -0,0 +1,581 @@ + +/*============================================================================ + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. 
More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. + +=============================================================================*/ + +#if defined(TARGET_MIPS) || defined(TARGET_HPPA) +#define SNAN_BIT_IS_ONE 1 +#else +#define SNAN_BIT_IS_ONE 0 +#endif + +/*---------------------------------------------------------------------------- +| Raises the exceptions specified by `flags'. Floating-point traps can be +| defined here if desired. It is currently not possible for such a trap +| to substitute a result value. If traps are not implemented, this routine +| should be simply `float_exception_flags |= flags;'. +*----------------------------------------------------------------------------*/ + +void float_raise( int8 flags STATUS_PARAM ) +{ + STATUS(float_exception_flags) |= flags; +} + +/*---------------------------------------------------------------------------- +| Internal canonical NaN format. +*----------------------------------------------------------------------------*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/*---------------------------------------------------------------------------- +| The pattern for a default generated single-precision NaN. +*----------------------------------------------------------------------------*/ +#if defined(TARGET_SPARC) +#define float32_default_nan make_float32(0x7FFFFFFF) +#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) +#define float32_default_nan make_float32(0x7FC00000) +#elif defined(TARGET_HPPA) +#define float32_default_nan make_float32(0x7FA00000) +#elif SNAN_BIT_IS_ONE +#define float32_default_nan make_float32(0x7FBFFFFF) +#else +#define float32_default_nan make_float32(0xFFC00000) +#endif + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int float32_is_nan( float32 a_ ) +{ + uint32_t a = float32_val(a_); +#if SNAN_BIT_IS_ONE + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); +#else + return ( 0xFF800000 <= (bits32) ( a<<1 ) ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. 
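+| With SNAN_BIT_IS_ONE zero this means: exponent field all ones, bit 22 (the
+| quiet bit) clear, and at least one of the remaining fraction bits set.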
+*----------------------------------------------------------------------------*/ + +int float32_is_signaling_nan( float32 a_ ) +{ + uint32_t a = float32_val(a_); +#if SNAN_BIT_IS_ONE + return ( 0xFF800000 <= (bits32) ( a<<1 ) ); +#else + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static commonNaNT float32ToCommonNaN( float32 a STATUS_PARAM ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR ); + z.sign = float32_val(a)>>31; + z.low = 0; + z.high = ( (bits64) float32_val(a) )<<41; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the single- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static float32 commonNaNToFloat32( commonNaNT a ) +{ + bits32 mantissa = a.high>>41; + if ( mantissa ) + return make_float32( + ( ( (bits32) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) ); + else + return float32_default_nan; +} + +/*---------------------------------------------------------------------------- +| Takes two single-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + bits32 av, bv, res; + + if ( STATUS(default_nan_mode) ) + return float32_default_nan; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + av = float32_val(a); + bv = float32_val(b); +#if SNAN_BIT_IS_ONE + av &= ~0x00400000; + bv &= ~0x00400000; +#else + av |= 0x00400000; + bv |= 0x00400000; +#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); + if ( aIsSignalingNaN ) { + if ( bIsSignalingNaN ) goto returnLargerSignificand; + res = bIsNaN ? bv : av; + } + else if ( aIsNaN ) { + if ( bIsSignalingNaN || ! bIsNaN ) + res = av; + else { + returnLargerSignificand: + if ( (bits32) ( av<<1 ) < (bits32) ( bv<<1 ) ) + res = bv; + else if ( (bits32) ( bv<<1 ) < (bits32) ( av<<1 ) ) + res = av; + else + res = ( av < bv ) ? av : bv; + } + } + else { + res = bv; + } + return make_float32(res); +} + +/*---------------------------------------------------------------------------- +| The pattern for a default generated double-precision NaN. 
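+| For targets without a special case below (and with SNAN_BIT_IS_ONE zero)
+| this is the negative quiet NaN LIT64( 0xFFF8000000000000 ).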
+*----------------------------------------------------------------------------*/ +#if defined(TARGET_SPARC) +#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF )) +#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) +#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 )) +#elif defined(TARGET_HPPA) +#define float64_default_nan make_float64(LIT64( 0x7FF4000000000000 )) +#elif SNAN_BIT_IS_ONE +#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF )) +#else +#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 )) +#endif + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int float64_is_nan( float64 a_ ) +{ + bits64 a = float64_val(a_); +#if SNAN_BIT_IS_ONE + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); +#else + return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int float64_is_signaling_nan( float64 a_ ) +{ + bits64 a = float64_val(a_); +#if SNAN_BIT_IS_ONE + return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) ); +#else + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static commonNaNT float64ToCommonNaN( float64 a STATUS_PARAM) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR); + z.sign = float64_val(a)>>63; + z.low = 0; + z.high = float64_val(a)<<12; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the double- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static float64 commonNaNToFloat64( commonNaNT a ) +{ + bits64 mantissa = a.high>>12; + + if ( mantissa ) + return make_float64( + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF0000000000000 ) + | ( a.high>>12 )); + else + return float64_default_nan; +} + +/*---------------------------------------------------------------------------- +| Takes two double-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. 
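+| The result is always quiet: a quiet NaN operand is preferred over a
+| signaling one, and when both operands are NaNs of the same kind the one
+| with the larger significand is returned.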
+*----------------------------------------------------------------------------*/ + +static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + bits64 av, bv, res; + + if ( STATUS(default_nan_mode) ) + return float64_default_nan; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + av = float64_val(a); + bv = float64_val(b); +#if SNAN_BIT_IS_ONE + av &= ~LIT64( 0x0008000000000000 ); + bv &= ~LIT64( 0x0008000000000000 ); +#else + av |= LIT64( 0x0008000000000000 ); + bv |= LIT64( 0x0008000000000000 ); +#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); + if ( aIsSignalingNaN ) { + if ( bIsSignalingNaN ) goto returnLargerSignificand; + res = bIsNaN ? bv : av; + } + else if ( aIsNaN ) { + if ( bIsSignalingNaN || ! bIsNaN ) + res = av; + else { + returnLargerSignificand: + if ( (bits64) ( av<<1 ) < (bits64) ( bv<<1 ) ) + res = bv; + else if ( (bits64) ( bv<<1 ) < (bits64) ( av<<1 ) ) + res = av; + else + res = ( av < bv ) ? av : bv; + } + } + else { + res = bv; + } + return make_float64(res); +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. The +| `high' and `low' values hold the most- and least-significant bits, +| respectively. +*----------------------------------------------------------------------------*/ +#if SNAN_BIT_IS_ONE +#define floatx80_default_nan_high 0x7FFF +#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF ) +#else +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xC000000000000000 ) +#endif + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is a +| quiet NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int floatx80_is_nan( floatx80 a ) +{ +#if SNAN_BIT_IS_ONE + bits64 aLow; + + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); +#else + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int floatx80_is_signaling_nan( floatx80 a ) +{ +#if SNAN_BIT_IS_ONE + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); +#else + bits64 aLow; + + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +| invalid exception is raised. 
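+| The 64-bit explicit significand of `a' becomes the high part of the
+| canonical NaN; the low part is zero.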
+*----------------------------------------------------------------------------*/ + +static commonNaNT floatx80ToCommonNaN( floatx80 a STATUS_PARAM) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the extended +| double-precision floating-point format. +*----------------------------------------------------------------------------*/ + +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + if (a.high) + z.low = a.high; + else + z.low = floatx80_default_nan_low; + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; +} + +/*---------------------------------------------------------------------------- +| Takes two extended double-precision floating-point values `a' and `b', one +| of which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + if ( STATUS(default_nan_mode) ) { + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); +#if SNAN_BIT_IS_ONE + a.low &= ~LIT64( 0xC000000000000000 ); + b.low &= ~LIT64( 0xC000000000000000 ); +#else + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); +#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); + if ( aIsSignalingNaN ) { + if ( bIsSignalingNaN ) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if ( aIsNaN ) { + if ( bIsSignalingNaN || ! bIsNaN ) return a; + returnLargerSignificand: + if ( a.low < b.low ) return b; + if ( b.low < a.low ) return a; + return ( a.high < b.high ) ? a : b; + } + else { + return b; + } +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. The `high' and +| `low' values hold the most- and least-significant bits, respectively. +*----------------------------------------------------------------------------*/ +#if SNAN_BIT_IS_ONE +#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) +#else +#define float128_default_nan_high LIT64( 0xFFFF800000000000 ) +#define float128_default_nan_low LIT64( 0x0000000000000000 ) +#endif + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. 
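+| `high' holds the sign bit, the 15-bit exponent and the upper 48 fraction
+| bits; `low' holds the remaining 64 fraction bits.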
+*----------------------------------------------------------------------------*/ + +int float128_is_nan( float128 a ) +{ +#if SNAN_BIT_IS_ONE + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); +#else + return + ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int float128_is_signaling_nan( float128 a ) +{ +#if SNAN_BIT_IS_ONE + return + ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); +#else + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static commonNaNT float128ToCommonNaN( float128 a STATUS_PARAM) +{ + commonNaNT z; + + if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR); + z.sign = a.high>>63; + shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the quadruple- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static float128 commonNaNToFloat128( commonNaNT a ) +{ + float128 z; + + shift128Right( a.high, a.low, 16, &z.high, &z.low ); + z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 ); + return z; +} + +/*---------------------------------------------------------------------------- +| Takes two quadruple-precision floating-point values `a' and `b', one of +| which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + if ( STATUS(default_nan_mode) ) { + a.low = float128_default_nan_low; + a.high = float128_default_nan_high; + return a; + } + + aIsNaN = float128_is_nan( a ); + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsNaN = float128_is_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); +#if SNAN_BIT_IS_ONE + a.high &= ~LIT64( 0x0000800000000000 ); + b.high &= ~LIT64( 0x0000800000000000 ); +#else + a.high |= LIT64( 0x0000800000000000 ); + b.high |= LIT64( 0x0000800000000000 ); +#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); + if ( aIsSignalingNaN ) { + if ( bIsSignalingNaN ) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if ( aIsNaN ) { + if ( bIsSignalingNaN || ! 
bIsNaN ) return a; + returnLargerSignificand: + if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b; + if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a; + return ( a.high < b.high ) ? a : b; + } + else { + return b; + } +} + +#endif diff --git a/src/recompiler/fpu/softfloat.c b/src/recompiler/fpu/softfloat.c new file mode 100644 index 00000000..6806ebc2 --- /dev/null +++ b/src/recompiler/fpu/softfloat.c @@ -0,0 +1,5883 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. + +=============================================================================*/ + +/* FIXME: Flush-To-Zero only effects results. Denormal inputs should also + be flushed to zero. */ +#include "softfloat.h" + +/*---------------------------------------------------------------------------- +| Primitive arithmetic functions, including multi-word arithmetic, and +| division and square root approximations. (Can be specialized to target if +| desired.) +*----------------------------------------------------------------------------*/ +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- +| Functions and definitions to determine: (1) whether tininess for underflow +| is detected before or after rounding by default, (2) what (if anything) +| happens when exceptions are raised, (3) how signaling NaNs are distinguished +| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +| are propagated from function inputs to output. These details are target- +| specific. 
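+| They are supplied by `softfloat-specialize.h', included just below.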
+*----------------------------------------------------------------------------*/ +#include "softfloat-specialize.h" + +void set_float_rounding_mode(int val STATUS_PARAM) +{ + STATUS(float_rounding_mode) = val; +} + +void set_float_exception_flags(int val STATUS_PARAM) +{ + STATUS(float_exception_flags) = val; +} + +#ifdef FLOATX80 +void set_floatx80_rounding_precision(int val STATUS_PARAM) +{ + STATUS(floatx80_rounding_precision) = val; +} +#endif + +/*---------------------------------------------------------------------------- +| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +| and 7, and returns the properly rounded 32-bit integer corresponding to the +| input. If `zSign' is 1, the input is negated before being converted to an +| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input +| is simply rounded to an integer, with the inexact exception raised if the +| input cannot be represented exactly as an integer. However, if the fixed- +| point input is too large, the invalid exception is raised and the largest +| positive or negative integer is returned. +*----------------------------------------------------------------------------*/ + +static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + int32 z; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise( float_flag_invalid STATUS_VAR); + return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; + return z; + +} + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit integer corresponding to the input. +| If `zSign' is 1, the input is negated before being converted to an integer. +| Ordinarily, the fixed-point input is simply rounded to an integer, with +| the inexact exception raised if the input cannot be represented exactly as +| an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest positive or negative integer is +| returned. +*----------------------------------------------------------------------------*/ + +static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment; + int64 z; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + increment = ( (sbits64) absZ1 < 0 ); + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && absZ1; + } + else { + increment = ( roundingMode == float_round_up ) && absZ1; + } + } + } + if ( increment ) { + ++absZ0; + if ( absZ0 == 0 ) goto overflow; + absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + } + z = absZ0; + if ( zSign ) z = - z; + if ( z && ( ( z < 0 ) ^ zSign ) ) { + overflow: + float_raise( float_flag_invalid STATUS_VAR); + return + zSign ? (sbits64) LIT64( 0x8000000000000000 ) + : LIT64( 0x7FFFFFFFFFFFFFFF ); + } + if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return float32_val(a) & 0x007FFFFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( float32_val(a)>>23 ) & 0xFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE flag extractFloat32Sign( float32 a ) +{ + + return float32_val(a)>>31; + +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal single-precision floating-point value represented +| by the denormalized significand `aSig'. The normalized exponent and +| significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| single-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + + return make_float32( + ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper single-precision floating- +| point value corresponding to the abstract input. 
Ordinarily, the abstract +| value is simply rounded and packed into the single-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal single- +| precision floating-point number. +| The input significand `zSig' has its binary point between bits 30 +| and 29, which is 7 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + flag isTiny; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (bits16) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 ); + isTiny = + ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; + if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); + } + } + if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper single-precision floating- +| point value corresponding to the abstract input. This routine is just like +| `roundAndPackFloat32' except that `zSig' does not have to be normalized. +| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +| floating-point exponent. 
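+| As an illustration, the shift below places the most significant set bit of
+| `zSig' at bit 30, which is where `roundAndPackFloat32' expects the leading
+| 1 of a normalized significand, and `zExp' is decreased by the same amount;
+| a hypothetical zSig = 0x00123456 (11 leading zeros) would be shifted left
+| by 10 places while zExp is reduced by 10.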
+*----------------------------------------------------------------------------*/ + +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR); + +} + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the double-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE bits64 extractFloat64Frac( float64 a ) +{ + + return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF ); + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the double-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE int16 extractFloat64Exp( float64 a ) +{ + + return ( float64_val(a)>>52 ) & 0x7FF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the double-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE flag extractFloat64Sign( float64 a ) +{ + + return float64_val(a)>>63; + +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal double-precision floating-point value represented +| by the denormalized significand `aSig'. The normalized exponent and +| significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +static void + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ) - 11; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| double-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + + return make_float64( + ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper double-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the double-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. 
If the abstract value is too small, the input value is rounded +| to a subnormal number, and the underflow and inexact exceptions are raised +| if the abstract input cannot be represented exactly as a subnormal double- +| precision floating-point number. +| The input significand `zSig' has its binary point between bits 62 +| and 61, which is 10 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven; + int16 roundIncrement, roundBits; + flag isTiny; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x200; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x3FF; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (bits16) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 ); + isTiny = + ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; + if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); + } + } + if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper double-precision floating- +| point value corresponding to the abstract input. This routine is just like +| `roundAndPackFloat64' except that `zSig' does not have to be normalized. +| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +| floating-point exponent. +*----------------------------------------------------------------------------*/ + +static float64 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. 
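+| Unlike the single- and double-precision formats, the extended format keeps
+| its leading integer bit explicit, so the complete 64-bit significand,
+| integer bit included, is held in `a.low' and no hidden 1 has to be OR-ed
+| in by the callers of this routine.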
+*----------------------------------------------------------------------------*/ + +INLINE bits64 extractFloatx80Frac( floatx80 a ) +{ + + return a.low; + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +INLINE int32 extractFloatx80Exp( floatx80 a ) +{ + + return a.high & 0x7FFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the extended double-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +INLINE flag extractFloatx80Sign( floatx80 a ) +{ + + return a.high>>15; + +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +static void + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ); + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an +| extended double-precision floating-point value, returning the result. +*----------------------------------------------------------------------------*/ + +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +{ + floatx80 z; + + z.low = zSig; + z.high = ( ( (bits16) zSign )<<15 ) + zExp; + return z; + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| rounded and packed into the extended double-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal extended +| double-precision floating-point number. +| If `roundingPrecision' is 32 or 64, the result is rounded to the same +| number of bits as single or double precision, respectively. Otherwise, the +| result is rounded to the full precision of the extended double-precision +| format. +| The input significand must be normalized or smaller. If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. The +| handling of underflow and overflow follows the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
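+| Concretely, a `roundingPrecision' of 64 treats the low 11 bits of `zSig0'
+| as round bits (mask 0x7FF), a `roundingPrecision' of 32 treats the low 40
+| bits that way (mask 0xFFFFFFFFFF), and in both cases a nonzero `zSig1' is
+| first folded into the sticky bit of `zSig0'.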
+*----------------------------------------------------------------------------*/ + +static floatx80 + roundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + int64 roundIncrement, roundMask, roundBits; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } + if ( zExp <= 0 ) { + if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 ); + isTiny = + ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); + if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; + zSig0 += roundIncrement; + if ( (sbits64) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + increment = ( (sbits64) zSig1 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( zExp <= 0 ) { + isTiny = + ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ! 
increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR); + if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; + if ( roundNearestEven ) { + increment = ( (sbits64) zSig1 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= + ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if ( (sbits64) zSig0 < 0 ) zExp = 1; + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. +*----------------------------------------------------------------------------*/ + +static floatx80 + normalizeRoundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + STATUS_PARAM) +{ + int8 shiftCount; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return + roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the least-significant 64 fraction bits of the quadruple-precision +| floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE bits64 extractFloat128Frac1( float128 a ) +{ + + return a.low; + +} + +/*---------------------------------------------------------------------------- +| Returns the most-significant 48 fraction bits of the quadruple-precision +| floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE bits64 extractFloat128Frac0( float128 a ) +{ + + return a.high & LIT64( 0x0000FFFFFFFFFFFF ); + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the quadruple-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +INLINE int32 extractFloat128Exp( float128 a ) +{ + + return ( a.high>>48 ) & 0x7FFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the quadruple-precision floating-point value `a'. 
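+| For reference, the quadruple-precision encoding used here stores the sign
+| in bit 63 of `a.high', a 15-bit exponent in bits 62 through 48 of
+| `a.high', the 48 most significant fraction bits in the remaining bits of
+| `a.high', and the other 64 fraction bits in `a.low'.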
+*----------------------------------------------------------------------------*/ + +INLINE flag extractFloat128Sign( float128 a ) +{ + + return a.high>>63; + +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal quadruple-precision floating-point value +| represented by the denormalized significand formed by the concatenation of +| `aSig0' and `aSig1'. The normalized exponent is stored at the location +| pointed to by `zExpPtr'. The most significant 49 bits of the normalized +| significand are stored at the location pointed to by `zSig0Ptr', and the +| least significant 64 bits of the normalized significand are stored at the +| location pointed to by `zSig1Ptr'. +*----------------------------------------------------------------------------*/ + +static void + normalizeFloat128Subnormal( + bits64 aSig0, + bits64 aSig1, + int32 *zExpPtr, + bits64 *zSig0Ptr, + bits64 *zSig1Ptr + ) +{ + int8 shiftCount; + + if ( aSig0 == 0 ) { + shiftCount = countLeadingZeros64( aSig1 ) - 15; + if ( shiftCount < 0 ) { + *zSig0Ptr = aSig1>>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 63 ); + } + else { + *zSig0Ptr = aSig1<<shiftCount; + *zSig1Ptr = 0; + } + *zExpPtr = - shiftCount - 63; + } + else { + shiftCount = countLeadingZeros64( aSig0 ) - 15; + shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); + *zExpPtr = 1 - shiftCount; + } + +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', the exponent `zExp', and the significand formed +| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision +| floating-point value, returning the result. After being shifted into the +| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply +| added together to form the most significant 32 bits of the result. This +| means that any integer portion of `zSig0' will be added into the exponent. +| Since a properly normalized significand will have an integer portion equal +| to 1, the `zExp' input should be 1 less than the desired result exponent +| whenever `zSig0' and `zSig1' concatenated form a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +INLINE float128 + packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +{ + float128 z; + + z.low = zSig1; + z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; + return z; + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0', `zSig1', +| and `zSig2', and returns the proper quadruple-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| simply rounded and packed into the quadruple-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal quadruple- +| precision floating-point number. +| The input significand must be normalized or smaller. 
If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. In the +| usual case that the input significand is normalized, `zExp' must be 1 less +| than the ``true'' floating-point exponent. The handling of underflow and +| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float128 + roundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = ( roundingMode == float_round_nearest_even ); + increment = ( (sbits64) zSig2 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + if ( 0x7FFD <= (bits32) zExp ) { + if ( ( 0x7FFD < zExp ) + || ( ( zExp == 0x7FFD ) + && eq128( + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ), + zSig0, + zSig1 + ) + && increment + ) + ) { + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return + packFloat128( + zSign, + 0x7FFE, + LIT64( 0x0000FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( zExp < 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 ); + isTiny = + ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ! increment + || lt128( + zSig0, + zSig1, + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); + zExp = 0; + if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); + if ( roundNearestEven ) { + increment = ( (sbits64) zSig2 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + } + if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact; + if ( increment ) { + add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); + zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + } + else { + if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; + } + return packFloat128( zSign, zExp, zSig0, zSig1 ); + +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand formed by the concatenation of `zSig0' and `zSig1', and +| returns the proper quadruple-precision floating-point value corresponding +| to the abstract input. This routine is just like `roundAndPackFloat128' +| except that the input significand has fewer bits and does not have to be +| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- +| point exponent. 
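+| In outline, a zero `zSig0' is first replaced by `zSig1' (with `zExp'
+| lowered by 64), the pair is then shifted so that its leading 1 lands in
+| bit 48 of the high word, and any bits pushed out to the right are gathered
+| into a third, sticky word for `roundAndPackFloat128' to round on.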
+*----------------------------------------------------------------------------*/ + +static float128 + normalizeRoundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM) +{ + int8 shiftCount; + bits64 zSig2; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ) - 15; + if ( 0 <= shiftCount ) { + zSig2 = 0; + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + } + else { + shift128ExtraRightJamming( + zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + } + zExp -= shiftCount; + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the single-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 int32_to_float32( int32 a STATUS_PARAM ) +{ + flag zSign; + + if ( a == 0 ) return float32_zero; + if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the double-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 int32_to_float64( int32 a STATUS_PARAM ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return float64_zero; + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' to +| the quadruple-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 int32_to_float128( int32 a STATUS_PARAM ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig0; + + if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? 
- a : a; + shiftCount = countLeadingZeros32( absA ) + 17; + zSig0 = absA; + return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit two's complement integer `a' +| to the single-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 int64_to_float32( int64 a STATUS_PARAM ) +{ + flag zSign; + uint64 absA; + int8 shiftCount; + + if ( a == 0 ) return float32_zero; + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros64( absA ) - 40; + if ( 0 <= shiftCount ) { + return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); + } + else { + shiftCount += 7; + if ( shiftCount < 0 ) { + shift64RightJamming( absA, - shiftCount, &absA ); + } + else { + absA <<= shiftCount; + } + return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR ); + } + +} + +float32 uint64_to_float32( uint64 a STATUS_PARAM ) +{ + int8 shiftCount; + + if ( a == 0 ) return float32_zero; + shiftCount = countLeadingZeros64( a ) - 40; + if ( 0 <= shiftCount ) { + return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount ); + } + else { + shiftCount += 7; + if ( shiftCount < 0 ) { + shift64RightJamming( a, - shiftCount, &a ); + } + else { + a <<= shiftCount; + } + return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR ); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit two's complement integer `a' +| to the double-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 int64_to_float64( int64 a STATUS_PARAM ) +{ + flag zSign; + + if ( a == 0 ) return float64_zero; + if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { + return packFloat64( 1, 0x43E, 0 ); + } + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR ); + +} + +float64 uint64_to_float64( uint64 a STATUS_PARAM ) +{ + if ( a == 0 ) return float64_zero; + return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) +{ + flag zSign; + uint64 absA; + int8 shiftCount; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros64( absA ); + return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit two's complement integer `a' to +| the quadruple-precision floating-point format. 
The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 int64_to_float128( int64 a STATUS_PARAM ) +{ + flag zSign; + uint64 absA; + int8 shiftCount; + int32 zExp; + bits64 zSig0, zSig1; + + if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros64( absA ) + 49; + zExp = 0x406E - shiftCount; + if ( 64 <= shiftCount ) { + zSig1 = 0; + zSig0 = absA; + shiftCount -= 64; + } + else { + zSig1 = absA; + zSig0 = 0; + } + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + return packFloat128( zSign, zExp, zSig0, zSig1 ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 32-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int32 float32_to_int32( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 aSig64; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( ( aExp == 0xFF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= 0x00800000; + shiftCount = 0xAF - aExp; + aSig64 = aSig; + aSig64 <<= 32; + if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); + return roundAndPackInt32( aSign, aSig64 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 32-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xCF000000 ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + } + return (sbits32) 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int64 float32_to_int64( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 aSig64, aSigExtra; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = 0xBE - aExp; + if ( shiftCount < 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + if ( aExp ) aSig |= 0x00800000; + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); + return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. If +| `a' is a NaN, the largest positive integer is returned. Otherwise, if the +| conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 aSig64; + int64 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0xBE; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xDF000000 ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + aSig64 = aSig | 0x00800000; + aSig64 <<= 40; + z = aSig64>>( - shiftCount ); + if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the double-precision floating-point format. 
The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float32_to_float64( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR )); + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 float32_to_floatx80( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + aSig |= 0x00800000; + return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the double-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float32_to_float128( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Rounds the single-precision floating-point value `a' to an integer, and +| returns the result as a single-precision floating-point value. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
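+| For example, in round-to-nearest-even mode 2.5 rounds to 2.0 while 3.5
+| rounds to 4.0, and the inexact exception is raised whenever the result
+| differs from the operand.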
+*----------------------------------------------------------------------------*/ + +float32 float32_round_to_int( float32 a STATUS_PARAM) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + bits32 z; + + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN( a, a STATUS_VAR ); + } + return a; + } + if ( aExp <= 0x7E ) { + if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a; + STATUS(float_exception_flags) |= float_flag_inexact; + aSign = extractFloat32Sign( a ); + switch ( STATUS(float_rounding_mode) ) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_down: + return make_float32(aSign ? 0xBF800000 : 0); + case float_round_up: + return make_float32(aSign ? 0x80000000 : 0x3F800000); + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + z = float32_val(a); + roundingMode = STATUS(float_rounding_mode); + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact; + return make_float32(z); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the single-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
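+| Both significands are first shifted left by 6 bits, so that the implicit
+| leading 1 corresponds to bit 29 (0x20000000) and the bits below it serve
+| as guard and sticky bits; the operand with the smaller exponent is then
+| aligned with `shift32RightJamming', which folds every bit shifted out into
+| a sticky bit, before the sum is rounded and packed.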
+*----------------------------------------------------------------------------*/ + +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return a; + } + if ( aExp == 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 ); + return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + } + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the single- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
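+| When the two inputs have equal magnitude the difference is an exact zero,
+| which is returned as +0 in every rounding mode except round-down, where -0
+| is returned, as the IEC/IEEE standard requires.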
+*----------------------------------------------------------------------------*/ + +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the single-precision floating-point values `a' +| and `b'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_add( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign STATUS_VAR); + } + else { + return subFloat32Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the single-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_sub( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign STATUS_VAR ); + } + else { + return addFloat32Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the single-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
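+| The 24-bit significands (with the hidden bit made explicit) are multiplied
+| into a 64-bit product that is narrowed with `shift64RightJamming', so no
+| information is lost before rounding; multiplying an infinity by zero
+| raises the invalid exception and yields the default NaN.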
+*----------------------------------------------------------------------------*/ + +float32 float32_mul( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig; + bits64 zSig64; + bits32 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b STATUS_VAR ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + zSig = zSig64; + if ( 0 <= (sbits32) ( zSig<<1 ) ) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the single-precision floating-point value `a' +| by the corresponding value `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
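+| Dividing a nonzero finite value by zero raises the divide-by-zero
+| exception and returns a correctly signed infinity, while 0/0 and
+| infinity/infinity raise the invalid exception and return the default NaN;
+| the quotient itself comes from a 64-by-32 bit integer division, followed
+| by a sticky-bit fixup so that an inexact quotient is never mistaken for an
+| exact one.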
+*----------------------------------------------------------------------------*/ + +float32 float32_div( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + float_raise( float_flag_divbyzero STATUS_VAR); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = ( ( (bits64) aSig )<<32 ) / bSig; + if ( ( zSig & 0x3F ) == 0 ) { + zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); + } + return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the single-precision floating-point value `a' +| with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
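+| This is the IEEE remainder rather than a truncating `fmod': the quotient
+| a/b is rounded to the nearest integer, with ties going to the even
+| quotient, so the result may be negative even when both operands are
+| positive and its magnitude never exceeds half of |b|.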
+*----------------------------------------------------------------------------*/ + +float32 float32_rem( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig; + bits32 q; + bits64 aSig64, bSig64, q64; + bits32 alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b STATUS_VAR ); + } + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig |= 0x00800000; + bSig |= 0x00800000; + if ( expDiff < 32 ) { + aSig <<= 8; + bSig <<= 8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + if ( 0 < expDiff ) { + q = ( ( (bits64) aSig )<<32 ) / bSig; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + } + else { + if ( bSig <= aSig ) aSig -= bSig; + aSig64 = ( (bits64) aSig )<<40; + bSig64 = ( (bits64) bSig )<<40; + expDiff -= 64; + while ( 0 < expDiff ) { + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + aSig64 = - ( ( bSig * q64 )<<38 ); + expDiff -= 62; + } + expDiff += 64; + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + q = q64>>( 64 - expDiff ); + bSig <<= 6; + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the single-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_sqrt( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig; + bits64 rem, term; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return float32_zero; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + aSig >>= aExp & 1; + term = ( (bits64) zSig ) * zSig; + rem = ( ( (bits64) aSig )<<32 ) - term; + while ( (sbits64) rem < 0 ) { + --zSig; + rem += ( ( (bits64) zSig )<<1 ) | 1; + } + zSig |= ( rem != 0 ); + } + shift32RightJamming( zSig, 1, &zSig ); + roundAndPack: + return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the binary exponential of the single-precision floating-point value +| `a'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +| +| Uses the following identities: +| +| 1. ------------------------------------------------------------------------- +| x x*ln(2) +| 2 = e +| +| 2. ------------------------------------------------------------------------- +| 2 3 4 5 n +| x x x x x x x +| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... +| 1! 2! 3! 4! 5! n! +*----------------------------------------------------------------------------*/ + +static const float64 float32_exp2_coefficients[15] = +{ + make_float64( 0x3ff0000000000000ll ), /* 1 */ + make_float64( 0x3fe0000000000000ll ), /* 2 */ + make_float64( 0x3fc5555555555555ll ), /* 3 */ + make_float64( 0x3fa5555555555555ll ), /* 4 */ + make_float64( 0x3f81111111111111ll ), /* 5 */ + make_float64( 0x3f56c16c16c16c17ll ), /* 6 */ + make_float64( 0x3f2a01a01a01a01all ), /* 7 */ + make_float64( 0x3efa01a01a01a01all ), /* 8 */ + make_float64( 0x3ec71de3a556c734ll ), /* 9 */ + make_float64( 0x3e927e4fb7789f5cll ), /* 10 */ + make_float64( 0x3e5ae64567f544e4ll ), /* 11 */ + make_float64( 0x3e21eed8eff8d898ll ), /* 12 */ + make_float64( 0x3de6124613a86d09ll ), /* 13 */ + make_float64( 0x3da93974a8c07c9dll ), /* 14 */ + make_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ +}; + +float32 float32_exp2( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + float64 r, x, xn; + int i; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0xFF) { + if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR ); + return (aSign) ? float32_zero : a; + } + if (aExp == 0) { + if (aSig == 0) return float32_one; + } + + float_raise( float_flag_inexact STATUS_VAR); + + /* ******************************* */ + /* using float64 for approximation */ + /* ******************************* */ + x = float32_to_float64(a STATUS_VAR); + x = float64_mul(x, float64_ln2 STATUS_VAR); + + xn = x; + r = float64_one; + for (i = 0 ; i < 15 ; i++) { + float64 f; + + f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR); + r = float64_add(r, f STATUS_VAR); + + xn = float64_mul(xn, x STATUS_VAR); + } + + return float64_to_float32(r, status); +} + +/*---------------------------------------------------------------------------- +| Returns the binary log of the single-precision floating-point value `a'. 
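+| (It is the inverse of float32_exp2 above, whose 15 series coefficients are
+| the float64 encodings of 1/n! for n = 1..15; for instance
+| 0x3fc5555555555555 encodes 1/6.  As a rough sanity check, not code taken
+| from this file, float32_log2( float32_exp2( x STATUS_VAR ) STATUS_VAR )
+| should reproduce x up to rounding error whenever no overflow or underflow
+| occurs.)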
+| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ +float32 float32_log2( float32 a STATUS_PARAM ) +{ + flag aSign, zSign; + int16 aExp; + bits32 aSig, zSig, i; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( aSign ) { + float_raise( float_flag_invalid STATUS_VAR); + return float32_default_nan; + } + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR ); + return a; + } + + aExp -= 0x7F; + aSig |= 0x00800000; + zSign = aExp < 0; + zSig = aExp << 23; + + for (i = 1 << 22; i > 0; i >>= 1) { + aSig = ( (bits64)aSig * aSig ) >> 23; + if ( aSig & 0x01000000 ) { + aSig >>= 1; + zSig |= i; + } + } + + if ( zSign ) + zSig = -zSig; + + return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR ); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_eq( float32 a, float32 b STATUS_PARAM ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + return ( float32_val(a) == float32_val(b) ) || + ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| or equal to the corresponding value `b', and 0 otherwise. The comparison +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_le( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits32 av, bv; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
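+|
+| A note on the pattern shared by float32_eq, float32_le above and the
+| remaining comparison predicates below: any NaN operand makes the predicate
+| return 0 (the ordered comparisons raise float_flag_invalid for any NaN,
+| the _quiet variants only for signaling NaNs), the expression
+| ( ( av | bv )<<1 ) == 0 treats +0 and -0 as equal, and for operands of the
+| same sign the raw bit patterns order correctly once the sign is folded in
+| via aSign ^ ( av < bv ).  An illustrative caller-side minimum helper (a
+| hypothetical routine, not one defined in this file):
+|
+|     static float32 example_min32( float32 a, float32 b STATUS_PARAM )
+|     {
+|         return float32_lt( b, a STATUS_VAR ) ? b : a;
+|     }
+|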
+*----------------------------------------------------------------------------*/ + +int float32_lt( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits32 av, bv; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_eq_signaling( float32 a, float32 b STATUS_PARAM ) +{ + bits32 av, bv; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + av = float32_val(a); + bv = float32_val(b); + return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than or +| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +| cause an exception. Otherwise, the comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_le_quiet( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits32 av, bv; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. Otherwise, the comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float32_lt_quiet( float32 a, float32 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits32 av, bv; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 32-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int32 float64_to_int32( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 32-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FF ) { + if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + return aSign ? 
(sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int64 float64_to_int64( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, aSigExtra; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + if ( shiftCount <= 0 ) { + if ( 0x43E < aExp ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign + || ( ( aExp == 0x7FF ) + && ( aSig != LIT64( 0x0010000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + aSigExtra = 0; + aSig <<= - shiftCount; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + int64 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = aExp - 0x433; + if ( 0 <= shiftCount ) { + if ( 0x43E <= aExp ) { + if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign + || ( ( aExp == 0x7FF ) + && ( aSig != LIT64( 0x0010000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + z = aSig<<shiftCount; + } + else { + if ( aExp < 0x3FE ) { + if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + z = aSig>>( - shiftCount ); + if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the single-precision floating-point format. 
The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float64_to_float32( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + bits32 zSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) ); + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR ); + +} + + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| half-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ +static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig) +{ + return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig; +} + +/* Half precision floats come in two formats: standard IEEE and "ARM" format. + The latter gains extra exponent range by omitting the NaN/Inf encodings. */ + +float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSign = a >> 15; + aExp = (a >> 10) & 0x1f; + aSig = a & 0x3ff; + + if (aExp == 0x1f && ieee) { + if (aSig) { + /* Make sure correct exceptions are raised. */ + float32ToCommonNaN(a STATUS_VAR); + aSig |= 0x200; + } + return packFloat32(aSign, 0xff, aSig << 13); + } + if (aExp == 0) { + int8 shiftCount; + + if (aSig == 0) { + return packFloat32(aSign, 0, 0); + } + + shiftCount = countLeadingZeros32( aSig ) - 21; + aSig = aSig << shiftCount; + aExp = -shiftCount; + } + return packFloat32( aSign, aExp + 0x70, aSig << 13); +} + +bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int16 aExp; + bits32 aSig; + bits32 mask; + bits32 increment; + int8 roundingMode; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if (aSig) { + /* Make sure correct exceptions are raised. */ + float32ToCommonNaN(a STATUS_VAR); + aSig |= 0x00400000; + } + return packFloat16(aSign, 0x1f, aSig >> 13); + } + if (aExp == 0 && aSign == 0) { + return packFloat16(aSign, 0, 0); + } + /* Decimal point between bits 22 and 23. */ + aSig |= 0x00800000; + aExp -= 0x7f; + if (aExp < -14) { + mask = 0x007fffff; + if (aExp < -24) { + aExp = -25; + } else { + mask >>= 24 + aExp; + } + } else { + mask = 0x00001fff; + } + if (aSig & mask) { + float_raise( float_flag_underflow STATUS_VAR ); + roundingMode = STATUS(float_rounding_mode); + switch (roundingMode) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((aSig & mask) == increment) { + increment = aSig & (increment << 1); + } + break; + case float_round_up: + increment = aSign ? 
0 : mask; + break; + case float_round_down: + increment = aSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + aSig += increment; + if (aSig >= 0x01000000) { + aSig >>= 1; + aExp++; + } + } else if (aExp < -14 + && STATUS(float_detect_tininess) == float_tininess_before_rounding) { + float_raise( float_flag_underflow STATUS_VAR); + } + + if (ieee) { + if (aExp > 15) { + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat16(aSign, 0x1f, 0); + } + } else { + if (aExp > 16) { + float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat16(aSign, 0x1f, 0x3ff); + } + } + if (aExp < -24) { + return packFloat16(aSign, 0, 0); + } + if (aExp < -14) { + aSig >>= -14 - aExp; + aExp = -14; + } + return packFloat16(aSign, aExp + 14, aSig >> 13); +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 float64_to_floatx80( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the quadruple-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float64_to_float128( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Rounds the double-precision floating-point value `a' to an integer, and +| returns the result as a double-precision floating-point value. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
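+|
+| The float64_trunc_to_int wrapper below temporarily overrides the rounding
+| mode in the status block around a call to this routine; the same pattern
+| gives the other directed roundings.  An illustrative floor-style sketch
+| along those lines (a hypothetical helper, not part of this file):
+|
+|     static float64 example_floor64( float64 a STATUS_PARAM )
+|     {
+|         int oldmode;
+|         float64 res;
+|         oldmode = STATUS(float_rounding_mode);
+|         STATUS(float_rounding_mode) = float_round_down;
+|         res = float64_round_to_int( a STATUS_VAR );
+|         STATUS(float_rounding_mode) = oldmode;
+|         return res;
+|     }
+|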
+*----------------------------------------------------------------------------*/ + +float64 float64_round_to_int( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + bits64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN( a, a STATUS_VAR ); + } + return a; + } + if ( aExp < 0x3FF ) { + if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a; + STATUS(float_exception_flags) |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch ( STATUS(float_rounding_mode) ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_down: + return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); + case float_round_up: + return make_float64( + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 )); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = float64_val(a); + roundingMode = STATUS(float_rounding_mode); + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != float64_val(a) ) + STATUS(float_exception_flags) |= float_flag_inexact; + return make_float64(z); + +} + +float64 float64_trunc_to_int( float64 a STATUS_PARAM) +{ + int oldmode; + float64 res; + oldmode = STATUS(float_rounding_mode); + STATUS(float_rounding_mode) = float_round_to_zero; + res = float64_round_to_int(a STATUS_VAR); + STATUS(float_rounding_mode) = oldmode; + return res; +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the double-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
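+|
+| Rounding, when needed, happens once in roundAndPackFloat64 at the end of
+| this helper, which is also where float_flag_inexact gets set.  Since the
+| flags in the status block only accumulate, a caller can test whether a
+| particular addition was exact with a sketch like the following (a
+| hypothetical helper, not a routine from this file):
+|
+|     static flag example_add_was_inexact( float64 a, float64 b STATUS_PARAM )
+|     {
+|         STATUS(float_exception_flags) &= ~float_flag_inexact;
+|         float64_add( a, b STATUS_VAR );
+|         return ( STATUS(float_exception_flags) & float_flag_inexact ) != 0;
+|     }
+|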
+*----------------------------------------------------------------------------*/ + +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return a; + } + if ( aExp == 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 ); + return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + } + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits64) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
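+|
+| One consequence of the equal-significand case below is the standard zero
+| convention: for finite, equal operands the difference is +0 in every
+| rounding mode except float_round_down, where it is -0.  An illustrative
+| way to observe this from the public entry points (not a routine defined in
+| this file):
+|
+|     static float64 example_cancel64( float64 a STATUS_PARAM )
+|     {
+|         return float64_sub( a, a STATUS_VAR );
+|     }
+|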
+*----------------------------------------------------------------------------*/ + +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the double-precision floating-point values `a' +| and `b'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_add( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign STATUS_VAR ); + } + else { + return subFloat64Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the double-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_sub( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign STATUS_VAR ); + } + else { + return addFloat64Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the double-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 float64_mul( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b STATUS_VAR ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the double-precision floating-point value `a' +| by the corresponding value `b'. The operation is performed according to +| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
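+|
+| For callers that only need a reciprocal, the constant 1.0 can be built
+| with the same encoding already used by float64_round_to_int above,
+| LIT64( 0x3FF0000000000000 ).  An illustrative sketch (a hypothetical
+| helper, not a routine defined in this file):
+|
+|     static float64 example_recip64( float64 a STATUS_PARAM )
+|     {
+|         return float64_div( make_float64( LIT64( 0x3FF0000000000000 ) ),
+|                             a STATUS_VAR );
+|     }
+|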
+*----------------------------------------------------------------------------*/ + +float64 float64_div( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + bits64 rem0, rem1; + bits64 term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + float_raise( float_flag_divbyzero STATUS_VAR); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the double-precision floating-point value `a' +| with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 float64_rem( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, zSign; + int16 aExp, bExp, expDiff; + bits64 aSig, bSig; + bits64 q, alternateASig; + sbits64 sigMean; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b STATUS_VAR ); + } + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits64) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits64) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the double-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_sqrt( float64 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, zExp; + bits64 aSig, zSig, doubleZSig; + bits64 rem0, rem1, term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return float64_zero; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); + if ( ( zSig & 0x1FF ) <= 5 ) { + doubleZSig = zSig<<1; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + doubleZSig -= 2; + add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the binary log of the double-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ +float64 float64_log2( float64 a STATUS_PARAM ) +{ + flag aSign, zSign; + int16 aExp; + bits64 aSig, aSig0, aSig1, zSig, i; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( aSign ) { + float_raise( float_flag_invalid STATUS_VAR); + return float64_default_nan; + } + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR ); + return a; + } + + aExp -= 0x3FF; + aSig |= LIT64( 0x0010000000000000 ); + zSign = aExp < 0; + zSig = (bits64)aExp << 52; + for (i = 1LL << 51; i > 0; i >>= 1) { + mul64To128( aSig, aSig, &aSig0, &aSig1 ); + aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); + if ( aSig & LIT64( 0x0020000000000000 ) ) { + aSig >>= 1; + zSig |= i; + } + } + + if ( zSign ) + zSig = -zSig; + return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR ); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is equal to the +| corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float64_eq( float64 a, float64 b STATUS_PARAM ) +{ + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + av = float64_val(a); + bv = float64_val(b); + return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is less than or +| equal to the corresponding value `b', and 0 otherwise. The comparison is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float64_le( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float64_lt( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is equal to the +| corresponding value `b', and 0 otherwise. The invalid exception is raised +| if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float64_eq_signaling( float64 a, float64 b STATUS_PARAM ) +{ + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + av = float64_val(a); + bv = float64_val(b); + return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is less than or +| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +| cause an exception. Otherwise, the comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float64_le_quiet( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. Otherwise, the comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float64_lt_quiet( float64 a, float64 b STATUS_PARAM ) +{ + flag aSign, bSign; + bits64 av, bv; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, the +| largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int32 floatx80_to_int32( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. 
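+|
+| As with the float64 conversions earlier in this file, the saturation
+| convention is: a NaN input yields the largest positive integer
+| (0x7FFFFFFF) with float_flag_invalid raised, and an out-of-range input
+| saturates with the sign of the operand.  An illustrative caller-side
+| sketch (a hypothetical helper, not part of this file):
+|
+|     static int32 example_truncating_cast32( floatx80 a STATUS_PARAM )
+|     {
+|         return floatx80_to_int32_round_to_zero( a STATUS_VAR );
+|     }
+|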
+*----------------------------------------------------------------------------*/ + +int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, +| the largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int64 floatx80_to_int64( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig, aSigExtra; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount <= 0 ) { + if ( shiftCount ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign + || ( ( aExp == 0x7FFF ) + && ( aSig != LIT64( 0x8000000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + aSigExtra = 0; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + int64 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = aExp - 0x403E; + if ( 0 <= shiftCount ) { + aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); + if ( ( a.high != 0xC03E ) || aSig ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! 
aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + else if ( aExp < 0x3FFF ) { + if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + z = aSig>>( - shiftCount ); + if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the single-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 floatx80_to_float32( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the double-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 floatx80_to_float64( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig, zSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR ); + +} + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the quadruple-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 floatx80_to_float128( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) ); + } + shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp, zSig0, zSig1 ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Rounds the extended double-precision floating-point value `a' to an integer, +| and returns the result as an extended quadruple-precision floating-point +| value. 
The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + floatx80 z; + + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN( a, a STATUS_VAR ); + } + return a; + } + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) + && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + return a; + } + STATUS(float_exception_flags) |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + switch ( STATUS(float_rounding_mode) ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = STATUS(float_rounding_mode); + if ( roundingMode == float_round_nearest_even ) { + z.low += lastBitMask>>1; + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z.low += roundBitsMask; + } + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the extended double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is +| negated before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
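+|
+| Note that roundAndPackFloatx80 below is passed
+| STATUS(floatx80_rounding_precision), so results can be rounded as if to
+| 64-bit or 32-bit precision instead of the full 80-bit format, much like
+| the x87 precision-control field.  An illustrative sketch of forcing
+| double-precision rounding around an add (a hypothetical helper, not part
+| of this file):
+|
+|     static floatx80 example_add_pc64( floatx80 a, floatx80 b STATUS_PARAM )
+|     {
+|         int8 oldprec;
+|         floatx80 res;
+|         oldprec = STATUS(floatx80_rounding_precision);
+|         STATUS(floatx80_rounding_precision) = 64;
+|         res = floatx80_add( a, b STATUS_VAR );
+|         STATUS(floatx80_rounding_precision) = oldprec;
+|         return res;
+|     }
+|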
+*----------------------------------------------------------------------------*/ + +static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b STATUS_VAR ); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; + } + zExp = aExp; + goto shiftRight1; + } + zSig0 = aSig + bSig; + if ( (sbits64) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return + roundAndPackFloatx80( + STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the extended +| double-precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b STATUS_VAR ); + } + float_raise( float_flag_invalid STATUS_VAR); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + zSig1 = 0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return + normalizeRoundAndPackFloatx80( + STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the extended double-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs( a, b, aSign STATUS_VAR ); + } + else { + return subFloatx80Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs( a, b, aSign STATUS_VAR ); + } + else { + return addFloatx80Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
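+| Multiplying an infinity by zero raises the invalid exception and returns
+| the default NaN.  Illustrative call, assuming a build in which
+| STATUS_PARAM supplies a `float_status *status' argument (the variable
+| `status' below is hypothetical):
+|     floatx80 p = floatx80_mul( a, b, &status );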
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b STATUS_VAR ); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (sbits64) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return + roundAndPackFloatx80( + STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the extended double-precision floating-point +| value `a' by the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
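+| Dividing a finite nonzero value by zero raises the divide-by-zero
+| exception and returns a correctly signed infinity; 0/0 and inf/inf raise
+| the invalid exception and return the default NaN.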
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + bits64 rem0, rem1, rem2, term0, term1, term2; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero STATUS_VAR); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (bits64) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return + roundAndPackFloatx80( + STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
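+| The quotient used is the integer nearest to a/b (ties going to the even
+| integer), so the magnitude of the returned remainder never exceeds half
+| the magnitude of `b'.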
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig; + bits64 q, term0, term1, alternateASig0, alternateASig1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b STATUS_VAR ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( (bits64) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the extended double-precision floating-point +| value `a'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR ); + if ( ! 
aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= doubleZSig0; + return + roundAndPackFloatx80( + STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is +| equal to the corresponding value `b', and 0 otherwise. The comparison is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is +| less than or equal to the corresponding value `b', and 0 otherwise. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
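+| If either operand is a NaN, even a quiet one, the invalid exception is
+| raised and 0 is returned; floatx80_le_quiet below avoids the exception
+| for quiet NaNs.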
+*----------------------------------------------------------------------------*/ + +int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is +| less than the corresponding value `b', and 0 otherwise. The comparison +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is equal +| to the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is less +| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs +| do not cause an exception. Otherwise, the comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is less +| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause +| an exception. Otherwise, the comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the 32-bit two's complement integer format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int32 float128_to_int32( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x4028 - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); + return roundAndPackInt32( aSign, aSig0 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the 32-bit two's complement integer format. 
The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. If +| `a' is a NaN, the largest positive integer is returned. Otherwise, if the +| conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1, savedASig; + int32 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + aSig0 |= ( aSig1 != 0 ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact; + return 0; + } + aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = 0x402F - aExp; + savedASig = aSig0; + aSig0 >>= shiftCount; + z = aSig0; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig0<<shiftCount ) != savedASig ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the 64-bit two's complement integer format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. Otherwise, if the conversion overflows, the +| largest integer with the same sign as `a' is returned. +*----------------------------------------------------------------------------*/ + +int64 float128_to_int64( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = 0x402F - aExp; + if ( shiftCount <= 0 ) { + if ( 0x403E < aExp ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign + || ( ( aExp == 0x7FFF ) + && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) + ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); + } + else { + shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); + } + return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the 64-bit two's complement integer format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. 
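+| On overflow the invalid exception is raised and the result saturates to
+| 0x7FFFFFFFFFFFFFFF or 0x8000000000000000 according to the sign of `a'.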
+*----------------------------------------------------------------------------*/ + +int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + int64 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = aExp - 0x402F; + if ( 0 < shiftCount ) { + if ( 0x403E <= aExp ) { + aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); + if ( ( a.high == LIT64( 0xC03E000000000000 ) ) + && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { + if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; + } + else { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (sbits64) LIT64( 0x8000000000000000 ); + } + z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); + if ( (bits64) ( aSig1<<shiftCount ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + } + else { + if ( aExp < 0x3FFF ) { + if ( aExp | aSig0 | aSig1 ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return 0; + } + z = aSig0>>( - shiftCount ); + if ( aSig1 + || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the single-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float128_to_float32( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + bits32 zSig; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + aSig0 |= ( aSig1 != 0 ); + shift64RightJamming( aSig0, 18, &aSig0 ); + zSig = aSig0; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x3F81; + } + return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 float128_to_float64( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + aSig0 |= ( aSig1 != 0 ); + if ( aExp || aSig0 ) { + aSig0 |= LIT64( 0x4000000000000000 ); + aExp -= 0x3C01; + } + return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR ); + +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the extended double-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 float128_to_floatx80( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) ); + } + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); + return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR ); + +} + +#endif + +/*---------------------------------------------------------------------------- +| Rounds the quadruple-precision floating-point value `a' to an integer, and +| returns the result as a quadruple-precision floating-point value. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
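+| The inexact exception is raised whenever the returned value differs from
+| the operand.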
+*----------------------------------------------------------------------------*/ + +float128 float128_round_to_int( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float128 z; + + aExp = extractFloat128Exp( a ); + if ( 0x402F <= aExp ) { + if ( 0x406F <= aExp ) { + if ( ( aExp == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) + ) { + return propagateFloat128NaN( a, a STATUS_VAR ); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = STATUS(float_rounding_mode); + if ( roundingMode == float_round_nearest_even ) { + if ( lastBitMask ) { + add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else { + if ( (sbits64) z.low < 0 ) { + ++z.high; + if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; + } + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + } + } + z.low &= ~ roundBitsMask; + } + else { + if ( aExp < 0x3FFF ) { + if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + STATUS(float_exception_flags) |= float_flag_inexact; + aSign = extractFloat128Sign( a ); + switch ( STATUS(float_rounding_mode) ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) + && ( extractFloat128Frac0( a ) + | extractFloat128Frac1( a ) ) + ) { + return packFloat128( aSign, 0x3FFF, 0, 0 ); + } + break; + case float_round_down: + return + aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) + : packFloat128( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloat128( 1, 0, 0, 0 ) + : packFloat128( 0, 0x3FFF, 0, 0 ); + } + return packFloat128( aSign, 0, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x402F - aExp; + roundBitsMask = lastBitMask - 1; + z.low = 0; + z.high = a.high; + roundingMode = STATUS(float_rounding_mode); + if ( roundingMode == float_round_nearest_even ) { + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + z.high &= ~ lastBitMask; + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + z.high |= ( a.low != 0 ); + z.high += roundBitsMask; + } + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the quadruple-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int32 expDiff; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b STATUS_VAR ); + } + return a; + } + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) { + if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 ); + return packFloat128( zSign, 0, zSig0, zSig1 ); + } + zSig2 = 0; + zSig0 |= LIT64( 0x0002000000000000 ); + zExp = aExp; + goto shiftRight1; + } + aSig0 |= LIT64( 0x0001000000000000 ); + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; + ++zExp; + shiftRight1: + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + roundAndPack: + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the quadruple- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int32 expDiff; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b STATUS_VAR ); + } + float_raise( float_flag_invalid STATUS_VAR); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + bSig0 |= LIT64( 0x4000000000000000 ); + bBigger: + sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); + aSig0 |= LIT64( 0x4000000000000000 ); + aBigger: + sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the quadruple-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float128_add( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return addFloat128Sigs( a, b, aSign STATUS_VAR ); + } + else { + return subFloat128Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the quadruple-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float128 float128_sub( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return subFloat128Sigs( a, b, aSign STATUS_VAR ); + } + else { + return addFloat128Sigs( a, b, aSign STATUS_VAR ); + } + +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the quadruple-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float128_mul( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b STATUS_VAR ); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= LIT64( 0x0001000000000000 ); + shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); + mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zSig2 |= ( zSig3 != 0 ); + if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + ++zExp; + } + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the quadruple-precision floating-point value +| `a' by the corresponding value `b'. The operation is performed according to +| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float128 float128_div( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + goto invalid; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return packFloat128( zSign, 0, 0, 0 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero STATUS_VAR); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FFD; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 4 ) { + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the quadruple-precision floating-point value `a' +| with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float128 float128_rem( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + bits64 allZero, alternateASig0, alternateASig1, sigMean1; + sbits64 sigMean0; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b STATUS_VAR ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR ); + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), + aSig1, + 15 - ( expDiff < 0 ), + &aSig0, + &aSig1 + ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + q = le128( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); + shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); + sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 61; + } + if ( -64 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + expDiff += 52; + if ( expDiff < 0 ) { + shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits64) aSig0 ); + add128( + aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits64) aSig0 < 0 ); + if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return + normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the quadruple-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
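+| The square root of a negative nonzero value raises the invalid exception
+| and returns the default NaN; the square root of minus zero is minus zero.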
+*----------------------------------------------------------------------------*/ + +float128 float128_sqrt( float128 a STATUS_PARAM ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid STATUS_VAR); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; + aSig0 |= LIT64( 0x0001000000000000 ); + zSig0 = estimateSqrt32( aExp, aSig0>>17 ); + shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & 0x1FFF ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float128_eq( float128 a, float128 b STATUS_PARAM ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is less than +| or equal to the corresponding value `b', and 0 otherwise. The comparison +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_le( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float128_lt( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float128_eq_signaling( float128 a, float128 b STATUS_PARAM ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is less than +| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +| cause an exception. Otherwise, the comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_le_quiet( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. Otherwise, the comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float128_lt_quiet( float128 a, float128 b STATUS_PARAM ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? 
lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + +/* misc functions */ +float32 uint32_to_float32( unsigned int a STATUS_PARAM ) +{ + return int64_to_float32(a STATUS_VAR); +} + +float64 uint32_to_float64( unsigned int a STATUS_PARAM ) +{ + return int64_to_float64(a STATUS_VAR); +} + +unsigned int float32_to_uint32( float32 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float32_to_int64(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffffffff) { + res = 0xffffffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + +unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float32_to_int64_round_to_zero(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffffffff) { + res = 0xffffffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + +unsigned int float64_to_uint32( float64 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float64_to_int64(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffffffff) { + res = 0xffffffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + +unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float64_to_int64_round_to_zero(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffffffff) { + res = 0xffffffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + +/* FIXME: This looks broken. 
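+   The INT64_MIN bias appears to be added to the raw float64 bit pattern
+   (via float64_val) with integer arithmetic rather than being applied with
+   a floating-point addition, so the intermediate value is not the intended
+   a - 2^63 and the results and exception flags are unreliable.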
*/ +uint64_t float64_to_uint64 (float64 a STATUS_PARAM) +{ + int64_t v; + + v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); + v += float64_val(a); + v = float64_to_int64(make_float64(v) STATUS_VAR); + + return v - INT64_MIN; +} + +uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) +{ + int64_t v; + + v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); + v += float64_val(a); + v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR); + + return v - INT64_MIN; +} + +#define COMPARE(s, nan_exp) \ +INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \ + int is_quiet STATUS_PARAM ) \ +{ \ + flag aSign, bSign; \ + bits ## s av, bv; \ + \ + if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \ + extractFloat ## s ## Frac( a ) ) || \ + ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \ + extractFloat ## s ## Frac( b ) )) { \ + if (!is_quiet || \ + float ## s ## _is_signaling_nan( a ) || \ + float ## s ## _is_signaling_nan( b ) ) { \ + float_raise( float_flag_invalid STATUS_VAR); \ + } \ + return float_relation_unordered; \ + } \ + aSign = extractFloat ## s ## Sign( a ); \ + bSign = extractFloat ## s ## Sign( b ); \ + av = float ## s ## _val(a); \ + bv = float ## s ## _val(b); \ + if ( aSign != bSign ) { \ + if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) { \ + /* zero case */ \ + return float_relation_equal; \ + } else { \ + return 1 - (2 * aSign); \ + } \ + } else { \ + if (av == bv) { \ + return float_relation_equal; \ + } else { \ + return 1 - 2 * (aSign ^ ( av < bv )); \ + } \ + } \ +} \ + \ +int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \ +{ \ + return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \ +} \ + \ +int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \ +{ \ + return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \ +} + +COMPARE(32, 0xff) +COMPARE(64, 0x7ff) + +INLINE int float128_compare_internal( float128 a, float128 b, + int is_quiet STATUS_PARAM ) +{ + flag aSign, bSign; + + if (( ( extractFloat128Exp( a ) == 0x7fff ) && + ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || + ( ( extractFloat128Exp( b ) == 0x7fff ) && + ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) { + if (!is_quiet || + float128_is_signaling_nan( a ) || + float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return float_relation_unordered; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { + /* zero case */ + return float_relation_equal; + } else { + return 1 - (2 * aSign); + } + } else { + if (a.low == b.low && a.high == b.high) { + return float_relation_equal; + } else { + return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); + } + } +} + +int float128_compare( float128 a, float128 b STATUS_PARAM ) +{ + return float128_compare_internal(a, b, 0 STATUS_VAR); +} + +int float128_compare_quiet( float128 a, float128 b STATUS_PARAM ) +{ + return float128_compare_internal(a, b, 1 STATUS_VAR); +} + +/* Multiply A by 2 raised to the power N. 
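   Values with the maximum exponent (NaNs and infinities) and zeros are
   returned unchanged; for everything else the exponent is adjusted and the
   result re-normalised and rounded by the pack routine, so e.g. scaling
   single-precision 1.0 by n = 3 yields exactly 8.0, while an overflowing n
   is resolved by the rounding logic (typically an infinity together with the
   overflow and inexact flags).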
*/ +float32 float32_scalbn( float32 a, int n STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0xFF ) { + return a; + } + if ( aExp != 0 ) + aSig |= 0x00800000; + else if ( aSig == 0 ) + return a; + + aExp += n - 1; + aSig <<= 7; + return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR ); +} + +float64 float64_scalbn( float64 a, int n STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + + if ( aExp == 0x7FF ) { + return a; + } + if ( aExp != 0 ) + aSig |= LIT64( 0x0010000000000000 ); + else if ( aSig == 0 ) + return a; + + aExp += n - 1; + aSig <<= 10; + return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR ); +} + +#ifdef FLOATX80 +floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FF ) { + return a; + } + if (aExp == 0 && aSig == 0) + return a; + + aExp += n; + return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision), + aSign, aExp, aSig, 0 STATUS_VAR ); +} +#endif + +#ifdef FLOAT128 +float128 float128_scalbn( float128 a, int n STATUS_PARAM ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + return a; + } + if ( aExp != 0 ) + aSig0 |= LIT64( 0x0001000000000000 ); + else if ( aSig0 == 0 && aSig1 == 0 ) + return a; + + aExp += n - 1; + return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 + STATUS_VAR ); + +} +#endif diff --git a/src/recompiler/fpu/softfloat.h b/src/recompiler/fpu/softfloat.h new file mode 100644 index 00000000..c209cd97 --- /dev/null +++ b/src/recompiler/fpu/softfloat.h @@ -0,0 +1,537 @@ +/*============================================================================ + +This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 
+ +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. + +=============================================================================*/ + +#ifndef SOFTFLOAT_H +#define SOFTFLOAT_H + +#ifdef VBOX +#include <VBox/types.h> +#endif + +#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH) +#include <sunmath.h> +#endif + +#include <inttypes.h> +#include "config.h" + +/*---------------------------------------------------------------------------- +| Each of the following `typedef's defines the most convenient type that holds +| integers of at least as many bits as specified. For example, `uint8' should +| be the most convenient type that can hold unsigned integers of as many as +| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most +| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +| to the same as `int'. +*----------------------------------------------------------------------------*/ +typedef uint8_t flag; +typedef uint8_t uint8; +typedef int8_t int8; +#ifndef _AIX +typedef int uint16; +typedef int int16; +#endif +typedef unsigned int uint32; +typedef signed int int32; +typedef uint64_t uint64; +typedef int64_t int64; + +/*---------------------------------------------------------------------------- +| Each of the following `typedef's defines a type that holds integers +| of _exactly_ the number of bits specified. For instance, for most +| implementation of C, `bits16' and `sbits16' should be `typedef'ed to +| `unsigned short int' and `signed short int' (or `short int'), respectively. +*----------------------------------------------------------------------------*/ +typedef uint8_t bits8; +typedef int8_t sbits8; +typedef uint16_t bits16; +typedef int16_t sbits16; +typedef uint32_t bits32; +typedef int32_t sbits32; +typedef uint64_t bits64; +typedef int64_t sbits64; + +#define LIT64( a ) a##LL +#define INLINE static inline + +/*---------------------------------------------------------------------------- +| The macro `FLOATX80' must be defined to enable the extended double-precision +| floating-point format `floatx80'. If this macro is not defined, the +| `floatx80' type will not be defined, and none of the functions that either +| input or output the `floatx80' type will be defined. The same applies to +| the `FLOAT128' macro and the quadruple-precision format `float128'. 
+*----------------------------------------------------------------------------*/ +#ifdef CONFIG_SOFTFLOAT +/* bit exact soft float support */ +#define FLOATX80 +#define FLOAT128 +#else +/* native float support */ +#if (defined(__i386__) || defined(__x86_64__)) && (!defined(CONFIG_BSD) || defined(VBOX)) /** @todo VBOX: not correct on windows */ +#define FLOATX80 +#endif +#endif /* !CONFIG_SOFTFLOAT */ + +#if defined(VBOX) && (!defined(FLOATX80) || defined(CONFIG_SOFTFLOAT)) +# error misconfigured +#endif + +#define STATUS_PARAM , float_status *status +#define STATUS(field) status->field +#define STATUS_VAR , status + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point ordering relations +*----------------------------------------------------------------------------*/ +enum { + float_relation_less = -1, + float_relation_equal = 0, + float_relation_greater = 1, + float_relation_unordered = 2 +}; + +#ifdef CONFIG_SOFTFLOAT +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point types. +*----------------------------------------------------------------------------*/ +/* Use structures for soft-float types. This prevents accidentally mixing + them with native int/float types. A sufficiently clever compiler and + sane ABI should be able to see though these structs. However + x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */ +//#define USE_SOFTFLOAT_STRUCT_TYPES +#ifdef USE_SOFTFLOAT_STRUCT_TYPES +typedef struct { + uint32_t v; +} float32; +/* The cast ensures an error if the wrong type is passed. */ +#define float32_val(x) (((float32)(x)).v) +#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; }) +typedef struct { + uint64_t v; +} float64; +#define float64_val(x) (((float64)(x)).v) +#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; }) +#else +typedef uint32_t float32; +typedef uint64_t float64; +#define float32_val(x) (x) +#define float64_val(x) (x) +#define make_float32(x) (x) +#define make_float64(x) (x) +#endif +#ifdef FLOATX80 +typedef struct { + uint64_t low; + uint16_t high; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { +#ifdef HOST_WORDS_BIGENDIAN + uint64_t high, low; +#else + uint64_t low, high; +#endif +} float128; +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point underflow tininess-detection mode. +*----------------------------------------------------------------------------*/ +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point rounding mode. +*----------------------------------------------------------------------------*/ +enum { + float_round_nearest_even = 0, + float_round_down = 1, + float_round_up = 2, + float_round_to_zero = 3 +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point exception flags. 
+*----------------------------------------------------------------------------*/ +enum { + float_flag_invalid = 1, + float_flag_divbyzero = 4, + float_flag_overflow = 8, + float_flag_underflow = 16, + float_flag_inexact = 32 +}; + +typedef struct float_status { + signed char float_detect_tininess; + signed char float_rounding_mode; + signed char float_exception_flags; +#ifdef FLOATX80 + signed char floatx80_rounding_precision; +#endif + flag flush_to_zero; + flag default_nan_mode; +} float_status; + +void set_float_rounding_mode(int val STATUS_PARAM); +void set_float_exception_flags(int val STATUS_PARAM); +INLINE void set_flush_to_zero(flag val STATUS_PARAM) +{ + STATUS(flush_to_zero) = val; +} +INLINE void set_default_nan_mode(flag val STATUS_PARAM) +{ + STATUS(default_nan_mode) = val; +} +INLINE int get_float_exception_flags(float_status *status) +{ + return STATUS(float_exception_flags); +} +#ifdef FLOATX80 +void set_floatx80_rounding_precision(int val STATUS_PARAM); +#endif + +/*---------------------------------------------------------------------------- +| Routine to raise any or all of the software IEC/IEEE floating-point +| exception flags. +*----------------------------------------------------------------------------*/ +void float_raise( int8 flags STATUS_PARAM); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. +*----------------------------------------------------------------------------*/ +float32 int32_to_float32( int STATUS_PARAM ); +float64 int32_to_float64( int STATUS_PARAM ); +float32 uint32_to_float32( unsigned int STATUS_PARAM ); +float64 uint32_to_float64( unsigned int STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( int STATUS_PARAM ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( int STATUS_PARAM ); +#endif +float32 int64_to_float32( int64_t STATUS_PARAM ); +float32 uint64_to_float32( uint64_t STATUS_PARAM ); +float64 int64_to_float64( int64_t STATUS_PARAM ); +float64 uint64_to_float64( uint64_t STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( int64_t STATUS_PARAM ); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( int64_t STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software half-precision conversion routines. +*----------------------------------------------------------------------------*/ +bits16 float32_to_float16( float32, flag STATUS_PARAM ); +float32 float16_to_float32( bits16, flag STATUS_PARAM ); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float32_to_int32( float32 STATUS_PARAM ); +int float32_to_int32_round_to_zero( float32 STATUS_PARAM ); +unsigned int float32_to_uint32( float32 STATUS_PARAM ); +unsigned int float32_to_uint32_round_to_zero( float32 STATUS_PARAM ); +int64_t float32_to_int64( float32 STATUS_PARAM ); +int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM ); +float64 float32_to_float64( float32 STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 STATUS_PARAM ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. 
+*----------------------------------------------------------------------------*/ +float32 float32_round_to_int( float32 STATUS_PARAM ); +float32 float32_add( float32, float32 STATUS_PARAM ); +float32 float32_sub( float32, float32 STATUS_PARAM ); +float32 float32_mul( float32, float32 STATUS_PARAM ); +float32 float32_div( float32, float32 STATUS_PARAM ); +float32 float32_rem( float32, float32 STATUS_PARAM ); +float32 float32_sqrt( float32 STATUS_PARAM ); +float32 float32_exp2( float32 STATUS_PARAM ); +float32 float32_log2( float32 STATUS_PARAM ); +int float32_eq( float32, float32 STATUS_PARAM ); +int float32_le( float32, float32 STATUS_PARAM ); +int float32_lt( float32, float32 STATUS_PARAM ); +int float32_eq_signaling( float32, float32 STATUS_PARAM ); +int float32_le_quiet( float32, float32 STATUS_PARAM ); +int float32_lt_quiet( float32, float32 STATUS_PARAM ); +int float32_compare( float32, float32 STATUS_PARAM ); +int float32_compare_quiet( float32, float32 STATUS_PARAM ); +int float32_is_nan( float32 ); +int float32_is_signaling_nan( float32 ); +float32 float32_scalbn( float32, int STATUS_PARAM ); + +INLINE float32 float32_abs(float32 a) +{ + return make_float32(float32_val(a) & 0x7fffffff); +} + +INLINE float32 float32_chs(float32 a) +{ + return make_float32(float32_val(a) ^ 0x80000000); +} + +INLINE int float32_is_infinity(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0x7f800000; +} + +INLINE int float32_is_neg(float32 a) +{ + return float32_val(a) >> 31; +} + +INLINE int float32_is_zero(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0; +} + +#define float32_zero make_float32(0) +#define float32_one make_float32(0x3f800000) +#define float32_ln2 make_float32(0x3f317218) + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float64_to_int32( float64 STATUS_PARAM ); +int float64_to_int32_round_to_zero( float64 STATUS_PARAM ); +unsigned int float64_to_uint32( float64 STATUS_PARAM ); +unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM ); +int64_t float64_to_int64( float64 STATUS_PARAM ); +int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM ); +uint64_t float64_to_uint64 (float64 a STATUS_PARAM); +uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM); +float32 float64_to_float32( float64 STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 STATUS_PARAM ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. 
+*----------------------------------------------------------------------------*/ +float64 float64_round_to_int( float64 STATUS_PARAM ); +float64 float64_trunc_to_int( float64 STATUS_PARAM ); +float64 float64_add( float64, float64 STATUS_PARAM ); +float64 float64_sub( float64, float64 STATUS_PARAM ); +float64 float64_mul( float64, float64 STATUS_PARAM ); +float64 float64_div( float64, float64 STATUS_PARAM ); +float64 float64_rem( float64, float64 STATUS_PARAM ); +float64 float64_sqrt( float64 STATUS_PARAM ); +float64 float64_log2( float64 STATUS_PARAM ); +int float64_eq( float64, float64 STATUS_PARAM ); +int float64_le( float64, float64 STATUS_PARAM ); +int float64_lt( float64, float64 STATUS_PARAM ); +int float64_eq_signaling( float64, float64 STATUS_PARAM ); +int float64_le_quiet( float64, float64 STATUS_PARAM ); +int float64_lt_quiet( float64, float64 STATUS_PARAM ); +int float64_compare( float64, float64 STATUS_PARAM ); +int float64_compare_quiet( float64, float64 STATUS_PARAM ); +int float64_is_nan( float64 a ); +int float64_is_signaling_nan( float64 ); +float64 float64_scalbn( float64, int STATUS_PARAM ); + +INLINE float64 float64_abs(float64 a) +{ + return make_float64(float64_val(a) & 0x7fffffffffffffffLL); +} + +INLINE float64 float64_chs(float64 a) +{ + return make_float64(float64_val(a) ^ 0x8000000000000000LL); +} + +INLINE int float64_is_infinity(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; +} + +INLINE int float64_is_neg(float64 a) +{ + return float64_val(a) >> 63; +} + +INLINE int float64_is_zero(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL) == 0; +} + +#define float64_zero make_float64(0) +#define float64_one make_float64(0x3ff0000000000000LL) +#define float64_ln2 make_float64(0x3fe62e42fefa39efLL) + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int floatx80_to_int32( floatx80 STATUS_PARAM ); +int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM ); +int64_t floatx80_to_int64( floatx80 STATUS_PARAM ); +int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM ); +float32 floatx80_to_float32( floatx80 STATUS_PARAM ); +float64 floatx80_to_float64( floatx80 STATUS_PARAM ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. 
+*----------------------------------------------------------------------------*/ +floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM ); +floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM ); +floatx80 floatx80_sqrt( floatx80 STATUS_PARAM ); +int floatx80_eq( floatx80, floatx80 STATUS_PARAM ); +int floatx80_le( floatx80, floatx80 STATUS_PARAM ); +int floatx80_lt( floatx80, floatx80 STATUS_PARAM ); +int floatx80_eq_signaling( floatx80, floatx80 STATUS_PARAM ); +int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM ); +int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM ); +int floatx80_is_nan( floatx80 ); +int floatx80_is_signaling_nan( floatx80 ); +floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM ); + +INLINE floatx80 floatx80_abs(floatx80 a) +{ + a.high &= 0x7fff; + return a; +} + +INLINE floatx80 floatx80_chs(floatx80 a) +{ + a.high ^= 0x8000; + return a; +} + +INLINE int floatx80_is_infinity(floatx80 a) +{ + return (a.high & 0x7fff) == 0x7fff && a.low == 0; +} + +INLINE int floatx80_is_neg(floatx80 a) +{ + return a.high >> 15; +} + +INLINE int floatx80_is_zero(floatx80 a) +{ + return (a.high & 0x7fff) == 0 && a.low == 0; +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision conversion routines. +*----------------------------------------------------------------------------*/ +int float128_to_int32( float128 STATUS_PARAM ); +int float128_to_int32_round_to_zero( float128 STATUS_PARAM ); +int64_t float128_to_int64( float128 STATUS_PARAM ); +int64_t float128_to_int64_round_to_zero( float128 STATUS_PARAM ); +float32 float128_to_float32( float128 STATUS_PARAM ); +float64 float128_to_float64( float128 STATUS_PARAM ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 STATUS_PARAM ); +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision operations. 
+*----------------------------------------------------------------------------*/ +float128 float128_round_to_int( float128 STATUS_PARAM ); +float128 float128_add( float128, float128 STATUS_PARAM ); +float128 float128_sub( float128, float128 STATUS_PARAM ); +float128 float128_mul( float128, float128 STATUS_PARAM ); +float128 float128_div( float128, float128 STATUS_PARAM ); +float128 float128_rem( float128, float128 STATUS_PARAM ); +float128 float128_sqrt( float128 STATUS_PARAM ); +int float128_eq( float128, float128 STATUS_PARAM ); +int float128_le( float128, float128 STATUS_PARAM ); +int float128_lt( float128, float128 STATUS_PARAM ); +int float128_eq_signaling( float128, float128 STATUS_PARAM ); +int float128_le_quiet( float128, float128 STATUS_PARAM ); +int float128_lt_quiet( float128, float128 STATUS_PARAM ); +int float128_compare( float128, float128 STATUS_PARAM ); +int float128_compare_quiet( float128, float128 STATUS_PARAM ); +int float128_is_nan( float128 ); +int float128_is_signaling_nan( float128 ); +float128 float128_scalbn( float128, int STATUS_PARAM ); + +INLINE float128 float128_abs(float128 a) +{ + a.high &= 0x7fffffffffffffffLL; + return a; +} + +INLINE float128 float128_chs(float128 a) +{ + a.high ^= 0x8000000000000000LL; + return a; +} + +INLINE int float128_is_infinity(float128 a) +{ + return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; +} + +INLINE int float128_is_neg(float128 a) +{ + return a.high >> 63; +} + +INLINE int float128_is_zero(float128 a) +{ + return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; +} + +#endif + +#else /* CONFIG_SOFTFLOAT */ + +#include "softfloat-native.h" + +#endif /* !CONFIG_SOFTFLOAT */ + +#endif /* !SOFTFLOAT_H */ diff --git a/src/recompiler/gen-icount.h b/src/recompiler/gen-icount.h new file mode 100644 index 00000000..13512960 --- /dev/null +++ b/src/recompiler/gen-icount.h @@ -0,0 +1,48 @@ +#include "qemu-timer.h" + +/* Helpers for instruction counting code generation. */ + +static TCGArg *icount_arg; +static int icount_label; + +static inline void gen_icount_start(void) +{ + TCGv_i32 count; + + if (!use_icount) + return; + + icount_label = gen_new_label(); + count = tcg_temp_local_new_i32(); + tcg_gen_ld_i32(count, cpu_env, offsetof(CPUState, icount_decr.u32)); + /* This is a horrid hack to allow fixing up the value later. */ + icount_arg = gen_opparam_ptr + 1; + tcg_gen_subi_i32(count, count, 0xdeadbeef); + + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label); + tcg_gen_st16_i32(count, cpu_env, offsetof(CPUState, icount_decr.u16.low)); + tcg_temp_free_i32(count); +} + +static void gen_icount_end(TranslationBlock *tb, int num_insns) +{ + if (use_icount) { + *icount_arg = num_insns; + gen_set_label(icount_label); + tcg_gen_exit_tb((uintptr_t)(tb + 2)); + } +} + +static inline void gen_io_start(void) +{ + TCGv_i32 tmp = tcg_const_i32(1); + tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, can_do_io)); + tcg_temp_free_i32(tmp); +} + +static inline void gen_io_end(void) +{ + TCGv_i32 tmp = tcg_const_i32(0); + tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, can_do_io)); + tcg_temp_free_i32(tmp); +} diff --git a/src/recompiler/host-utils.c b/src/recompiler/host-utils.c new file mode 100644 index 00000000..4afee79e --- /dev/null +++ b/src/recompiler/host-utils.c @@ -0,0 +1,109 @@ +/* + * Utility compute operations used by translated code. 
+ * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2007 Aurelien Jarno + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <stdlib.h> +#ifndef VBOX +#include <stdint.h> +#else +# include <iprt/types.h> +#endif +#include "host-utils.h" + +//#define DEBUG_MULDIV + +/* Long integer helpers */ +#if !defined(__x86_64__) +static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + *plow += a; + /* carry test */ + if (*plow < a) + (*phigh)++; + *phigh += b; +} + +static void neg128 (uint64_t *plow, uint64_t *phigh) +{ + *plow = ~*plow; + *phigh = ~*phigh; + add128(plow, phigh, 1, 0); +} + +static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + uint32_t a0, a1, b0, b1; + uint64_t v; + + a0 = a; + a1 = a >> 32; + + b0 = b; + b1 = b >> 32; + + v = (uint64_t)a0 * (uint64_t)b0; + *plow = v; + *phigh = 0; + + v = (uint64_t)a0 * (uint64_t)b1; + add128(plow, phigh, v << 32, v >> 32); + + v = (uint64_t)a1 * (uint64_t)b0; + add128(plow, phigh, v << 32, v >> 32); + + v = (uint64_t)a1 * (uint64_t)b1; + *phigh += v; +} + +/* Unsigned 64x64 -> 128 multiplication */ +void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + mul64(plow, phigh, a, b); +#if defined(DEBUG_MULDIV) + printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n", + a, b, *phigh, *plow); +#endif +} + +/* Signed 64x64 -> 128 multiplication */ +void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) +{ + int sa, sb; + + sa = (a < 0); + if (sa) + a = -a; + sb = (b < 0); + if (sb) + b = -b; + mul64(plow, phigh, a, b); + if (sa ^ sb) { + neg128(plow, phigh); + } +#if defined(DEBUG_MULDIV) + printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n", + a, b, *phigh, *plow); +#endif +} +#endif /* !defined(__x86_64__) */ diff --git a/src/recompiler/host-utils.h b/src/recompiler/host-utils.h new file mode 100644 index 00000000..0ddc1765 --- /dev/null +++ b/src/recompiler/host-utils.h @@ -0,0 +1,236 @@ +/* + * Utility compute operations used by translated code. 
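A small stand-alone check of the 128-bit multiply helpers defined above
(hypothetical test code, not part of the imported sources; note that the
prototypes in host-utils.h name the two pointer arguments in the opposite
order, but the definitions take the low word first):

    #include <stdio.h>
    #include <stdint.h>
    #include "host-utils.h"

    int main(void)
    {
        uint64_t lo, hi;

        /* 0xffffffffffffffff * 2 = 2^65 - 2 -> high 0x1, low 0xff..fe */
        mulu64(&lo, &hi, 0xffffffffffffffffULL, 2);
        printf("unsigned: 0x%016llx%016llx\n",
               (unsigned long long)hi, (unsigned long long)lo);

        /* -3 * 4 = -12 -> 128-bit two's complement 0xff..ff ff..f4 */
        muls64(&lo, &hi, -3, 4);
        printf("signed:   0x%016llx%016llx\n",
               (unsigned long long)hi, (unsigned long long)lo);
        return 0;
    }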
+ * + * Copyright (c) 2007 Thiemo Seufer + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "osdep.h" + +#if defined(__x86_64__) +#define __HAVE_FAST_MULU64__ +static inline void mulu64(uint64_t *plow, uint64_t *phigh, + uint64_t a, uint64_t b) +{ + __asm__ ("mul %0\n\t" + : "=d" (*phigh), "=a" (*plow) + : "a" (a), "0" (b)); +} +#define __HAVE_FAST_MULS64__ +static inline void muls64(uint64_t *plow, uint64_t *phigh, + int64_t a, int64_t b) +{ + __asm__ ("imul %0\n\t" + : "=d" (*phigh), "=a" (*plow) + : "a" (a), "0" (b)); +} +#else +void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); +void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); +#endif + +/* Binary search for leading zeros. 
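   With GCC >= 3.4 the __builtin_clz / __builtin_ctz / __builtin_popcount
   family is used, with the zero-input case handled explicitly since the
   builtins leave it undefined; the portable fall-backs below narrow the
   search 16, 8, 4, 2 and 1 bits at a time.  For example clz32(1) == 31,
   clz32(0) == 32 and ctz32(0x8000) == 15.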
*/ + +static inline int clz32(uint32_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + if (val) + return __builtin_clz(val); + else + return 32; +#else + int cnt = 0; + + if (!(val & 0xFFFF0000U)) { + cnt += 16; + val <<= 16; + } + if (!(val & 0xFF000000U)) { + cnt += 8; + val <<= 8; + } + if (!(val & 0xF0000000U)) { + cnt += 4; + val <<= 4; + } + if (!(val & 0xC0000000U)) { + cnt += 2; + val <<= 2; + } + if (!(val & 0x80000000U)) { + cnt++; + val <<= 1; + } + if (!(val & 0x80000000U)) { + cnt++; + } + return cnt; +#endif +} + +static inline int clo32(uint32_t val) +{ + return clz32(~val); +} + +static inline int clz64(uint64_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + if (val) + return __builtin_clzll(val); + else + return 64; +#else + int cnt = 0; + + if (!(val >> 32)) { + cnt += 32; + } else { + val >>= 32; + } + + return cnt + clz32(val); +#endif +} + +static inline int clo64(uint64_t val) +{ + return clz64(~val); +} + +static inline int ctz32(uint32_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + if (val) + return __builtin_ctz(val); + else + return 32; +#else + int cnt; + + cnt = 0; + if (!(val & 0x0000FFFFUL)) { + cnt += 16; + val >>= 16; + } + if (!(val & 0x000000FFUL)) { + cnt += 8; + val >>= 8; + } + if (!(val & 0x0000000FUL)) { + cnt += 4; + val >>= 4; + } + if (!(val & 0x00000003UL)) { + cnt += 2; + val >>= 2; + } + if (!(val & 0x00000001UL)) { + cnt++; + val >>= 1; + } + if (!(val & 0x00000001UL)) { + cnt++; + } + + return cnt; +#endif +} + +static inline int cto32(uint32_t val) +{ + return ctz32(~val); +} + +static inline int ctz64(uint64_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + if (val) + return __builtin_ctzll(val); + else + return 64; +#else + int cnt; + + cnt = 0; + if (!((uint32_t)val)) { + cnt += 32; + val >>= 32; + } + + return cnt + ctz32(val); +#endif +} + +static inline int cto64(uint64_t val) +{ + return ctz64(~val); +} + +static inline int ctpop8(uint8_t val) +{ + val = (val & 0x55) + ((val >> 1) & 0x55); + val = (val & 0x33) + ((val >> 2) & 0x33); + val = (val & 0x0f) + ((val >> 4) & 0x0f); + + return val; +} + +static inline int ctpop16(uint16_t val) +{ + val = (val & 0x5555) + ((val >> 1) & 0x5555); + val = (val & 0x3333) + ((val >> 2) & 0x3333); + val = (val & 0x0f0f) + ((val >> 4) & 0x0f0f); + val = (val & 0x00ff) + ((val >> 8) & 0x00ff); + + return val; +} + +static inline int ctpop32(uint32_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + return __builtin_popcount(val); +#else + val = (val & 0x55555555) + ((val >> 1) & 0x55555555); + val = (val & 0x33333333) + ((val >> 2) & 0x33333333); + val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); + val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff); + val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff); + + return val; +#endif +} + +static inline int ctpop64(uint64_t val) +{ +#if QEMU_GNUC_PREREQ(3, 4) + return __builtin_popcountll(val); +#else + val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL); + val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 0x00ff00ff00ff00ffULL); + val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 0x0000ffff0000ffffULL); + val = (val & 0x00000000ffffffffULL) + ((val >> 32) & 0x00000000ffffffffULL); + + return val; +#endif +} diff --git a/src/recompiler/hostregs_helper.h b/src/recompiler/hostregs_helper.h new file mode 100644 index 00000000..c6c1ddb9 --- /dev/null +++ b/src/recompiler/hostregs_helper.h @@ -0,0 +1,70 @@ +/* + * 
Save/restore host registers. + * + * Copyright (c) 2007 CodeSourcery + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +/* The GCC global register variable extension is used to reserve some + host registers for use by generated code. However only the core parts of + the translation engine are compiled with these settings. We must manually + save/restore these registers when called from regular code. + It is not sufficient to save/restore T0 et. al. as these may be declared + with a datatype smaller than the actual register. */ + +#if defined(DECLARE_HOST_REGS) + +#define DO_REG(REG) \ + register host_reg_t reg_AREG##REG asm(AREG##REG); \ + volatile host_reg_t saved_AREG##REG; + +#elif defined(SAVE_HOST_REGS) + +#define DO_REG(REG) \ + __asm__ __volatile__ ("" : "=r" (reg_AREG##REG)); \ + saved_AREG##REG = reg_AREG##REG; + +#else + +#define DO_REG(REG) \ + reg_AREG##REG = saved_AREG##REG; \ + __asm__ __volatile__ ("" : : "r" (reg_AREG##REG)); + +#endif + +#ifdef AREG0 +DO_REG(0) +#endif + +#ifdef AREG1 +DO_REG(1) +#endif + +#ifdef AREG2 +DO_REG(2) +#endif + +#undef SAVE_HOST_REGS +#undef DECLARE_HOST_REGS +#undef DO_REG diff --git a/src/recompiler/ioport.h b/src/recompiler/ioport.h new file mode 100644 index 00000000..8df11ce0 --- /dev/null +++ b/src/recompiler/ioport.h @@ -0,0 +1,71 @@ +/* + * defines ioport related functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
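The hostregs_helper.h fragment above is meant to be included several times
with different control macros defined; a sketch of the usual shape (the
surrounding function is hypothetical -- upstream QEMU uses essentially this
pattern inside cpu_exec()):

    static void run_with_saved_host_regs(void)
    {
        /* First include: declare the register variables and save slots. */
    #define DECLARE_HOST_REGS 1
    #include "hostregs_helper.h"

        /* Second include: copy the reserved host registers into the slots. */
    #define SAVE_HOST_REGS 1
    #include "hostregs_helper.h"

        /* ... run generated code that may clobber AREG0..AREG2 ... */

        /* Third include, with neither macro defined: restore the registers. */
    #include "hostregs_helper.h"
    }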
+ */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +/************************************************************************** + * IO ports API + */ + +#ifndef IOPORT_H +#define IOPORT_H + +#include "qemu-common.h" + +typedef uint32_t pio_addr_t; +#define FMT_pioaddr PRIx32 + +#define MAX_IOPORTS (64 * 1024) +#define IOPORTS_MASK (MAX_IOPORTS - 1) + +/* These should really be in isa.h, but are here to make pc.h happy. */ +typedef void (IOPortWriteFunc)(void *opaque, uint32_t address, uint32_t data); +typedef uint32_t (IOPortReadFunc)(void *opaque, uint32_t address); + +int register_ioport_read(pio_addr_t start, int length, int size, + IOPortReadFunc *func, void *opaque); +int register_ioport_write(pio_addr_t start, int length, int size, + IOPortWriteFunc *func, void *opaque); +void isa_unassign_ioport(pio_addr_t start, int length); + + +#ifndef VBOX +void cpu_outb(pio_addr_t addr, uint8_t val); +void cpu_outw(pio_addr_t addr, uint16_t val); +void cpu_outl(pio_addr_t addr, uint32_t val); +uint8_t cpu_inb(pio_addr_t addr); +uint16_t cpu_inw(pio_addr_t addr); +uint32_t cpu_inl(pio_addr_t addr); +#else +void cpu_outb(CPUX86State *env, pio_addr_t addr, uint8_t val); +void cpu_outw(CPUX86State *env, pio_addr_t addr, uint16_t val); +void cpu_outl(CPUX86State *env, pio_addr_t addr, uint32_t val); +uint8_t cpu_inb(CPUX86State *env, pio_addr_t addr); +uint16_t cpu_inw(CPUX86State *env, pio_addr_t addr); +uint32_t cpu_inl(CPUX86State *env, pio_addr_t addr); +#endif + +#endif /* IOPORT_H */ diff --git a/src/recompiler/osdep.h b/src/recompiler/osdep.h new file mode 100644 index 00000000..b69d157f --- /dev/null +++ b/src/recompiler/osdep.h @@ -0,0 +1,166 @@ +#ifndef QEMU_OSDEP_H +#define QEMU_OSDEP_H + +#ifdef VBOX /** @todo clean up this, it's not fully synched. */ + +#include <iprt/alloc.h> +#ifndef RT_OS_WINDOWS +# include <iprt/alloca.h> +#endif +#include <iprt/stdarg.h> +#include <iprt/string.h> + +#include "config.h" + +#define VBOX_ONLY(x) x + +#define qemu_snprintf(pszBuf, cbBuf, ...) RTStrPrintf((pszBuf), (cbBuf), __VA_ARGS__) +#define qemu_vsnprintf(pszBuf, cbBuf, pszFormat, args) \ + RTStrPrintfV((pszBuf), (cbBuf), (pszFormat), (args)) +#define qemu_vprintf(pszFormat, args) \ + RTLogPrintfV((pszFormat), (args)) + +/**@todo the following macros belongs elsewhere */ +#define qemu_malloc(cb) RTMemAlloc(cb) +#define qemu_mallocz(cb) RTMemAllocZ(cb) +#define qemu_realloc(ptr, cb) RTMemRealloc(ptr, cb) +#define qemu_free(pv) RTMemFree(pv) +#define qemu_strdup(psz) RTStrDup(psz) + +/* Misc wrappers */ +#define fflush(file) RTLogFlush(NULL) +#define printf(...) LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__)) +/* If DEBUG_TMP_LOGGING - goes to QEMU log file */ +#ifndef DEBUG_TMP_LOGGING +# define fprintf(logfile, ...) 
LogIt(0, LOG_GROUP_REM_PRINTF, (__VA_ARGS__)) +#endif + +#define assert(cond) Assert(cond) + +#else /* !VBOX */ + +#include <stdarg.h> +#include <stddef.h> + +#define VBOX_ONLY(x) /* nike */ +#define qemu_snprintf snprintf /* bird */ +#define qemu_vsnprintf vsnprintf /* bird */ +#define qemu_vprintf vprintf /* bird */ + +#endif /* !VBOX */ + +#ifdef __OpenBSD__ +#include <sys/types.h> +#include <sys/signal.h> +#endif + +#ifndef VBOX +#ifndef _WIN32 +#include <sys/time.h> +#endif +#endif /* !VBOX */ + +#ifndef glue +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) +#define stringify(s) tostring(s) +#define tostring(s) #s +#endif + +#ifndef likely +#if __GNUC__ < 3 +#define __builtin_expect(x, n) (x) +#endif + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif + +#ifdef CONFIG_NEED_OFFSETOF +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *) 0)->MEMBER) +#endif +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof(((type *) 0)->member) *__mptr = (ptr); \ + (type *) ((char *) __mptr - offsetof(type, member));}) +#endif + +/* Convert from a base type to a parent type, with compile time checking. */ +#ifdef __GNUC__ +#define DO_UPCAST(type, field, dev) ( __extension__ ( { \ + char __attribute__((unused)) offset_must_be_zero[ \ + -offsetof(type, field)]; \ + container_of(dev, type, field);})) +#else +#define DO_UPCAST(type, field, dev) container_of(dev, type, field) +#endif + +#define typeof_field(type, field) typeof(((type *)0)->field) +#define type_check(t1,t2) ((t1*)0 - (t2*)0) + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef always_inline +#if !((__GNUC__ < 3) || defined(__APPLE__)) +#ifdef __OPTIMIZE__ +#define inline __attribute__ (( always_inline )) __inline__ +#endif +#endif +#else +#define inline always_inline +#endif + +#ifdef __i386__ +#define REGPARM __attribute((regparm(3))) +#else +#define REGPARM +#endif + +#ifndef VBOX +#define qemu_printf printf +#else /*VBOX*/ +#define qemu_printf RTLogPrintf +#endif /*VBOX*/ + +#if defined (__GNUC__) && defined (__GNUC_MINOR__) +# define QEMU_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define QEMU_GNUC_PREREQ(maj, min) 0 +#endif + +#ifndef VBOX +void *qemu_memalign(size_t alignment, size_t size); +void *qemu_vmalloc(size_t size); +void qemu_vfree(void *ptr); + +int qemu_create_pidfile(const char *filename); + +#ifdef _WIN32 +int ffs(int i); + +typedef struct { + long tv_sec; + long tv_usec; +} qemu_timeval; +int qemu_gettimeofday(qemu_timeval *tp); +#else +typedef struct timeval qemu_timeval; +#define qemu_gettimeofday(tp) gettimeofday(tp, NULL); +#endif /* !_WIN32 */ +#else /* VBOX */ +# define qemu_memalign(alignment, size) ( (alignment) <= PAGE_SIZE ? 
RTMemPageAlloc((size)) : NULL ) +# define qemu_vfree(pv) RTMemPageFree(pv, missing_size_parameter) +# define qemu_vmalloc(cb) RTMemPageAlloc(cb) +#endif /* VBOX */ + +#endif diff --git a/src/recompiler/qemu-barrier.h b/src/recompiler/qemu-barrier.h new file mode 100644 index 00000000..b77fce23 --- /dev/null +++ b/src/recompiler/qemu-barrier.h @@ -0,0 +1,10 @@ +#ifndef __QEMU_BARRIER_H +#define __QEMU_BARRIER_H 1 + +/* FIXME: arch dependant, x86 version */ +#define smp_wmb() asm volatile("" ::: "memory") + +/* Compiler barrier */ +#define barrier() asm volatile("" ::: "memory") + +#endif diff --git a/src/recompiler/qemu-common.h b/src/recompiler/qemu-common.h new file mode 100644 index 00000000..ca6e3ce2 --- /dev/null +++ b/src/recompiler/qemu-common.h @@ -0,0 +1,348 @@ +/* Common header file that is included by all of qemu. */ +#ifndef QEMU_COMMON_H +#define QEMU_COMMON_H + +#include "config-host.h" + +#ifdef VBOX + +# include <iprt/string.h> +# include <iprt/types.h> +# include <iprt/ctype.h> + +void pstrcpy(char *buf, int buf_size, const char *str); +char *pstrcat(char *buf, int buf_size, const char *s); +# define snprintf RTStrPrintf + +# define qemu_isalnum(c) RT_C_IS_ALNUM((unsigned char)(c)) +# define qemu_isalpha(c) RT_C_IS_ALPHA((unsigned char)(c)) +# define qemu_iscntrl(c) RT_C_IS_CNTRL((unsigned char)(c)) +# define qemu_isdigit(c) RT_C_IS_DIGIT((unsigned char)(c)) +# define qemu_isgraph(c) RT_C_IS_GRAPH((unsigned char)(c)) +# define qemu_islower(c) RT_C_IS_LOWER((unsigned char)(c)) +# define qemu_isprint(c) RT_C_IS_PRINT((unsigned char)(c)) +# define qemu_ispunct(c) RT_C_IS_PUNCT((unsigned char)(c)) +# define qemu_isspace(c) RT_C_IS_SPACE((unsigned char)(c)) +# define qemu_isupper(c) RT_C_IS_UPPER((unsigned char)(c)) +# define qemu_isxdigit(c) RT_C_IS_XDIGIT((unsigned char)(c)) +# define qemu_tolower(c) RT_C_TO_LOWER((unsigned char)(c)) +# define qemu_toupper(c) RT_C_TO_UPPER((unsigned char)(c)) +# define qemu_isascii(c) RT_C_IS_ASCII((unsigned char)(c)) +# define qemu_toascii(c) RT_C_TO_ASCII((unsigned char)(c)) + +# define qemu_init_vcpu(env) do { } while (0) /* we don't need this :-) */ + +# define QEMU_NORETURN __attribute__((__noreturn__)) +# ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT +# define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +# else +# define QEMU_WARN_UNUSED_RESULT +# endif +#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1]; + +#include <stdio.h> +#include "cpu.h" + + +#else /* !VBOX */ +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define WINVER 0x0501 /* needed for ipv6 bits */ +#include <windows.h> +#endif + +#define QEMU_NORETURN __attribute__ ((__noreturn__)) +#ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT +#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +#define QEMU_WARN_UNUSED_RESULT +#endif + +#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1]; + +typedef struct QEMUTimer QEMUTimer; +typedef struct QEMUFile QEMUFile; +typedef struct QEMUBH QEMUBH; +typedef struct DeviceState DeviceState; + + +/* Hack around the mess dyngen-exec.h causes: We need QEMU_NORETURN in files that + cannot include the following headers without conflicts. This condition has + to be removed once dyngen is gone. 
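The container_of() / DO_UPCAST() macros in osdep.h above implement the usual
"recover the enclosing structure from a pointer to one of its members" idiom;
a short sketch with hypothetical types (the macro is repeated so the snippet
stands alone):

    #include <stddef.h>

    /* Same definition as in osdep.h above (GNU C statement expression). */
    #define container_of(ptr, type, member) ({                      \
            const typeof(((type *) 0)->member) *__mptr = (ptr);     \
            (type *) ((char *) __mptr - offsetof(type, member));})

    struct my_timer  { int pending; };                    /* hypothetical */
    struct my_device { int irq_level; struct my_timer timer; };

    /* Recover the containing device from a pointer to its embedded timer. */
    static struct my_device *device_from_timer(struct my_timer *t)
    {
        return container_of(t, struct my_device, timer);
    }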
*/ +#ifndef __DYNGEN_EXEC_H__ + +/* we put basic includes here to avoid repeating them in device drivers */ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <strings.h> +#include <inttypes.h> +#include <limits.h> +#include <time.h> +#include <ctype.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <assert.h> + +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif +#ifndef O_BINARY +#define O_BINARY 0 +#endif +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif +#ifndef ENOMEDIUM +#define ENOMEDIUM ENODEV +#endif +#if !defined(ENOTSUP) +#define ENOTSUP 4096 +#endif + +#ifndef CONFIG_IOVEC +#define CONFIG_IOVEC +struct iovec { + void *iov_base; + size_t iov_len; +}; +/* + * Use the same value as Linux for now. + */ +#define IOV_MAX 1024 +#else +#include <sys/uio.h> +#endif + +#ifdef _WIN32 +#define fsync _commit +#define lseek _lseeki64 +extern int qemu_ftruncate64(int, int64_t); +#define ftruncate qemu_ftruncate64 + +static inline char *realpath(const char *path, char *resolved_path) +{ + _fullpath(resolved_path, path, _MAX_PATH); + return resolved_path; +} + +#define PRId64 "I64d" +#define PRIx64 "I64x" +#define PRIu64 "I64u" +#define PRIo64 "I64o" +#endif + +/* FIXME: Remove NEED_CPU_H. */ +#ifndef NEED_CPU_H + +#include <setjmp.h> +#include "osdep.h" +#include "bswap.h" + +#else + +#include "cpu.h" + +#endif /* !defined(NEED_CPU_H) */ + +/* bottom halves */ +typedef void QEMUBHFunc(void *opaque); + +void async_context_push(void); +void async_context_pop(void); +int get_async_context_id(void); + +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); +void qemu_bh_schedule(QEMUBH *bh); +/* Bottom halfs that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. qemu_bh_schedule_idle() avoids this infinite loop by + * ensuring that the bottom half isn't executed until the next main loop + * iteration. 
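A minimal sketch of the bottom-half API declared here (hypothetical handler
and state, assuming the non-VBOX side of this header; a bottom half defers
the callback into the main loop instead of running it in the caller's
context):

    #include "qemu-common.h"

    static QEMUBH *flush_bh;
    static int     flush_pending;              /* hypothetical state */

    static void flush_done_bh(void *opaque)
    {
        int *pending = opaque;
        *pending = 0;                          /* runs from the main loop */
    }

    static void example_init(void)
    {
        flush_bh = qemu_bh_new(flush_done_bh, &flush_pending);
    }

    static void example_complete_io(void)
    {
        flush_pending = 1;
        qemu_bh_schedule(flush_bh);            /* callback runs later */
    }

    static void example_cleanup(void)
    {
        qemu_bh_delete(flush_bh);
    }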
+ */ +void qemu_bh_schedule_idle(QEMUBH *bh); +void qemu_bh_cancel(QEMUBH *bh); +void qemu_bh_delete(QEMUBH *bh); +int qemu_bh_poll(void); +void qemu_bh_update_timeout(int *timeout); + +uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c); + +void qemu_get_timedate(struct tm *tm, int offset); +int qemu_timedate_diff(struct tm *tm); + +/* cutils.c */ +void pstrcpy(char *buf, int buf_size, const char *str); +char *pstrcat(char *buf, int buf_size, const char *s); +int strstart(const char *str, const char *val, const char **ptr); +int stristart(const char *str, const char *val, const char **ptr); +int qemu_strnlen(const char *s, int max_len); +time_t mktimegm(struct tm *tm); +int qemu_fls(int i); +int qemu_fdatasync(int fd); +int fcntl_setfl(int fd, int flag); + +/* path.c */ +void init_paths(const char *prefix); +const char *path(const char *pathname); + +#define qemu_isalnum(c) isalnum((unsigned char)(c)) +#define qemu_isalpha(c) isalpha((unsigned char)(c)) +#define qemu_iscntrl(c) iscntrl((unsigned char)(c)) +#define qemu_isdigit(c) isdigit((unsigned char)(c)) +#define qemu_isgraph(c) isgraph((unsigned char)(c)) +#define qemu_islower(c) islower((unsigned char)(c)) +#define qemu_isprint(c) isprint((unsigned char)(c)) +#define qemu_ispunct(c) ispunct((unsigned char)(c)) +#define qemu_isspace(c) isspace((unsigned char)(c)) +#define qemu_isupper(c) isupper((unsigned char)(c)) +#define qemu_isxdigit(c) isxdigit((unsigned char)(c)) +#define qemu_tolower(c) tolower((unsigned char)(c)) +#define qemu_toupper(c) toupper((unsigned char)(c)) +#define qemu_isascii(c) isascii((unsigned char)(c)) +#define qemu_toascii(c) toascii((unsigned char)(c)) + +void *qemu_malloc(size_t size); +void *qemu_realloc(void *ptr, size_t size); +void *qemu_mallocz(size_t size); +void qemu_free(void *ptr); +char *qemu_strdup(const char *str); +char *qemu_strndup(const char *str, size_t size); + +void qemu_mutex_lock_iothread(void); +void qemu_mutex_unlock_iothread(void); + +int qemu_open(const char *name, int flags, ...); +ssize_t qemu_write_full(int fd, const void *buf, size_t count) + QEMU_WARN_UNUSED_RESULT; +void qemu_set_cloexec(int fd); + +#ifndef _WIN32 +int qemu_eventfd(int pipefd[2]); +int qemu_pipe(int pipefd[2]); +#endif + +/* Error handling. */ + +void QEMU_NORETURN hw_error(const char *fmt, ...) + __attribute__ ((__format__ (__printf__, 1, 2))); + +/* IO callbacks. */ +typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); +typedef int IOCanReadHandler(void *opaque); +typedef void IOHandler(void *opaque); + +struct ParallelIOArg { + void *buffer; + int count; +}; + +typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size); + +/* A load of opaque types so that device init declarations don't have to + pull in all the real definitions. 
*/ +typedef struct NICInfo NICInfo; +typedef struct HCIInfo HCIInfo; +typedef struct AudioState AudioState; +typedef struct BlockDriverState BlockDriverState; +typedef struct DisplayState DisplayState; +typedef struct DisplayChangeListener DisplayChangeListener; +typedef struct DisplaySurface DisplaySurface; +typedef struct DisplayAllocator DisplayAllocator; +typedef struct PixelFormat PixelFormat; +typedef struct TextConsole TextConsole; +typedef TextConsole QEMUConsole; +typedef struct CharDriverState CharDriverState; +typedef struct MACAddr MACAddr; +typedef struct VLANState VLANState; +typedef struct VLANClientState VLANClientState; +typedef struct i2c_bus i2c_bus; +typedef struct i2c_slave i2c_slave; +typedef struct SMBusDevice SMBusDevice; +typedef struct PCIHostState PCIHostState; +typedef struct PCIExpressHost PCIExpressHost; +typedef struct PCIBus PCIBus; +typedef struct PCIDevice PCIDevice; +typedef struct SerialState SerialState; +typedef struct IRQState *qemu_irq; +typedef struct PCMCIACardState PCMCIACardState; +typedef struct MouseTransformInfo MouseTransformInfo; +typedef struct uWireSlave uWireSlave; +typedef struct I2SCodec I2SCodec; +typedef struct SSIBus SSIBus; +typedef struct EventNotifier EventNotifier; +typedef struct VirtIODevice VirtIODevice; + +typedef uint64_t pcibus_t; + +void cpu_exec_init_all(unsigned long tb_size); + +/* CPU save/load. */ +void cpu_save(QEMUFile *f, void *opaque); +int cpu_load(QEMUFile *f, void *opaque, int version_id); + +/* Force QEMU to stop what it's doing and service IO */ +void qemu_service_io(void); + +/* Force QEMU to process pending events */ +void qemu_notify_event(void); + +/* Unblock cpu */ +void qemu_cpu_kick(void *env); +int qemu_cpu_self(void *env); + +/* work queue */ +struct qemu_work_item { + struct qemu_work_item *next; + void (*func)(void *data); + void *data; + int done; +}; + +#ifdef CONFIG_USER_ONLY +#define qemu_init_vcpu(env) do { } while (0) +#else +void qemu_init_vcpu(void *env); +#endif + +typedef struct QEMUIOVector { + struct iovec *iov; + int niov; + int nalloc; + size_t size; +} QEMUIOVector; + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); +void qemu_iovec_destroy(QEMUIOVector *qiov); +void qemu_iovec_reset(QEMUIOVector *qiov); +void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); +void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); + +struct Monitor; +typedef struct Monitor Monitor; + +/* Convert a byte between binary and BCD. */ +static inline uint8_t to_bcd(uint8_t val) +{ + return ((val / 10) << 4) | (val % 10); +} + +static inline uint8_t from_bcd(uint8_t val) +{ + return ((val >> 4) * 10) + (val & 0x0f); +} + +#include "module.h" + +#endif /* dyngen-exec.h hack */ + +#endif /* !VBOX */ + +#endif diff --git a/src/recompiler/qemu-lock.h b/src/recompiler/qemu-lock.h new file mode 100644 index 00000000..1234a5b6 --- /dev/null +++ b/src/recompiler/qemu-lock.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
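The BCD helpers at the end of qemu-common.h above pack one decimal digit per
nibble; a quick stand-alone check (hypothetical test code, with the two
inlines repeated so it compiles on its own):

    #include <assert.h>
    #include <stdint.h>

    static inline uint8_t to_bcd(uint8_t val)
    {
        return ((val / 10) << 4) | (val % 10);
    }

    static inline uint8_t from_bcd(uint8_t val)
    {
        return ((val >> 4) * 10) + (val & 0x0f);
    }

    int main(void)
    {
        assert(to_bcd(42) == 0x42);        /* tens digit in the high nibble */
        assert(from_bcd(0x59) == 59);
        assert(from_bcd(to_bcd(7)) == 7);
        return 0;
    }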
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +/* Locking primitives. Most of this code should be redundant - + system emulation doesn't need/use locking, NPTL userspace uses + pthread mutexes, and non-NPTL userspace isn't threadsafe anyway. + In either case a spinlock is probably the wrong kind of lock. + Spinlocks are only good if you know annother CPU has the lock and is + likely to release it soon. In environments where you have more threads + than physical CPUs (the extreme case being a single CPU host) a spinlock + simply wastes CPU until the OS decides to preempt it. */ +#if defined(CONFIG_USE_NPTL) + +#include <pthread.h> +#define spin_lock pthread_mutex_lock +#define spin_unlock pthread_mutex_unlock +#define spinlock_t pthread_mutex_t +#define SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER + +#else + +#if defined(__hppa__) + +typedef int spinlock_t[4]; + +#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 } + +static inline void resetlock (spinlock_t *p) +{ + (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1; +} + +#else + +typedef int spinlock_t; + +#define SPIN_LOCK_UNLOCKED 0 + +static inline void resetlock (spinlock_t *p) +{ + *p = SPIN_LOCK_UNLOCKED; +} + +#endif + +#ifdef VBOX +DECLINLINE(int) testandset (int *p) +{ + return ASMAtomicCmpXchgU32((volatile uint32_t *)p, 1, 0) ? 0 : 1; +} +#elif defined(_ARCH_PPC) +static inline int testandset (int *p) +{ + int ret; + __asm__ __volatile__ ( + " lwarx %0,0,%1\n" + " xor. %0,%3,%0\n" + " bne $+12\n" + " stwcx. 
%2,0,%1\n" + " bne- $-16\n" + : "=&r" (ret) + : "r" (p), "r" (1), "r" (0) + : "cr0", "memory"); + return ret; +} +#elif defined(__i386__) +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__x86_64__) +static inline int testandset (int *p) +{ + long int64_t readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__s390__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" + " jl 0b" + : "=&d" (ret) + : "r" (1), "a" (p), "0" (*p) + : "cc", "memory" ); + return ret; +} +#elif defined(__alpha__) +static inline int testandset (int *p) +{ + int ret; + unsigned long one; + + __asm__ __volatile__ ("0: mov 1,%2\n" + " ldl_l %0,%1\n" + " stl_c %2,%1\n" + " beq %2,1f\n" + ".subsection 2\n" + "1: br 0b\n" + ".previous" + : "=r" (ret), "=m" (*p), "=r" (one) + : "m" (*p)); + return ret; +} +#elif defined(__sparc__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__("ldstub [%1], %0" + : "=r" (ret) + : "r" (p) + : "memory"); + + return (ret ? 1 : 0); +} +#elif defined(__arm__) +static inline int testandset (int *spinlock) +{ + register unsigned int ret; + __asm__ __volatile__("swp %0, %1, [%2]" + : "=r"(ret) + : "0"(1), "r"(spinlock)); + + return ret; +} +#elif defined(__mc68000) +static inline int testandset (int *p) +{ + char ret; + __asm__ __volatile__("tas %1; sne %0" + : "=r" (ret) + : "m" (p) + : "cc","memory"); + return ret; +} +#elif defined(__hppa__) + +/* Because malloc only guarantees 8-byte alignment for malloc'd data, + and GCC only guarantees 8-byte alignment for stack locals, we can't + be assured of 16-byte alignment for atomic lock data even if we + specify "__attribute ((aligned(16)))" in the type declaration. So, + we use a struct containing an array of four ints for the atomic lock + type and dynamically select the 16-byte aligned int from the array + for the semaphore. 
*/ +#define __PA_LDCW_ALIGNMENT 16 +static inline void *ldcw_align (void *p) { + unsigned long a = (unsigned long)p; + a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1); + return (void *)a; +} + +static inline int testandset (spinlock_t *p) +{ + unsigned int ret; + p = ldcw_align(p); + __asm__ __volatile__("ldcw 0(%1),%0" + : "=r" (ret) + : "r" (p) + : "memory" ); + return !ret; +} + +#elif defined(__ia64) + +#include <ia64intrin.h> + +static inline int testandset (int *p) +{ + return __sync_lock_test_and_set (p, 1); +} +#elif defined(__mips__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ( + " .set push \n" + " .set noat \n" + " .set mips2 \n" + "1: li $1, 1 \n" + " ll %0, %1 \n" + " sc $1, %1 \n" + " beqz $1, 1b \n" + " .set pop " + : "=r" (ret), "+R" (*p) + : + : "memory"); + + return ret; +} +#else +#error unimplemented CPU support +#endif + +#if defined(CONFIG_USER_ONLY) +static inline void spin_lock(spinlock_t *lock) +{ + while (testandset(lock)); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + resetlock(lock); +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return !testandset(lock); +} +#else +static inline void spin_lock(spinlock_t *lock) +{ +} + +static inline void spin_unlock(spinlock_t *lock) +{ +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return 1; +} +#endif + +#endif diff --git a/src/recompiler/qemu-log.h b/src/recompiler/qemu-log.h new file mode 100644 index 00000000..64d375b4 --- /dev/null +++ b/src/recompiler/qemu-log.h @@ -0,0 +1,135 @@ +#ifndef QEMU_LOG_H +#define QEMU_LOG_H + +/* The deprecated global variables: */ +extern FILE *logfile; +extern int loglevel; + + +/* + * The new API: + * + */ + +/* Log settings checking macros: */ + +/* Returns true if qemu_log() will really write somewhere + */ +#ifndef VBOX +#define qemu_log_enabled() (logfile != NULL) +#else +# define qemu_log_enabled() LogIsEnabled() +#endif + +/* Returns true if a bit is set in the current loglevel mask + */ +#define qemu_loglevel_mask(b) ((loglevel & (b)) != 0) + + +/* Logging functions: */ + +/* main logging function + */ +#ifndef VBOX +#define qemu_log(...) do { \ + if (logfile) \ + fprintf(logfile, ## __VA_ARGS__); \ + } while (0) +#else +# define qemu_log(...) Log((__VA_ARGS__)) +#endif + +/* vfprintf-like logging function + */ +#ifndef VBOX +#define qemu_log_vprintf(fmt, va) do { \ + if (logfile) \ + vfprintf(logfile, fmt, va); \ + } while (0) +#else +# define qemu_log_vprintf(fmt, va) do { \ + if (LogIsEnabled()) \ + RTLogLoggerExV(RTLOGGRPFLAGS_LEVEL_1, LOG_GROUP, fmt, va); \ + } while (0) +#endif + +/* log only if a bit is set on the current loglevel mask + */ +#ifndef VBOX +#define qemu_log_mask(b, ...) do { \ + if (loglevel & (b)) \ + fprintf(logfile, ## __VA_ARGS__); \ + } while (0) +#else +# define qemu_log_mask(b, ...) 
do { \ + if (loglevel & (b)) \ + Log((__VA_ARGS__)); \ + } while (0) +#endif + + + + +/* Special cases: */ + +/* cpu_dump_state() logging functions: */ +#ifndef VBOX +#define log_cpu_state(env, f) cpu_dump_state((env), logfile, fprintf, (f)); +#else +#define log_cpu_state(env, f) cpu_dump_state((env), NULL, NULL, (f)); +#endif +#define log_cpu_state_mask(b, env, f) do { \ + if (loglevel & (b)) log_cpu_state((env), (f)); \ + } while (0) + +/* disas() and target_disas() to logfile: */ +#define log_target_disas(start, len, flags) \ + target_disas(logfile, (start), (len), (flags)) +#define log_disas(start, len) \ + disas(logfile, (start), (len)) + +/* page_dump() output to the log file: */ +#define log_page_dump() page_dump(logfile) + + + +/* Maintenance: */ + +/* fflush() the log file */ +#ifndef VBOX +#define qemu_log_flush() fflush(logfile) +#else +# define qemu_log_flush() RTLogFlush(NULL) +#endif + +/* Close the log file */ +#ifndef VBOX +#define qemu_log_close() do { \ + fclose(logfile); \ + logfile = NULL; \ + } while (0) +#else +# define qemu_log_close() do { } while (0) +#endif + +/* Set up a new log file */ +#ifndef VBOX +#define qemu_log_set_file(f) do { \ + logfile = (f); \ + } while (0) +#else +# define qemu_log_set_file(f) do { } while (0) +#endif + +/* Set up a new log file, only if none is set */ +#ifndef VBOX +#define qemu_log_try_set_file(f) do { \ + if (!logfile) \ + logfile = (f); \ + } while (0) +#else +#define qemu_log_try_set_file(f) do { } while (0) +#endif + + +#endif diff --git a/src/recompiler/qemu-queue.h b/src/recompiler/qemu-queue.h new file mode 100644 index 00000000..1d077458 --- /dev/null +++ b/src/recompiler/qemu-queue.h @@ -0,0 +1,449 @@ +/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */ + +/* + * Qemu version: Copy from netbsd, removed debug code, removed some of + * the implementations. Left in lists, simple queues, tail queues and + * circular queues. + */ + +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +#ifndef QEMU_SYS_QUEUE_H_ +#define QEMU_SYS_QUEUE_H_ + +/* + * This file defines four types of data structures: + * lists, simple queues, tail queues, and circular queues. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A simple queue is headed by a pair of pointers, one the head of the + * list and the other to the tail of the list. The elements are singly + * linked to save space, so elements can only be removed from the + * head of the list. New elements can be added to the list after + * an existing element, at the head of the list, or at the end of the + * list. A simple queue may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * A circle queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or after + * an existing element, at the head of the list, or at the end of the list. + * A circle queue may be traversed in either direction, but has a more + * complex end of list detection. + * + * For details on the use of these macros, see the queue(3) manual page. + */ + +/* + * List definitions. + */ +#define QLIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define QLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define QLIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ +#define QLIST_INIT(head) do { \ + (head)->lh_first = NULL; \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_AFTER(listelm, elm, field) do { \ + if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ + (listelm)->field.le_next->field.le_prev = \ + &(elm)->field.le_next; \ + (listelm)->field.le_next = (elm); \ + (elm)->field.le_prev = &(listelm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + (elm)->field.le_next = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &(elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +#define QLIST_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.le_next = (head)->lh_first) != NULL) \ + (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ + (head)->lh_first = (elm); \ + (elm)->field.le_prev = &(head)->lh_first; \ +} while (/*CONSTCOND*/0) + +#define QLIST_REMOVE(elm, field) do { \ + if ((elm)->field.le_next != NULL) \ + (elm)->field.le_next->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = (elm)->field.le_next; \ +} while (/*CONSTCOND*/0) + +#define QLIST_FOREACH(var, head, field) \ + for ((var) = ((head)->lh_first); \ + (var); \ + (var) = ((var)->field.le_next)) + +#define QLIST_FOREACH_SAFE(var, head, field, next_var) \ + for ((var) = ((head)->lh_first); \ + (var) && ((next_var) = ((var)->field.le_next), 1); \ + (var) = (next_var)) + +/* + * List access methods. + */ +#define QLIST_EMPTY(head) ((head)->lh_first == NULL) +#define QLIST_FIRST(head) ((head)->lh_first) +#define QLIST_NEXT(elm, field) ((elm)->field.le_next) + + +/* + * Simple queue definitions. + */ +#define QSIMPLEQ_HEAD(name, type) \ +struct name { \ + struct type *sqh_first; /* first element */ \ + struct type **sqh_last; /* addr of last next element */ \ +} + +#define QSIMPLEQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).sqh_first } + +#define QSIMPLEQ_ENTRY(type) \ +struct { \ + struct type *sqe_next; /* next element */ \ +} + +/* + * Simple queue functions. 
+ */ +#define QSIMPLEQ_INIT(head) do { \ + (head)->sqh_first = NULL; \ + (head)->sqh_last = &(head)->sqh_first; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (head)->sqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.sqe_next = NULL; \ + *(head)->sqh_last = (elm); \ + (head)->sqh_last = &(elm)->field.sqe_next; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL) \ + (head)->sqh_last = &(elm)->field.sqe_next; \ + (listelm)->field.sqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \ + if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\ + (head)->sqh_last = &(head)->sqh_first; \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_REMOVE(head, elm, type, field) do { \ + if ((head)->sqh_first == (elm)) { \ + QSIMPLEQ_REMOVE_HEAD((head), field); \ + } else { \ + struct type *curelm = (head)->sqh_first; \ + while (curelm->field.sqe_next != (elm)) \ + curelm = curelm->field.sqe_next; \ + if ((curelm->field.sqe_next = \ + curelm->field.sqe_next->field.sqe_next) == NULL) \ + (head)->sqh_last = &(curelm)->field.sqe_next; \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_FOREACH(var, head, field) \ + for ((var) = ((head)->sqh_first); \ + (var); \ + (var) = ((var)->field.sqe_next)) + +#define QSIMPLEQ_FOREACH_SAFE(var, head, field, next) \ + for ((var) = ((head)->sqh_first); \ + (var) && ((next = ((var)->field.sqe_next)), 1); \ + (var) = (next)) + +#define QSIMPLEQ_CONCAT(head1, head2) do { \ + if (!QSIMPLEQ_EMPTY((head2))) { \ + *(head1)->sqh_last = (head2)->sqh_first; \ + (head1)->sqh_last = (head2)->sqh_last; \ + QSIMPLEQ_INIT((head2)); \ + } \ +} while (/*CONSTCOND*/0) + +#define QSIMPLEQ_LAST(head, type, field) \ + (QSIMPLEQ_EMPTY((head)) ? \ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->sqh_last) - offsetof(struct type, field)))) + +/* + * Simple queue access methods. + */ +#define QSIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL) +#define QSIMPLEQ_FIRST(head) ((head)->sqh_first) +#define QSIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) + + +/* + * Tail queue definitions. + */ +#define Q_TAILQ_HEAD(name, type, qual) \ +struct name { \ + qual type *tqh_first; /* first element */ \ + qual type *qual *tqh_last; /* addr of last next element */ \ +} +#define QTAILQ_HEAD(name, type) Q_TAILQ_HEAD(name, struct type,) + +#define QTAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define Q_TAILQ_ENTRY(type, qual) \ +struct { \ + qual type *tqe_next; /* next element */ \ + qual type *qual *tqe_prev; /* address of previous next element */\ +} +#define QTAILQ_ENTRY(type) Q_TAILQ_ENTRY(struct type,) + +/* + * Tail queue functions. 
+ */ +#define QTAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_last = &(head)->tqh_first; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_prev = &(head)->tqh_first; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &(elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_prev = \ + &(elm)->field.tqe_next; \ + else \ + (head)->tqh_last = &(elm)->field.tqe_next; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + (elm)->field.tqe_next = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_REMOVE(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_FOREACH(var, head, field) \ + for ((var) = ((head)->tqh_first); \ + (var); \ + (var) = ((var)->field.tqe_next)) + +#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \ + for ((var) = ((head)->tqh_first); \ + (var) && ((next_var) = ((var)->field.tqe_next), 1); \ + (var) = (next_var)) + +#define QTAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \ + (var); \ + (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last))) + +/* + * Tail queue access methods. + */ +#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL) +#define QTAILQ_FIRST(head) ((head)->tqh_first) +#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define QTAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +#define QTAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + + +/* + * Circular queue definitions. + */ +#define QCIRCLEQ_HEAD(name, type) \ +struct name { \ + struct type *cqh_first; /* first element */ \ + struct type *cqh_last; /* last element */ \ +} + +#define QCIRCLEQ_HEAD_INITIALIZER(head) \ + { (void *)&head, (void *)&head } + +#define QCIRCLEQ_ENTRY(type) \ +struct { \ + struct type *cqe_next; /* next element */ \ + struct type *cqe_prev; /* previous element */ \ +} + +/* + * Circular queue functions. 
+ */ +#define QCIRCLEQ_INIT(head) do { \ + (head)->cqh_first = (void *)(head); \ + (head)->cqh_last = (void *)(head); \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm)->field.cqe_next; \ + (elm)->field.cqe_prev = (listelm); \ + if ((listelm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (listelm)->field.cqe_next->field.cqe_prev = (elm); \ + (listelm)->field.cqe_next = (elm); \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \ + (elm)->field.cqe_next = (listelm); \ + (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ + if ((listelm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (listelm)->field.cqe_prev->field.cqe_next = (elm); \ + (listelm)->field.cqe_prev = (elm); \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_INSERT_HEAD(head, elm, field) do { \ + (elm)->field.cqe_next = (head)->cqh_first; \ + (elm)->field.cqe_prev = (void *)(head); \ + if ((head)->cqh_last == (void *)(head)) \ + (head)->cqh_last = (elm); \ + else \ + (head)->cqh_first->field.cqe_prev = (elm); \ + (head)->cqh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.cqe_next = (void *)(head); \ + (elm)->field.cqe_prev = (head)->cqh_last; \ + if ((head)->cqh_first == (void *)(head)) \ + (head)->cqh_first = (elm); \ + else \ + (head)->cqh_last->field.cqe_next = (elm); \ + (head)->cqh_last = (elm); \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_REMOVE(head, elm, field) do { \ + if ((elm)->field.cqe_next == (void *)(head)) \ + (head)->cqh_last = (elm)->field.cqe_prev; \ + else \ + (elm)->field.cqe_next->field.cqe_prev = \ + (elm)->field.cqe_prev; \ + if ((elm)->field.cqe_prev == (void *)(head)) \ + (head)->cqh_first = (elm)->field.cqe_next; \ + else \ + (elm)->field.cqe_prev->field.cqe_next = \ + (elm)->field.cqe_next; \ +} while (/*CONSTCOND*/0) + +#define QCIRCLEQ_FOREACH(var, head, field) \ + for ((var) = ((head)->cqh_first); \ + (var) != (const void *)(head); \ + (var) = ((var)->field.cqe_next)) + +#define QCIRCLEQ_FOREACH_REVERSE(var, head, field) \ + for ((var) = ((head)->cqh_last); \ + (var) != (const void *)(head); \ + (var) = ((var)->field.cqe_prev)) + +/* + * Circular queue access methods. + */ +#define QCIRCLEQ_EMPTY(head) ((head)->cqh_first == (void *)(head)) +#define QCIRCLEQ_FIRST(head) ((head)->cqh_first) +#define QCIRCLEQ_LAST(head) ((head)->cqh_last) +#define QCIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next) +#define QCIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev) + +#define QCIRCLEQ_LOOP_NEXT(head, elm, field) \ + (((elm)->field.cqe_next == (void *)(head)) \ + ? ((head)->cqh_first) \ + : (elm->field.cqe_next)) +#define QCIRCLEQ_LOOP_PREV(head, elm, field) \ + (((elm)->field.cqe_prev == (void *)(head)) \ + ? ((head)->cqh_last) \ + : (elm->field.cqe_prev)) + +#endif /* !QEMU_SYS_QUEUE_H_ */ diff --git a/src/recompiler/qemu-timer.h b/src/recompiler/qemu-timer.h new file mode 100644 index 00000000..209c4d2c --- /dev/null +++ b/src/recompiler/qemu-timer.h @@ -0,0 +1,272 @@ +#ifndef QEMU_TIMER_H +#define QEMU_TIMER_H + +#include "qemu-common.h" + +/* timers */ +#ifndef VBOX + +typedef struct QEMUClock QEMUClock; +typedef void QEMUTimerCB(void *opaque); + +/* The real time clock should be used only for stuff which does not + change the virtual machine state, as it is run even if the virtual + machine is stopped. The real time clock has a frequency of 1000 + Hz. 
*/ +extern QEMUClock *rt_clock; + +/* The virtual clock is only run during the emulation. It is stopped + when the virtual machine is stopped. Virtual timers use a high + precision clock, usually cpu cycles (use ticks_per_sec). */ +extern QEMUClock *vm_clock; + +/* The host clock should be use for device models that emulate accurate + real time sources. It will continue to run when the virtual machine + is suspended, and it will reflect system time changes the host may + undergo (e.g. due to NTP). The host clock has the same precision as + the virtual clock. */ +extern QEMUClock *host_clock; + +int64_t qemu_get_clock(QEMUClock *clock); +int64_t qemu_get_clock_ns(QEMUClock *clock); +void qemu_clock_enable(QEMUClock *clock, int enabled); + +QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque); +void qemu_free_timer(QEMUTimer *ts); +void qemu_del_timer(QEMUTimer *ts); +void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time); +int qemu_timer_pending(QEMUTimer *ts); +int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time); + +void qemu_run_all_timers(void); +int qemu_alarm_pending(void); +int64_t qemu_next_deadline(void); +void configure_alarms(char const *opt); +void configure_icount(const char *option); +int qemu_calculate_timeout(void); +void init_clocks(void); +int init_timer_alarm(void); +void quit_timers(void); + +static inline int64_t get_ticks_per_sec(void) +{ + return 1000000000LL; +} + + +void qemu_get_timer(QEMUFile *f, QEMUTimer *ts); +void qemu_put_timer(QEMUFile *f, QEMUTimer *ts); + +/* ptimer.c */ +typedef struct ptimer_state ptimer_state; +typedef void (*ptimer_cb)(void *opaque); + +ptimer_state *ptimer_init(QEMUBH *bh); +void ptimer_set_period(ptimer_state *s, int64_t period); +void ptimer_set_freq(ptimer_state *s, uint32_t freq); +void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload); +uint64_t ptimer_get_count(ptimer_state *s); +void ptimer_set_count(ptimer_state *s, uint64_t count); +void ptimer_run(ptimer_state *s, int oneshot); +void ptimer_stop(ptimer_state *s); +void qemu_put_ptimer(QEMUFile *f, ptimer_state *s); +void qemu_get_ptimer(QEMUFile *f, ptimer_state *s); + +/* icount */ +int64_t qemu_icount_round(int64_t count); +extern int64_t qemu_icount; +#endif /* !VBOX */ +extern int use_icount; +#ifndef VBOX +extern int icount_time_shift; +extern int64_t qemu_icount_bias; +int64_t cpu_get_icount(void); + +/*******************************************/ +/* host CPU ticks (if available) */ + +#if defined(_ARCH_PPC) + +static inline int64_t cpu_get_real_ticks(void) +{ + int64_t retval; +#ifdef _ARCH_PPC64 + /* This reads timebase in one 64bit go and includes Cell workaround from: + http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html + */ + __asm__ __volatile__ ("mftb %0\n\t" + "cmpwi %0,0\n\t" + "beq- $-8" + : "=r" (retval)); +#else + /* http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html */ + unsigned long junk; + __asm__ __volatile__ ("mfspr %1,269\n\t" /* mftbu */ + "mfspr %L0,268\n\t" /* mftb */ + "mfspr %0,269\n\t" /* mftbu */ + "cmpw %0,%1\n\t" + "bne $-16" + : "=r" (retval), "=r" (junk)); +#endif + return retval; +} + +#elif defined(__i386__) + +static inline int64_t cpu_get_real_ticks(void) +{ + int64_t val; + asm volatile ("rdtsc" : "=A" (val)); + return val; +} + +#elif defined(__x86_64__) + +static inline int64_t cpu_get_real_ticks(void) +{ + uint32_t low,high; + int64_t val; + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + val = high; + val <<= 32; + val |= low; + return val; +} + 
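
A minimal usage sketch of the QEMUTimer API declared above may help when reading the rest of this header. It is illustrative only and not part of the diff: MyDeviceState, my_timer_cb and my_device_init are invented names, and it assumes the convention suggested by get_ticks_per_sec() that vm_clock expire times are expressed in nanoseconds and only advance while the guest runs.

    typedef struct MyDeviceState {
        QEMUTimer *timer;
    } MyDeviceState;

    static void my_timer_cb(void *opaque)
    {
        MyDeviceState *s = opaque;
        /* ... update device state, raise an interrupt, ... */

        /* Re-arm one second of guest time later; vm_clock stops
           whenever the virtual machine is stopped. */
        qemu_mod_timer(s->timer, qemu_get_clock(vm_clock) + get_ticks_per_sec());
    }

    static void my_device_init(MyDeviceState *s)
    {
        s->timer = qemu_new_timer(vm_clock, my_timer_cb, s);
        qemu_mod_timer(s->timer, qemu_get_clock(vm_clock) + get_ticks_per_sec());
    }

In this scheme qemu_del_timer() cancels a pending expiry and qemu_free_timer() releases the timer when the device is torn down.
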
+#elif defined(__hppa__) + +static inline int64_t cpu_get_real_ticks(void) +{ + int val; + asm volatile ("mfctl %%cr16, %0" : "=r"(val)); + return val; +} + +#elif defined(__ia64) + +static inline int64_t cpu_get_real_ticks(void) +{ + int64_t val; + asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory"); + return val; +} + +#elif defined(__s390__) + +static inline int64_t cpu_get_real_ticks(void) +{ + int64_t val; + asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc"); + return val; +} + +#elif defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__) + +static inline int64_t cpu_get_real_ticks (void) +{ +#if defined(_LP64) + uint64_t rval; + asm volatile("rd %%tick,%0" : "=r"(rval)); + return rval; +#else + union { + uint64_t i64; + struct { + uint32_t high; + uint32_t low; + } i32; + } rval; + asm volatile("rd %%tick,%1; srlx %1,32,%0" + : "=r"(rval.i32.high), "=r"(rval.i32.low)); + return rval.i64; +#endif +} + +#elif defined(__mips__) && \ + ((defined(__mips_isa_rev) && __mips_isa_rev >= 2) || defined(__linux__)) +/* + * binutils wants to use rdhwr only on mips32r2 + * but as linux kernel emulate it, it's fine + * to use it. + * + */ +#define MIPS_RDHWR(rd, value) { \ + __asm__ __volatile__ (".set push\n\t" \ + ".set mips32r2\n\t" \ + "rdhwr %0, "rd"\n\t" \ + ".set pop" \ + : "=r" (value)); \ + } + +static inline int64_t cpu_get_real_ticks(void) +{ + /* On kernels >= 2.6.25 rdhwr <reg>, $2 and $3 are emulated */ + uint32_t count; + static uint32_t cyc_per_count = 0; + + if (!cyc_per_count) { + MIPS_RDHWR("$3", cyc_per_count); + } + + MIPS_RDHWR("$2", count); + return (int64_t)(count * cyc_per_count); +} + +#elif defined(__alpha__) + +static inline int64_t cpu_get_real_ticks(void) +{ + uint64_t cc; + uint32_t cur, ofs; + + asm volatile("rpcc %0" : "=r"(cc)); + cur = cc; + ofs = cc >> 32; + return cur - ofs; +} + +#else +/* The host CPU doesn't have an easily accessible cycle counter. + Just return a monotonically increasing value. This will be + totally wrong, but hopefully better than nothing. */ +static inline int64_t cpu_get_real_ticks (void) +{ + static int64_t ticks = 0; + return ticks++; +} +#endif + +#endif /* !VBOX */ + +#ifdef NEED_CPU_H +/* Deterministic execution requires that IO only be performed on the last + instruction of a TB so that interrupts take effect immediately. */ +static inline int can_do_io(CPUState *env) +{ + if (!use_icount) + return 1; + + /* If not executing code then assume we are ok. 
*/ + if (!env->current_tb) + return 1; + + return env->can_do_io != 0; +} +#endif + +#ifndef VBOX + +#ifdef CONFIG_PROFILER +static inline int64_t profile_getclock(void) +{ + return cpu_get_real_ticks(); +} + +extern int64_t qemu_time, qemu_time_start; +extern int64_t tlb_flush_time; +extern int64_t dev_time; +#endif + +#endif /* !VBOX */ + +#endif diff --git a/src/recompiler/softmmu_defs.h b/src/recompiler/softmmu_defs.h new file mode 100644 index 00000000..e23c3498 --- /dev/null +++ b/src/recompiler/softmmu_defs.h @@ -0,0 +1,57 @@ +#ifndef SOFTMMU_DEFS_H +#define SOFTMMU_DEFS_H + +#ifndef VBOX +uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx); +uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx); +uint32_t REGPARM __ldl_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx); +uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx); + +uint8_t REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx); +uint16_t REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx); +uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx); +uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx); +#else /* VBOX */ +RTCCUINTREG REGPARM __ldb_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx); +RTCCUINTREG REGPARM __ldw_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx); +RTCCUINTREG REGPARM __ldl_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx); +uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx); +void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx); + +RTCCUINTREG REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx); +RTCCUINTREG REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx); +RTCCUINTREG REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx); +uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx); +void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx); + +# ifdef REM_PHYS_ADDR_IN_TLB +RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr); +RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr); +void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val); +RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr); +RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr); +void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val); +RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr); +RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr); +void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val); +uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr); +void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val); +# endif + +#endif /* VBOX */ + +#endif diff --git a/src/recompiler/softmmu_exec.h 
b/src/recompiler/softmmu_exec.h new file mode 100644 index 00000000..28d1d53d --- /dev/null +++ b/src/recompiler/softmmu_exec.h @@ -0,0 +1,153 @@ +/* Common softmmu definitions and inline routines. */ + +/* XXX: find something cleaner. + * Furthermore, this is false for 64 bits targets + */ +#define ldul_user ldl_user +#define ldul_kernel ldl_kernel +#define ldul_hypv ldl_hypv +#define ldul_executive ldl_executive +#define ldul_supervisor ldl_supervisor + +#include "softmmu_defs.h" + +#define ACCESS_TYPE 0 +#define MEMSUFFIX MMU_MODE0_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX + +#define ACCESS_TYPE 1 +#define MEMSUFFIX MMU_MODE1_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX + +#if (NB_MMU_MODES >= 3) + +#define ACCESS_TYPE 2 +#define MEMSUFFIX MMU_MODE2_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 3) */ + +#if (NB_MMU_MODES >= 4) + +#define ACCESS_TYPE 3 +#define MEMSUFFIX MMU_MODE3_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 4) */ + +#if (NB_MMU_MODES >= 5) + +#define ACCESS_TYPE 4 +#define MEMSUFFIX MMU_MODE4_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 5) */ + +#if (NB_MMU_MODES >= 6) + +#define ACCESS_TYPE 5 +#define MEMSUFFIX MMU_MODE5_SUFFIX +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX +#endif /* (NB_MMU_MODES >= 6) */ + +#if (NB_MMU_MODES > 6) +#error "NB_MMU_MODES > 6 is not supported for now" +#endif /* (NB_MMU_MODES > 6) */ + +/* these access are slower, they must be as rare as possible */ +#define ACCESS_TYPE (NB_MMU_MODES) +#define MEMSUFFIX _data +#define DATA_SIZE 1 +#include "softmmu_header.h" + +#define DATA_SIZE 2 +#include "softmmu_header.h" + +#define DATA_SIZE 4 +#include "softmmu_header.h" + +#define DATA_SIZE 8 +#include "softmmu_header.h" +#undef ACCESS_TYPE +#undef MEMSUFFIX + +#define ldub(p) ldub_data(p) +#define ldsb(p) ldsb_data(p) +#define lduw(p) lduw_data(p) +#define ldsw(p) ldsw_data(p) +#define ldl(p) ldl_data(p) +#define ldq(p) ldq_data(p) + +#define stb(p, v) stb_data(p, v) +#define stw(p, v) stw_data(p, v) +#define stl(p, v) stl_data(p, v) +#define stq(p, v) stq_data(p, v) diff --git a/src/recompiler/softmmu_header.h b/src/recompiler/softmmu_header.h new file mode 100644 index 00000000..06e2466d --- /dev/null +++ b/src/recompiler/softmmu_header.h @@ -0,0 +1,208 @@ 
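
The layered includes in softmmu_exec.h are easier to follow with one expansion written out. As a rough sketch (not part of the diff, and assuming an i386-style target where MMU_MODE0_SUFFIX is _kernel), the pass with ACCESS_TYPE 0, MEMSUFFIX _kernel and DATA_SIZE 4 over softmmu_header.h, whose template follows below, effectively emits:

    static inline uint32_t ldl_kernel(target_ulong ptr)
    {
        int mmu_idx = 0;   /* CPU_MMU_INDEX expands to ACCESS_TYPE here */
        int page_index = (ptr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);

        if (unlikely(env->tlb_table[mmu_idx][page_index].addr_read !=
                     (ptr & (TARGET_PAGE_MASK | 3)))) {
            /* TLB miss, unaligned access or I/O page: out-of-line MMU helper. */
            return __ldl_mmu(ptr, mmu_idx);
        }
        /* TLB hit: add the cached guest-to-host delta and load directly. */
        return ldl_raw((uint8_t *)(uintptr_t)
                       (ptr + env->tlb_table[mmu_idx][page_index].addend));
    }

The final block with ACCESS_TYPE set to NB_MMU_MODES and MEMSUFFIX _data produces the ldl_data()/stl_data() family, using cpu_mmu_index(env) instead of a fixed index, and the ldl()/stl() style aliases at the end of the file map onto those.
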
+/* + * Software MMU support + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if DATA_SIZE == 8 +#define SUFFIX q +#define USUFFIX q +#define DATA_TYPE uint64_t +#elif DATA_SIZE == 4 +#define SUFFIX l +#define USUFFIX l +#define DATA_TYPE uint32_t +#elif DATA_SIZE == 2 +#define SUFFIX w +#define USUFFIX uw +#define DATA_TYPE uint16_t +#define DATA_STYPE int16_t +#elif DATA_SIZE == 1 +#define SUFFIX b +#define USUFFIX ub +#define DATA_TYPE uint8_t +#define DATA_STYPE int8_t +#else +#error unsupported data size +#endif + +#if ACCESS_TYPE < (NB_MMU_MODES) + +#define CPU_MMU_INDEX ACCESS_TYPE +#define MMUSUFFIX _mmu + +#elif ACCESS_TYPE == (NB_MMU_MODES) + +#define CPU_MMU_INDEX (cpu_mmu_index(env)) +#define MMUSUFFIX _mmu + +#elif ACCESS_TYPE == (NB_MMU_MODES + 1) + +#define CPU_MMU_INDEX (cpu_mmu_index(env)) +#define MMUSUFFIX _cmmu + +#else +#error invalid ACCESS_TYPE +#endif + +#if DATA_SIZE == 8 +#define RES_TYPE uint64_t +#else +#define RES_TYPE uint32_t +#endif + +#if ACCESS_TYPE == (NB_MMU_MODES + 1) +#define ADDR_READ addr_code +#else +#define ADDR_READ addr_read +#endif + +/* generic load/store macros */ + +static inline RES_TYPE glue(glue(ld, USUFFIX), MEMSUFFIX)(target_ulong ptr) +{ + int page_index; + RES_TYPE res; + target_ulong addr; + uintptr_t physaddr; + int mmu_idx; + + addr = ptr; + page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + mmu_idx = CPU_MMU_INDEX; + if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { + res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx); + } else { + physaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr); + } + return res; +} + +#if DATA_SIZE <= 2 +static inline int glue(glue(lds, SUFFIX), MEMSUFFIX)(target_ulong ptr) +{ + int res, page_index; + target_ulong addr; + uintptr_t physaddr; + int mmu_idx; + + addr = ptr; + page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + mmu_idx = CPU_MMU_INDEX; + if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != + (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { + res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, mmu_idx); + } else { + physaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr); + } + return 
res; +} +#endif + +#if ACCESS_TYPE != (NB_MMU_MODES + 1) + +/* generic store macro */ + +static inline void glue(glue(st, SUFFIX), MEMSUFFIX)(target_ulong ptr, RES_TYPE v) +{ + int page_index; + target_ulong addr; + uintptr_t physaddr; + int mmu_idx; + + addr = ptr; + page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + mmu_idx = CPU_MMU_INDEX; + if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != + (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { + glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, mmu_idx); + } else { + physaddr = addr + env->tlb_table[mmu_idx][page_index].addend; + glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v); + } +} + +#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */ + +#if ACCESS_TYPE != (NB_MMU_MODES + 1) + +#if DATA_SIZE == 8 +static inline float64 glue(ldfq, MEMSUFFIX)(target_ulong ptr) +{ + union { + float64 d; + uint64_t i; + } u; + u.i = glue(ldq, MEMSUFFIX)(ptr); + return u.d; +} + +static inline void glue(stfq, MEMSUFFIX)(target_ulong ptr, float64 v) +{ + union { + float64 d; + uint64_t i; + } u; + u.d = v; + glue(stq, MEMSUFFIX)(ptr, u.i); +} +#endif /* DATA_SIZE == 8 */ + +#if DATA_SIZE == 4 +static inline float32 glue(ldfl, MEMSUFFIX)(target_ulong ptr) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = glue(ldl, MEMSUFFIX)(ptr); + return u.f; +} + +static inline void glue(stfl, MEMSUFFIX)(target_ulong ptr, float32 v) +{ + union { + float32 f; + uint32_t i; + } u; + u.f = v; + glue(stl, MEMSUFFIX)(ptr, u.i); +} +#endif /* DATA_SIZE == 4 */ + +#endif /* ACCESS_TYPE != (NB_MMU_MODES + 1) */ + +#undef RES_TYPE +#undef DATA_TYPE +#undef DATA_STYPE +#undef SUFFIX +#undef USUFFIX +#undef DATA_SIZE +#undef CPU_MMU_INDEX +#undef MMUSUFFIX +#undef ADDR_READ diff --git a/src/recompiler/softmmu_template.h b/src/recompiler/softmmu_template.h new file mode 100644 index 00000000..a31411ba --- /dev/null +++ b/src/recompiler/softmmu_template.h @@ -0,0 +1,366 @@ +/* + * Software MMU support + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#include "qemu-timer.h" + +#define DATA_SIZE (1 << SHIFT) + +#if DATA_SIZE == 8 +#define SUFFIX q +#define USUFFIX q +#define DATA_TYPE uint64_t +#ifdef VBOX +# define DATA_TYPE_PROMOTED uint64_t +#endif +#elif DATA_SIZE == 4 +#define SUFFIX l +#define USUFFIX l +#define DATA_TYPE uint32_t +#ifdef VBOX +# define DATA_TYPE_PROMOTED RTCCUINTREG +#endif +#elif DATA_SIZE == 2 +#define SUFFIX w +#define USUFFIX uw +#define DATA_TYPE uint16_t +#ifdef VBOX +# define DATA_TYPE_PROMOTED RTCCUINTREG +#endif +#elif DATA_SIZE == 1 +#define SUFFIX b +#define USUFFIX ub +#define DATA_TYPE uint8_t +#ifdef VBOX +# define DATA_TYPE_PROMOTED RTCCUINTREG +#endif +#else +#error unsupported data size +#endif + +#ifdef SOFTMMU_CODE_ACCESS +#define READ_ACCESS_TYPE 2 +#define ADDR_READ addr_code +#else +#define READ_ACCESS_TYPE 0 +#define ADDR_READ addr_read +#endif + +static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr, + int mmu_idx, + void *retaddr); +static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr, + target_ulong addr, + void *retaddr) +{ + DATA_TYPE res; + int index; + index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + env->mem_io_pc = (uintptr_t)retaddr; + if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT) + && !can_do_io(env)) { + cpu_io_recompile(env, retaddr); + } + + env->mem_io_vaddr = addr; +#if SHIFT <= 2 + res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr); +#else +#ifdef TARGET_WORDS_BIGENDIAN + res = (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr) << 32; + res |= io_mem_read[index][2](io_mem_opaque[index], physaddr + 4); +#else + res = io_mem_read[index][2](io_mem_opaque[index], physaddr); + res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32; +#endif +#endif /* SHIFT > 2 */ + return res; +} + +/* handle all cases except unaligned access which span two pages */ +#ifndef VBOX +DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, + int mmu_idx) +#else +/* Load helpers invoked from generated code, and TCG makes an assumption + that valid value takes the whole register, why gcc after 4.3 may + use only lower part of register for smaller types. So force promotion. 
*/ +DATA_TYPE_PROMOTED REGPARM +glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, + int mmu_idx) +#endif +{ + DATA_TYPE res; + int index; + target_ulong tlb_addr; + target_phys_addr_t ioaddr; + uintptr_t addend; + void *retaddr; + + /* test if there is match for unaligned or IO access */ + /* XXX: could done more in memory macro in a non portable way */ + index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + redo: + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (tlb_addr & ~TARGET_PAGE_MASK) { + /* IO access */ + if ((addr & (DATA_SIZE - 1)) != 0) + goto do_unaligned_access; + retaddr = GETPC(); + ioaddr = env->iotlb[mmu_idx][index]; + res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr); + } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { + /* slow unaligned access (it spans two pages or IO) */ + do_unaligned_access: + retaddr = GETPC(); +#ifdef ALIGNED_ONLY + do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr); +#endif + res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr, + mmu_idx, retaddr); + } else { + /* unaligned/aligned access in the same page */ +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + retaddr = GETPC(); + do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + } +#endif + addend = env->tlb_table[mmu_idx][index].addend; + res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend)); + } + } else { + /* the page is not in the TLB : fill it */ + retaddr = GETPC(); +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) + do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr); +#endif + tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + goto redo; + } + return res; +} + +/* handle all unaligned cases */ +static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr, + int mmu_idx, + void *retaddr) +{ + DATA_TYPE res, res1, res2; + int index, shift; + target_phys_addr_t ioaddr; + uintptr_t addend; + target_ulong tlb_addr, addr1, addr2; + + index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + redo: + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (tlb_addr & ~TARGET_PAGE_MASK) { + /* IO access */ + if ((addr & (DATA_SIZE - 1)) != 0) + goto do_unaligned_access; + ioaddr = env->iotlb[mmu_idx][index]; + res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr); + } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { + do_unaligned_access: + /* slow unaligned access (it spans two pages) */ + addr1 = addr & ~(DATA_SIZE - 1); + addr2 = addr1 + DATA_SIZE; + res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1, + mmu_idx, retaddr); + res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2, + mmu_idx, retaddr); + shift = (addr & (DATA_SIZE - 1)) * 8; +#ifdef TARGET_WORDS_BIGENDIAN + res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); +#else + res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); +#endif + res = (DATA_TYPE)res; + } else { + /* unaligned/aligned access in the same page */ + addend = env->tlb_table[mmu_idx][index].addend; + res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend)); + } + } else { + /* the page is not in the TLB : fill it */ + tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + goto redo; + } + return res; +} + +#ifndef SOFTMMU_CODE_ACCESS + +static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong 
addr, + DATA_TYPE val, + int mmu_idx, + void *retaddr); + +static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr, + DATA_TYPE val, + target_ulong addr, + void *retaddr) +{ + int index; + index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT) + && !can_do_io(env)) { + cpu_io_recompile(env, retaddr); + } + + env->mem_io_vaddr = addr; + env->mem_io_pc = (uintptr_t)retaddr; +#if SHIFT <= 2 + io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val); +#else +#ifdef TARGET_WORDS_BIGENDIAN + io_mem_write[index][2](io_mem_opaque[index], physaddr, val >> 32); + io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val); +#else + io_mem_write[index][2](io_mem_opaque[index], physaddr, val); + io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32); +#endif +#endif /* SHIFT > 2 */ +} + +void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, + DATA_TYPE val, + int mmu_idx) +{ + target_phys_addr_t ioaddr; + uintptr_t addend; + target_ulong tlb_addr; + void *retaddr; + int index; + + index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + redo: + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (tlb_addr & ~TARGET_PAGE_MASK) { + /* IO access */ + if ((addr & (DATA_SIZE - 1)) != 0) + goto do_unaligned_access; + retaddr = GETPC(); + ioaddr = env->iotlb[mmu_idx][index]; + glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr); + } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { + do_unaligned_access: + retaddr = GETPC(); +#ifdef ALIGNED_ONLY + do_unaligned_access(addr, 1, mmu_idx, retaddr); +#endif + glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val, + mmu_idx, retaddr); + } else { + /* aligned/unaligned access in the same page */ +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) { + retaddr = GETPC(); + do_unaligned_access(addr, 1, mmu_idx, retaddr); + } +#endif + addend = env->tlb_table[mmu_idx][index].addend; + glue(glue(st, SUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend), val); + } + } else { + /* the page is not in the TLB : fill it */ + retaddr = GETPC(); +#ifdef ALIGNED_ONLY + if ((addr & (DATA_SIZE - 1)) != 0) + do_unaligned_access(addr, 1, mmu_idx, retaddr); +#endif + tlb_fill(addr, 1, mmu_idx, retaddr); + goto redo; + } +} + +/* handles all unaligned cases */ +static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr, + DATA_TYPE val, + int mmu_idx, + void *retaddr) +{ + target_phys_addr_t ioaddr; + uintptr_t addend; + target_ulong tlb_addr; + int index, i; + + index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + redo: + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (tlb_addr & ~TARGET_PAGE_MASK) { + /* IO access */ + if ((addr & (DATA_SIZE - 1)) != 0) + goto do_unaligned_access; + ioaddr = env->iotlb[mmu_idx][index]; + glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr); + } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { + do_unaligned_access: + /* XXX: not efficient, but simple */ + /* Note: relies on the fact that tlb_fill() does not remove the + * previous page from the TLB cache. 
*/ + for(i = DATA_SIZE - 1; i >= 0; i--) { +#ifdef TARGET_WORDS_BIGENDIAN + glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)), + mmu_idx, retaddr); +#else + glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8), + mmu_idx, retaddr); +#endif + } + } else { + /* aligned/unaligned access in the same page */ + addend = env->tlb_table[mmu_idx][index].addend; + glue(glue(st, SUFFIX), _raw)((uint8_t *)(uintptr_t)(addr+addend), val); + } + } else { + /* the page is not in the TLB : fill it */ + tlb_fill(addr, 1, mmu_idx, retaddr); + goto redo; + } +} + +#endif /* !defined(SOFTMMU_CODE_ACCESS) */ + +#ifdef VBOX +# undef DATA_TYPE_PROMOTED +#endif +#undef READ_ACCESS_TYPE +#undef SHIFT +#undef DATA_TYPE +#undef SUFFIX +#undef USUFFIX +#undef DATA_SIZE +#undef ADDR_READ diff --git a/src/recompiler/target-i386/Makefile.kup b/src/recompiler/target-i386/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/recompiler/target-i386/Makefile.kup diff --git a/src/recompiler/target-i386/TODO b/src/recompiler/target-i386/TODO new file mode 100644 index 00000000..8dfd4633 --- /dev/null +++ b/src/recompiler/target-i386/TODO @@ -0,0 +1,32 @@ +Correctness issues: + +- some eflags manipulation incorrectly reset the bit 0x2. +- SVM: test, cpu save/restore, SMM save/restore. +- x86_64: lcall/ljmp intel/amd differences ? +- better code fetch (different exception handling + CS.limit support) +- user/kernel PUSHL/POPL in helper.c +- add missing cpuid tests +- return UD exception if LOCK prefix incorrectly used +- test ldt limit < 7 ? +- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret) +- full support of segment limit/rights +- full x87 exception support +- improve x87 bit exactness (use bochs code ?) +- DRx register support +- CR0.AC emulation +- SSE alignment checks +- fix SSE min/max with nans + +Optimizations/Features: + +- add SVM nested paging support +- add VMX support +- add AVX support +- add SSE5 support +- fxsave/fxrstor AMD extensions +- improve monitor/mwait support +- faster EFLAGS update: consider SZAP, C, O can be updated separately + with a bit field in CC_OP and more state variables. +- evaluate x87 stack pointer statically +- find a way to avoid translating several time the same TB if CR0.TS + is set or not. diff --git a/src/recompiler/target-i386/cpu.h b/src/recompiler/target-i386/cpu.h new file mode 100644 index 00000000..c643db8d --- /dev/null +++ b/src/recompiler/target-i386/cpu.h @@ -0,0 +1,1215 @@ +/* + * i386 virtual CPU header + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#ifndef CPU_I386_H +#define CPU_I386_H + +#include "config.h" + +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif + +/* target supports implicit self modifying code */ +#define TARGET_HAS_SMC +/* support for self modifying code even if the modified instruction is + close to the modifying instruction */ +#define TARGET_HAS_PRECISE_SMC + +#define TARGET_HAS_ICE 1 + +#ifdef TARGET_X86_64 +#define ELF_MACHINE EM_X86_64 +#else +#define ELF_MACHINE EM_386 +#endif + +#define CPUState struct CPUX86State + +#include "cpu-defs.h" + +#include "softfloat.h" + +#ifdef VBOX +# include <iprt/critsect.h> +# include <iprt/thread.h> +# include <iprt/assert.h> +# include <iprt/asm.h> +# include <VBox/vmm/vmm.h> +# include <VBox/vmm/stam.h> +# include <VBox/vmm/cpumctx.h> +# undef MSR_IA32_APICBASE_BSP +#endif /* VBOX */ + +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +#define R_AL 0 +#define R_CL 1 +#define R_DL 2 +#define R_BL 3 +#define R_AH 4 +#define R_CH 5 +#define R_DH 6 +#define R_BH 7 + +#define R_ES 0 +#define R_CS 1 +#define R_SS 2 +#define R_DS 3 +#define R_FS 4 +#define R_GS 5 + +/* segment descriptor fields */ +#define DESC_G_MASK (1 << 23) +#define DESC_B_SHIFT 22 +#define DESC_B_MASK (1 << DESC_B_SHIFT) +#define DESC_L_SHIFT 21 /* x86_64 only : 64 bit code segment */ +#define DESC_L_MASK (1 << DESC_L_SHIFT) +#define DESC_AVL_MASK (1 << 20) +#define DESC_P_MASK (1 << 15) +#define DESC_DPL_SHIFT 13 +#define DESC_DPL_MASK (3 << DESC_DPL_SHIFT) +#define DESC_S_MASK (1 << 12) +#define DESC_TYPE_SHIFT 8 +#define DESC_TYPE_MASK (15 << DESC_TYPE_SHIFT) +#define DESC_A_MASK (1 << 8) + +#define DESC_CS_MASK (1 << 11) /* 1=code segment 0=data segment */ +#define DESC_C_MASK (1 << 10) /* code: conforming */ +#define DESC_R_MASK (1 << 9) /* code: readable */ + +#define DESC_E_MASK (1 << 10) /* data: expansion direction */ +#define DESC_W_MASK (1 << 9) /* data: writable */ + +#define DESC_TSS_BUSY_MASK (1 << 9) +#ifdef VBOX +# define DESC_INTEL_UNUSABLE RT_BIT_32(16+8) /**< Internal VT-x bit for NULL sectors. */ +# define DESC_RAW_FLAG_BITS UINT32_C(0x00ffffff) /**< Flag bits we load from the descriptor. */ +#endif + +/* eflags masks */ +#define CC_C 0x0001 +#define CC_P 0x0004 +#define CC_A 0x0010 +#define CC_Z 0x0040 +#define CC_S 0x0080 +#define CC_O 0x0800 + +#define TF_SHIFT 8 +#define IOPL_SHIFT 12 +#define VM_SHIFT 17 + +#define TF_MASK 0x00000100 +#define IF_MASK 0x00000200 +#define DF_MASK 0x00000400 +#define IOPL_MASK 0x00003000 +#define NT_MASK 0x00004000 +#define RF_MASK 0x00010000 +#define VM_MASK 0x00020000 +#define AC_MASK 0x00040000 +#define VIF_MASK 0x00080000 +#define VIP_MASK 0x00100000 +#define ID_MASK 0x00200000 + +/* hidden flags - used internally by qemu to represent additional cpu + states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not + redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit + position to ease oring with eflags. 
*/ +/* current cpl */ +#define HF_CPL_SHIFT 0 +/* true if soft mmu is being used */ +#define HF_SOFTMMU_SHIFT 2 +/* true if hardware interrupts must be disabled for next instruction */ +#define HF_INHIBIT_IRQ_SHIFT 3 +/* 16 or 32 segments */ +#define HF_CS32_SHIFT 4 +#define HF_SS32_SHIFT 5 +/* zero base for DS, ES and SS : can be '0' only in 32 bit CS segment */ +#define HF_ADDSEG_SHIFT 6 +/* copy of CR0.PE (protected mode) */ +#define HF_PE_SHIFT 7 +#define HF_TF_SHIFT 8 /* must be same as eflags */ +#define HF_MP_SHIFT 9 /* the order must be MP, EM, TS */ +#define HF_EM_SHIFT 10 +#define HF_TS_SHIFT 11 +#define HF_IOPL_SHIFT 12 /* must be same as eflags */ +#define HF_LMA_SHIFT 14 /* only used on x86_64: long mode active */ +#define HF_CS64_SHIFT 15 /* only used on x86_64: 64 bit code segment */ +#define HF_RF_SHIFT 16 /* must be same as eflags */ +#define HF_VM_SHIFT 17 /* must be same as eflags */ +#define HF_SMM_SHIFT 19 /* CPU in SMM mode */ +#define HF_SVME_SHIFT 20 /* SVME enabled (copy of EFER.SVME) */ +#define HF_SVMI_SHIFT 21 /* SVM intercepts are active */ +#define HF_OSFXSR_SHIFT 22 /* CR4.OSFXSR */ + +#define HF_CPL_MASK (3 << HF_CPL_SHIFT) +#define HF_SOFTMMU_MASK (1 << HF_SOFTMMU_SHIFT) +#define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT) +#define HF_CS32_MASK (1 << HF_CS32_SHIFT) +#define HF_SS32_MASK (1 << HF_SS32_SHIFT) +#define HF_ADDSEG_MASK (1 << HF_ADDSEG_SHIFT) +#define HF_PE_MASK (1 << HF_PE_SHIFT) +#define HF_TF_MASK (1 << HF_TF_SHIFT) +#define HF_MP_MASK (1 << HF_MP_SHIFT) +#define HF_EM_MASK (1 << HF_EM_SHIFT) +#define HF_TS_MASK (1 << HF_TS_SHIFT) +#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT) +#define HF_LMA_MASK (1 << HF_LMA_SHIFT) +#define HF_CS64_MASK (1 << HF_CS64_SHIFT) +#define HF_RF_MASK (1 << HF_RF_SHIFT) +#define HF_VM_MASK (1 << HF_VM_SHIFT) +#define HF_SMM_MASK (1 << HF_SMM_SHIFT) +#define HF_SVME_MASK (1 << HF_SVME_SHIFT) +#define HF_SVMI_MASK (1 << HF_SVMI_SHIFT) +#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) + +/* hflags2 */ + +#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */ +#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */ +#define HF2_NMI_SHIFT 2 /* CPU serving NMI */ +#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */ + +#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT) +#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT) +#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT) +#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT) + +#define CR0_PE_SHIFT 0 +#define CR0_MP_SHIFT 1 + +#define CR0_PE_MASK (1 << 0) +#define CR0_MP_MASK (1 << 1) +#define CR0_EM_MASK (1 << 2) +#define CR0_TS_MASK (1 << 3) +#define CR0_ET_MASK (1 << 4) +#define CR0_NE_MASK (1 << 5) +#define CR0_WP_MASK (1 << 16) +#define CR0_AM_MASK (1 << 18) +#define CR0_PG_MASK (1U << 31) + +#define CR4_VME_MASK (1 << 0) +#define CR4_PVI_MASK (1 << 1) +#define CR4_TSD_MASK (1 << 2) +#define CR4_DE_MASK (1 << 3) +#define CR4_PSE_MASK (1 << 4) +#define CR4_PAE_MASK (1 << 5) +#define CR4_MCE_MASK (1 << 6) +#define CR4_PGE_MASK (1 << 7) +#define CR4_PCE_MASK (1 << 8) +#define CR4_OSFXSR_SHIFT 9 +#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT) +#define CR4_OSXMMEXCPT_MASK (1 << 10) + +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) +#define DR6_FIXED_1 0xffff0ff0 + +#define DR7_GD (1 << 13) +#define DR7_TYPE_SHIFT 16 +#define DR7_LEN_SHIFT 18 +#define DR7_FIXED_1 0x00000400 + +#define PG_PRESENT_BIT 0 +#define PG_RW_BIT 1 +#define PG_USER_BIT 2 +#define PG_PWT_BIT 3 +#define PG_PCD_BIT 4 +#define PG_ACCESSED_BIT 5 +#define PG_DIRTY_BIT 6 +#define PG_PSE_BIT 7 
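/*
 * Editorial sketch, not part of the upstream header: the HF_* masks above are
 * or'ed into env->hflags and tested together by the translator.  The CPL always
 * lives in bits 0-1 (HF_CPL_MASK), and 64-bit code requires both long mode
 * (HF_LMA_MASK, a copy of EFER.LMA) and a 64-bit code segment (HF_CS64_MASK),
 * as set up by cpu_x86_load_seg_cache() further down in this header.  The
 * helper names below are hypothetical illustrations only.
 */
static inline int sketch_hflags_cpl(uint32_t hflags)
{
    /* extract the current privilege level from bits 0-1 */
    return (hflags & HF_CPL_MASK) >> HF_CPL_SHIFT;
}

static inline int sketch_hflags_is_code64(uint32_t hflags)
{
    /* true only when both EFER.LMA and CS.L are reflected in the flags */
    return (hflags & (HF_LMA_MASK | HF_CS64_MASK)) == (HF_LMA_MASK | HF_CS64_MASK);
}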
+#define PG_GLOBAL_BIT 8 +#define PG_NX_BIT 63 + +#define PG_PRESENT_MASK (1 << PG_PRESENT_BIT) +#define PG_RW_MASK (1 << PG_RW_BIT) +#define PG_USER_MASK (1 << PG_USER_BIT) +#define PG_PWT_MASK (1 << PG_PWT_BIT) +#define PG_PCD_MASK (1 << PG_PCD_BIT) +#define PG_ACCESSED_MASK (1 << PG_ACCESSED_BIT) +#define PG_DIRTY_MASK (1 << PG_DIRTY_BIT) +#define PG_PSE_MASK (1 << PG_PSE_BIT) +#define PG_GLOBAL_MASK (1 << PG_GLOBAL_BIT) +#define PG_NX_MASK (1LL << PG_NX_BIT) + +#define PG_ERROR_W_BIT 1 + +#define PG_ERROR_P_MASK 0x01 +#define PG_ERROR_W_MASK (1 << PG_ERROR_W_BIT) +#define PG_ERROR_U_MASK 0x04 +#define PG_ERROR_RSVD_MASK 0x08 +#define PG_ERROR_I_D_MASK 0x10 + +#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ + +#define MCE_CAP_DEF MCG_CTL_P +#define MCE_BANKS_DEF 10 + +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ + +#define MSR_IA32_TSC 0x10 +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_MTRRcap 0xfe +#define MSR_MTRRcap_VCNT 8 +#define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) +#define MSR_MTRRcap_WC_SUPPORTED (1 << 10) + +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + +#define MSR_MCG_CAP 0x179 +#define MSR_MCG_STATUS 0x17a +#define MSR_MCG_CTL 0x17b + +#define MSR_IA32_PERF_STATUS 0x198 + +#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) +#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) + +#define MSR_MTRRfix64K_00000 0x250 +#define MSR_MTRRfix16K_80000 0x258 +#define MSR_MTRRfix16K_A0000 0x259 +#define MSR_MTRRfix4K_C0000 0x268 +#define MSR_MTRRfix4K_C8000 0x269 +#define MSR_MTRRfix4K_D0000 0x26a +#define MSR_MTRRfix4K_D8000 0x26b +#define MSR_MTRRfix4K_E0000 0x26c +#define MSR_MTRRfix4K_E8000 0x26d +#define MSR_MTRRfix4K_F0000 0x26e +#define MSR_MTRRfix4K_F8000 0x26f + +#define MSR_PAT 0x277 + +#define MSR_MTRRdefType 0x2ff + +#define MSR_MC0_CTL 0x400 +#define MSR_MC0_STATUS 0x401 +#define MSR_MC0_ADDR 0x402 +#define MSR_MC0_MISC 0x403 + +#define MSR_EFER 0xc0000080 + +#define MSR_EFER_SCE (1 << 0) +#define MSR_EFER_LME (1 << 8) +#define MSR_EFER_LMA (1 << 10) +#define MSR_EFER_NXE (1 << 11) +#define MSR_EFER_SVME (1 << 12) +#define MSR_EFER_FFXSR (1 << 14) + +#ifdef VBOX +# define MSR_APIC_RANGE_START 0x800 +# define MSR_APIC_RANGE_END 0x900 +#endif + +#define MSR_STAR 0xc0000081 +#define MSR_LSTAR 0xc0000082 +#define MSR_CSTAR 0xc0000083 +#define MSR_FMASK 0xc0000084 +#define MSR_FSBASE 0xc0000100 +#define MSR_GSBASE 0xc0000101 +#define MSR_KERNELGSBASE 0xc0000102 +#define MSR_TSC_AUX 0xc0000103 + +#define MSR_VM_HSAVE_PA 0xc0010117 + +/* cpuid_features bits */ +#define CPUID_FP87 (1 << 0) +#define CPUID_VME (1 << 1) +#define CPUID_DE (1 << 2) +#define CPUID_PSE (1 << 3) +#define CPUID_TSC (1 << 4) +#define CPUID_MSR (1 << 5) +#define CPUID_PAE (1 << 6) +#define CPUID_MCE (1 << 7) +#define CPUID_CX8 (1 << 8) +#define CPUID_APIC (1 << 9) +#define CPUID_SEP (1 << 11) /* sysenter/sysexit */ +#define CPUID_MTRR (1 << 12) +#define CPUID_PGE (1 << 13) +#define CPUID_MCA (1 << 14) +#define CPUID_CMOV (1 << 15) +#define CPUID_PAT (1 << 16) +#define CPUID_PSE36 (1 << 17) +#define CPUID_PN (1 << 18) +#define CPUID_CLFLUSH (1 << 19) +#define CPUID_DTS (1 << 21) +#define CPUID_ACPI (1 << 22) 
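/*
 * Editorial sketch, not part of the upstream header: the MSR_EFER_* bits above
 * interact with paging.  cpu_x86_update_cr0() in helper.c of this import sets
 * EFER.LMA when paging is enabled while EFER.LME is set, and clears it again
 * when paging is switched off.  The helper name below is a hypothetical
 * illustration of that rule only.
 */
static inline uint64_t sketch_efer_on_paging_toggle(uint64_t efer, int paging_enabled)
{
    if (paging_enabled && (efer & MSR_EFER_LME))
        efer |= MSR_EFER_LMA;               /* entering long mode */
    else if (!paging_enabled)
        efer &= ~(uint64_t)MSR_EFER_LMA;    /* leaving long mode */
    return efer;
}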
+#define CPUID_MMX (1 << 23) +#define CPUID_FXSR (1 << 24) +#define CPUID_SSE (1 << 25) +#define CPUID_SSE2 (1 << 26) +#define CPUID_SS (1 << 27) +#define CPUID_HT (1 << 28) +#define CPUID_TM (1 << 29) +#define CPUID_IA64 (1 << 30) +#define CPUID_PBE (1 << 31) + +#define CPUID_EXT_SSE3 (1 << 0) +#define CPUID_EXT_DTES64 (1 << 2) +#define CPUID_EXT_MONITOR (1 << 3) +#define CPUID_EXT_DSCPL (1 << 4) +#define CPUID_EXT_VMX (1 << 5) +#define CPUID_EXT_SMX (1 << 6) +#define CPUID_EXT_EST (1 << 7) +#define CPUID_EXT_TM2 (1 << 8) +#define CPUID_EXT_SSSE3 (1 << 9) +#define CPUID_EXT_CID (1 << 10) +#define CPUID_EXT_CX16 (1 << 13) +#define CPUID_EXT_XTPR (1 << 14) +#define CPUID_EXT_PDCM (1 << 15) +#define CPUID_EXT_DCA (1 << 18) +#define CPUID_EXT_SSE41 (1 << 19) +#define CPUID_EXT_SSE42 (1 << 20) +#define CPUID_EXT_X2APIC (1 << 21) +#define CPUID_EXT_MOVBE (1 << 22) +#define CPUID_EXT_POPCNT (1 << 23) +#define CPUID_EXT_XSAVE (1 << 26) +#define CPUID_EXT_OSXSAVE (1 << 27) +#define CPUID_EXT_HYPERVISOR (1 << 31) + +#define CPUID_EXT2_SYSCALL (1 << 11) +#define CPUID_EXT2_MP (1 << 19) +#define CPUID_EXT2_NX (1 << 20) +#define CPUID_EXT2_MMXEXT (1 << 22) +#define CPUID_EXT2_FFXSR (1 << 25) +#define CPUID_EXT2_PDPE1GB (1 << 26) +#define CPUID_EXT2_RDTSCP (1 << 27) +#define CPUID_EXT2_LM (1 << 29) +#define CPUID_EXT2_3DNOWEXT (1 << 30) +#define CPUID_EXT2_3DNOW (1 << 31) + +#define CPUID_EXT3_LAHF_LM (1 << 0) +#define CPUID_EXT3_CMP_LEG (1 << 1) +#define CPUID_EXT3_SVM (1 << 2) +#define CPUID_EXT3_EXTAPIC (1 << 3) +#define CPUID_EXT3_CR8LEG (1 << 4) +#define CPUID_EXT3_ABM (1 << 5) +#define CPUID_EXT3_SSE4A (1 << 6) +#define CPUID_EXT3_MISALIGNSSE (1 << 7) +#define CPUID_EXT3_3DNOWPREFETCH (1 << 8) +#define CPUID_EXT3_OSVW (1 << 9) +#define CPUID_EXT3_IBS (1 << 10) +#define CPUID_EXT3_SKINIT (1 << 12) + +#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ +#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ +#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */ + +#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ +#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ +#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ + +#define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */ +#define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */ + +#define EXCP00_DIVZ 0 +#define EXCP01_DB 1 +#define EXCP02_NMI 2 +#define EXCP03_INT3 3 +#define EXCP04_INTO 4 +#define EXCP05_BOUND 5 +#define EXCP06_ILLOP 6 +#define EXCP07_PREX 7 +#define EXCP08_DBLE 8 +#define EXCP09_XERR 9 +#define EXCP0A_TSS 10 +#define EXCP0B_NOSEG 11 +#define EXCP0C_STACK 12 +#define EXCP0D_GPF 13 +#define EXCP0E_PAGE 14 +#define EXCP10_COPR 16 +#define EXCP11_ALGN 17 +#define EXCP12_MCHK 18 + +#define EXCP_SYSCALL 0x100 /* only happens in user only emulation + for syscall instruction */ + +enum { + CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ + CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ + + CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ + CC_OP_MULW, + CC_OP_MULL, + CC_OP_MULQ, + + CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_ADDW, + CC_OP_ADDL, + CC_OP_ADDQ, + + CC_OP_ADCB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_ADCW, + CC_OP_ADCL, + CC_OP_ADCQ, + + CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_SUBW, + CC_OP_SUBL, + CC_OP_SUBQ, + + CC_OP_SBBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */ + CC_OP_SBBW, + CC_OP_SBBL, + CC_OP_SBBQ, + + CC_OP_LOGICB, /* modify all flags, CC_DST = res */ + CC_OP_LOGICW, + 
CC_OP_LOGICL, + CC_OP_LOGICQ, + + CC_OP_INCB, /* modify all flags except, CC_DST = res, CC_SRC = C */ + CC_OP_INCW, + CC_OP_INCL, + CC_OP_INCQ, + + CC_OP_DECB, /* modify all flags except, CC_DST = res, CC_SRC = C */ + CC_OP_DECW, + CC_OP_DECL, + CC_OP_DECQ, + + CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.msb = C */ + CC_OP_SHLW, + CC_OP_SHLL, + CC_OP_SHLQ, + + CC_OP_SARB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */ + CC_OP_SARW, + CC_OP_SARL, + CC_OP_SARQ, + + CC_OP_NB, +}; + +#ifdef FLOATX80 +#define USE_X86LDOUBLE +#endif + +#ifdef USE_X86LDOUBLE +typedef floatx80 CPU86_LDouble; +#else +typedef float64 CPU86_LDouble; +#endif + +typedef struct SegmentCache { + uint32_t selector; +#ifdef VBOX + /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */ + uint16_t newselector; + uint16_t fVBoxFlags; +#endif + target_ulong base; + uint32_t limit; + uint32_t flags; +} SegmentCache; + +typedef union { + uint8_t _b[16]; + uint16_t _w[8]; + uint32_t _l[4]; + uint64_t _q[2]; + float32 _s[4]; + float64 _d[2]; +} XMMReg; + +typedef union { + uint8_t _b[8]; + uint16_t _w[4]; + uint32_t _l[2]; + float32 _s[2]; + uint64_t q; +} MMXReg; + +#ifdef HOST_WORDS_BIGENDIAN +#define XMM_B(n) _b[15 - (n)] +#define XMM_W(n) _w[7 - (n)] +#define XMM_L(n) _l[3 - (n)] +#define XMM_S(n) _s[3 - (n)] +#define XMM_Q(n) _q[1 - (n)] +#define XMM_D(n) _d[1 - (n)] + +#define MMX_B(n) _b[7 - (n)] +#define MMX_W(n) _w[3 - (n)] +#define MMX_L(n) _l[1 - (n)] +#define MMX_S(n) _s[1 - (n)] +#else +#define XMM_B(n) _b[n] +#define XMM_W(n) _w[n] +#define XMM_L(n) _l[n] +#define XMM_S(n) _s[n] +#define XMM_Q(n) _q[n] +#define XMM_D(n) _d[n] + +#define MMX_B(n) _b[n] +#define MMX_W(n) _w[n] +#define MMX_L(n) _l[n] +#define MMX_S(n) _s[n] +#endif +#define MMX_Q(n) q + +typedef union { +#ifdef USE_X86LDOUBLE + CPU86_LDouble d __attribute__((aligned(16))); +#else + CPU86_LDouble d; +#endif + MMXReg mmx; +} FPReg; + +typedef struct { + uint64_t base; + uint64_t mask; +} MTRRVar; + +#define CPU_NB_REGS64 16 +#define CPU_NB_REGS32 8 + +#ifdef TARGET_X86_64 +#define CPU_NB_REGS CPU_NB_REGS64 +#else +#define CPU_NB_REGS CPU_NB_REGS32 +#endif + +#define NB_MMU_MODES 2 + +typedef struct CPUX86State { + /* standard registers */ + target_ulong regs[CPU_NB_REGS]; + target_ulong eip; + target_ulong eflags; /* eflags register. During CPU emulation, CC + flags and DF are set to zero because they are + stored elsewhere */ + + /* emulator internal eflags handling */ + target_ulong cc_src; + target_ulong cc_dst; + uint32_t cc_op; + int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ + uint32_t hflags; /* TB flags, see HF_xxx constants. These flags + are known at translation time. */ + uint32_t hflags2; /* various other flags, see HF2_xxx constants. */ + + /* segments */ + SegmentCache segs[6]; /* selector values */ + SegmentCache ldt; + SegmentCache tr; + SegmentCache gdt; /* only base and limit are used */ + SegmentCache idt; /* only base and limit are used */ + + target_ulong cr[5]; /* NOTE: cr1 is unused */ + int32_t a20_mask; + + /* FPU state */ + unsigned int fpstt; /* top of stack index */ + uint16_t fpus; + uint16_t fpuc; + uint8_t fptags[8]; /* 0 = valid, 1 = empty */ + FPReg fpregs[8]; + + /* emulator internal variables */ + float_status fp_status; +#ifdef VBOX + uint32_t alignment3[3]; /* force the long double to start a 16 byte line. 
*/ +#endif + CPU86_LDouble ft0; +#if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) + uint32_t alignment4; /* long double is 12 byte, pad it to 16. */ +#endif + + float_status mmx_status; /* for 3DNow! float ops */ + float_status sse_status; + uint32_t mxcsr; + XMMReg xmm_regs[CPU_NB_REGS]; + XMMReg xmm_t0; + MMXReg mmx_t0; + target_ulong cc_tmp; /* temporary for rcr/rcl */ + + /* sysenter registers */ + uint32_t sysenter_cs; +#ifdef VBOX + uint32_t alignment0; +#endif + target_ulong sysenter_esp; + target_ulong sysenter_eip; + uint64_t efer; + uint64_t star; + + uint64_t vm_hsave; + uint64_t vm_vmcb; + uint64_t tsc_offset; + uint64_t intercept; + uint16_t intercept_cr_read; + uint16_t intercept_cr_write; + uint16_t intercept_dr_read; + uint16_t intercept_dr_write; + uint32_t intercept_exceptions; + uint8_t v_tpr; + +#ifdef TARGET_X86_64 + target_ulong lstar; + target_ulong cstar; + target_ulong fmask; + target_ulong kernelgsbase; +#endif + uint64_t system_time_msr; + uint64_t wall_clock_msr; + + uint64_t tsc; + + uint64_t pat; + + /* exception/interrupt handling */ + int error_code; + int exception_is_int; +#ifdef VBOX +# define EXCEPTION_IS_INT_VALUE_HARDWARE_IRQ 0x42 /**< Special CPUX86State::exception_is_int value indicating hardware irq. (HACK ALERT) */ +#endif + target_ulong exception_next_eip; + target_ulong dr[8]; /* debug registers */ + union { + CPUBreakpoint *cpu_breakpoint[4]; + CPUWatchpoint *cpu_watchpoint[4]; + }; /* break/watchpoints for dr[0..3] */ + uint32_t smbase; + int old_exception; /* exception in flight */ + + CPU_COMMON + +#ifdef VBOX + /** cpu state flags. (see defines below) */ + uint32_t state; + /** The VM handle. */ + PVM pVM; + /** The VMCPU handle. */ + PVMCPU pVCpu; + /** code buffer for instruction emulation */ + void *pvCodeBuffer; + /** code buffer size */ + uint32_t cbCodeBuffer; +#endif /* VBOX */ + + /* processor features (e.g. for CPUID insn) */ +#ifndef VBOX /* remR3CpuId deals with these */ + uint32_t cpuid_level; + uint32_t cpuid_vendor1; + uint32_t cpuid_vendor2; + uint32_t cpuid_vendor3; + uint32_t cpuid_version; +#endif /* !VBOX */ + uint32_t cpuid_features; + uint32_t cpuid_ext_features; +#ifndef VBOX + uint32_t cpuid_xlevel; + uint32_t cpuid_model[12]; +#endif /* !VBOX */ + uint32_t cpuid_ext2_features; + uint32_t cpuid_ext3_features; + uint32_t cpuid_apic_id; +#ifndef VBOX + int cpuid_vendor_override; + + /* MTRRs */ + uint64_t mtrr_fixed[11]; + uint64_t mtrr_deftype; + MTRRVar mtrr_var[8]; + + /* For KVM */ + uint32_t mp_state; + int32_t exception_injected; + int32_t interrupt_injected; + uint8_t soft_interrupt; + uint8_t nmi_injected; + uint8_t nmi_pending; + uint8_t has_error_code; + uint32_t sipi_vector; + + uint32_t cpuid_kvm_features; + + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; + + uint64 mcg_cap; + uint64 mcg_status; + uint64 mcg_ctl; + uint64 mce_banks[MCE_BANKS_DEF*4]; + + uint64_t tsc_aux; + + /* vmstate */ + uint16_t fpus_vmstate; + uint16_t fptag_vmstate; + uint16_t fpregs_format_vmstate; + + uint64_t xstate_bv; + XMMReg ymmh_regs[CPU_NB_REGS]; + + uint64_t xcr0; +#else /* VBOX */ + + /** Alignment padding. 
*/ +# if HC_ARCH_BITS == 64 \ + || ( HC_ARCH_BITS == 32 \ + && !defined(RT_OS_WINDOWS) \ + && ( (!defined(VBOX_ENABLE_VBOXREM64) && !defined(RT_OS_SOLARIS) && !defined(RT_OS_FREEBSD)) \ + || (defined(VBOX_ENABLE_VBOXREM64) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))) ) ) + uint32_t alignment2[1]; +# endif + + /** Profiling tb_flush. */ + STAMPROFILE StatTbFlush; + + /** Addends for HVA -> GPA translations. */ + target_phys_addr_t phys_addends[NB_MMU_MODES][CPU_TLB_SIZE]; +#endif /* VBOX */ +} CPUX86State; + +#ifdef VBOX + +/* Version 1.6 structure; just for loading the old saved state */ +typedef struct SegmentCache_Ver16 { + uint32_t selector; + uint32_t base; + uint32_t limit; + uint32_t flags; + /** The new selector is saved here when we are unable to sync it before invoking the recompiled code. */ + uint32_t newselector; +} SegmentCache_Ver16; + +# define CPU_NB_REGS_VER16 8 + +/* Version 1.6 structure; just for loading the old saved state */ +typedef struct CPUX86State_Ver16 { +# if TARGET_LONG_BITS > HOST_LONG_BITS + /* temporaries if we cannot store them in host registers */ + uint32_t t0, t1, t2; +# endif + + /* standard registers */ + uint32_t regs[CPU_NB_REGS_VER16]; + uint32_t eip; + uint32_t eflags; /* eflags register. During CPU emulation, CC + flags and DF are set to zero because they are + stored elsewhere */ + + /* emulator internal eflags handling */ + uint32_t cc_src; + uint32_t cc_dst; + uint32_t cc_op; + int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ + uint32_t hflags; /* hidden flags, see HF_xxx constants */ + + /* segments */ + SegmentCache_Ver16 segs[6]; /* selector values */ + SegmentCache_Ver16 ldt; + SegmentCache_Ver16 tr; + SegmentCache_Ver16 gdt; /* only base and limit are used */ + SegmentCache_Ver16 idt; /* only base and limit are used */ + + uint32_t cr[5]; /* NOTE: cr1 is unused */ + uint32_t a20_mask; + + /* FPU state */ + unsigned int fpstt; /* top of stack index */ + unsigned int fpus; + unsigned int fpuc; + uint8_t fptags[8]; /* 0 = valid, 1 = empty */ + union { +# ifdef USE_X86LDOUBLE + CPU86_LDouble d __attribute__((aligned(16))); +# else + CPU86_LDouble d; +# endif + MMXReg mmx; + } fpregs[8]; + + /* emulator internal variables */ + float_status fp_status; +# ifdef VBOX + uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */ +# endif + CPU86_LDouble ft0; +# if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) + uint32_t alignment4; /* long double is 12 byte, pad it to 16. */ +# endif + union { + float f; + double d; + int i32; + int64_t i64; + } fp_convert; + + float_status sse_status; + uint32_t mxcsr; + XMMReg xmm_regs[CPU_NB_REGS_VER16]; + XMMReg xmm_t0; + MMXReg mmx_t0; + + /* sysenter registers */ + uint32_t sysenter_cs; + uint32_t sysenter_esp; + uint32_t sysenter_eip; +# ifdef VBOX + uint32_t alignment0; +# endif + uint64_t efer; + uint64_t star; + + uint64_t pat; + + /* temporary data for USE_CODE_COPY mode */ +# ifdef USE_CODE_COPY + uint32_t tmp0; + uint32_t saved_esp; + int native_fp_regs; /* if true, the FPU state is in the native CPU regs */ +# endif + + /* exception/interrupt handling */ + jmp_buf jmp_env; +} CPUX86State_Ver16; + +/** CPUX86State state flags + * @{ */ +# define CPU_RAW_RING0 0x0002 /* Set after first time RawR0 is executed, never cleared. 
*/ +# define CPU_EMULATE_SINGLE_INSTR 0x0040 /* Execute a single instruction in emulation mode */ +# define CPU_EMULATE_SINGLE_STEP 0x0080 /* go into single step mode */ +# define CPU_RAW_HM 0x0100 /* Set after first time HWACC is executed, never cleared. */ +/** @} */ +#endif /* !VBOX */ + +#ifdef VBOX +CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model); +#else /* !VBOX */ +CPUX86State *cpu_x86_init(const char *cpu_model); +#endif /* !VBOX */ +int cpu_x86_exec(CPUX86State *s); +void cpu_x86_close(CPUX86State *s); +void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + const char *optarg); +void x86_cpudef_setup(void); + +int cpu_get_pic_interrupt(CPUX86State *s); +/* MSDOS compatibility mode FPU exception support */ +void cpu_set_ferr(CPUX86State *s); + +/* this function must always be used to load data in the segment + cache: it synchronizes the hflags with the segment cache values */ +#ifndef VBOX +static inline void cpu_x86_load_seg_cache(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +#else +static inline void cpu_x86_load_seg_cache_with_clean_flags(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +#endif +{ + SegmentCache *sc; + unsigned int new_hflags; + + sc = &env->segs[seg_reg]; + sc->selector = selector; + sc->base = base; + sc->limit = limit; + sc->flags = flags; +#ifdef VBOX + sc->newselector = 0; + sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID; +#endif + + /* update the hidden flags */ + { + if (seg_reg == R_CS) { +#ifdef TARGET_X86_64 + if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) { + /* long mode */ + env->hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; + env->hflags &= ~(HF_ADDSEG_MASK); + } else +#endif + { + /* legacy / compatibility case */ + new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_CS32_SHIFT); + env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) | + new_hflags; + } + } + new_hflags = (env->segs[R_SS].flags & DESC_B_MASK) + >> (DESC_B_SHIFT - HF_SS32_SHIFT); + if (env->hflags & HF_CS64_MASK) { + /* zero base assumed for DS, ES and SS in long mode */ + } else if (!(env->cr[0] & CR0_PE_MASK) || + (env->eflags & VM_MASK) || + !(env->hflags & HF_CS32_MASK)) { + /* XXX: try to avoid this test. The problem comes from the + fact that is real mode or vm86 mode we only modify the + 'base' and 'selector' fields of the segment cache to go + faster. A solution may be to force addseg to one in + translate-i386.c. */ + new_hflags |= HF_ADDSEG_MASK; + } else { + new_hflags |= ((env->segs[R_DS].base | + env->segs[R_ES].base | + env->segs[R_SS].base) != 0) << + HF_ADDSEG_SHIFT; + } + env->hflags = (env->hflags & + ~(HF_SS32_MASK | HF_ADDSEG_MASK)) | new_hflags; + } +} + +#ifdef VBOX +/* Raw input, adjust the flags adding the stupid intel flag when applicable. */ +static inline void cpu_x86_load_seg_cache(CPUX86State *env, + int seg_reg, unsigned int selector, + target_ulong base, + unsigned int limit, + unsigned int flags) +{ + flags &= DESC_RAW_FLAG_BITS; + if (flags & DESC_P_MASK) + flags |= DESC_A_MASK; /* Make sure the A bit is set to avoid trouble. 
*/ + else if (selector < 4U) + flags |= DESC_INTEL_UNUSABLE; + cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, base, limit, flags); +} +#endif + +static inline void cpu_x86_load_seg_cache_sipi(CPUX86State *env, + int sipi_vector) +{ + env->eip = 0; + cpu_x86_load_seg_cache(env, R_CS, sipi_vector << 8, + sipi_vector << 12, + env->segs[R_CS].limit, + env->segs[R_CS].flags); + env->halted = 0; +} + +int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, + target_ulong *base, unsigned int *limit, + unsigned int *flags); + +/* wrapper, just in case memory mappings must be changed */ +static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl) +{ +#if HF_CPL_MASK == 3 + s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl; +#else +#error HF_CPL_MASK is hardcoded +#endif +} + +/* op_helper.c */ +/* used for debug or cpu save/restore */ +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f); +CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper); + +/* cpu-exec.c */ +/* the following helpers are only usable in user mode simulation as + they can trigger unexpected exceptions */ +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); + +/* you can call this signal handler from your SIGBUS and SIGSEGV + signal handlers to inform the virtual CPU of exceptions. non zero + is returned if the signal was handled by the virtual CPU. */ +int cpu_x86_signal_handler(int host_signum, void *pinfo, + void *puc); + +/* cpuid.c */ +void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); +int cpu_x86_register (CPUX86State *env, const char *cpu_model); +void cpu_clear_apic_feature(CPUX86State *env); + +/* helper.c */ +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write, int mmu_idx, int is_softmmu); +#define cpu_handle_mmu_fault cpu_x86_handle_mmu_fault +void cpu_x86_set_a20(CPUX86State *env, int a20_state); + +static inline int hw_breakpoint_enabled(unsigned long dr7, int index) +{ + return (dr7 >> (index * 2)) & 3; +} + +static inline int hw_breakpoint_type(unsigned long dr7, int index) +{ + return (dr7 >> (DR7_TYPE_SHIFT + (index * 4))) & 3; +} + +static inline int hw_breakpoint_len(unsigned long dr7, int index) +{ + int len = ((dr7 >> (DR7_LEN_SHIFT + (index * 4))) & 3); + return (len == 2) ? 
8 : len + 1; +} + +void hw_breakpoint_insert(CPUX86State *env, int index); +void hw_breakpoint_remove(CPUX86State *env, int index); +int check_hw_breakpoints(CPUX86State *env, int force_dr6_update); + +/* will be suppressed */ +void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0); +void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3); +void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4); + +/* hw/pc.c */ +void cpu_smm_update(CPUX86State *env); +uint64_t cpu_get_tsc(CPUX86State *env); + +/* used to debug */ +#define X86_DUMP_FPU 0x0001 /* dump FPU state too */ +#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */ + +#ifdef VBOX +int cpu_rdmsr(CPUX86State *env, uint32_t idMsr, uint64_t *puValue); +int cpu_wrmsr(CPUX86State *env, uint32_t idMsr, uint64_t uValue); +void cpu_trap_raw(CPUX86State *env1); + +/* in helper.c */ +uint8_t read_byte(CPUX86State *env1, target_ulong addr); +uint16_t read_word(CPUX86State *env1, target_ulong addr); +void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val); +uint32_t read_dword(CPUX86State *env1, target_ulong addr); +void write_word(CPUX86State *env1, target_ulong addr, uint16_t val); +void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val); +/* in helper.c */ +int emulate_single_instr(CPUX86State *env1); +int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl); + +void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr); +void save_raw_fp_state(CPUX86State *env, uint8_t *ptr); +#endif /* VBOX */ + +#define TARGET_PAGE_BITS 12 + +#ifdef TARGET_X86_64 +#define TARGET_PHYS_ADDR_SPACE_BITS 52 +/* ??? This is really 48 bits, sign-extended, but the only thing + accessible to userland with bit 48 set is the VSYSCALL, and that + is handled via other mechanisms. */ +#define TARGET_VIRT_ADDR_SPACE_BITS 47 +#else +#define TARGET_PHYS_ADDR_SPACE_BITS 36 +#define TARGET_VIRT_ADDR_SPACE_BITS 32 +#endif + +#define cpu_init cpu_x86_init +#define cpu_exec cpu_x86_exec +#define cpu_gen_code cpu_x86_gen_code +#define cpu_signal_handler cpu_x86_signal_handler +#define cpu_list_id x86_cpu_list +#define cpudef_setup x86_cpudef_setup + +#define CPU_SAVE_VERSION 12 + +/* MMU modes definitions */ +#define MMU_MODE0_SUFFIX _kernel +#define MMU_MODE1_SUFFIX _user +#define MMU_USER_IDX 1 +static inline int cpu_mmu_index (CPUState *env) +{ + return (env->hflags & HF_CPL_MASK) == 3 ? 
1 : 0; +} + +/* translate.c */ +void optimize_flags_init(void); + +typedef struct CCTable { + int (*compute_all)(void); /* return all the flags */ + int (*compute_c)(void); /* return the C flag */ +} CCTable; + +#if defined(CONFIG_USER_ONLY) +static inline void cpu_clone_regs(CPUState *env, target_ulong newsp) +{ + if (newsp) + env->regs[R_ESP] = newsp; + env->regs[R_EAX] = 0; +} +#endif + +#include "cpu-all.h" +#include "svm.h" + +#ifndef VBOX +#if !defined(CONFIG_USER_ONLY) +#include "hw/apic.h" +#endif +#else /* VBOX */ +extern void cpu_set_apic_tpr(CPUX86State *env, uint8_t val); +extern uint8_t cpu_get_apic_tpr(CPUX86State *env); +extern uint64_t cpu_get_apic_base(CPUX86State *env); +#endif /* VBOX */ + +static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, + target_ulong *cs_base, int *flags) +{ + *cs_base = env->segs[R_CS].base; + if (env->hflags & HF_CS64_MASK) + *pc = *cs_base + env->eip; + else + *pc = (uint32_t)(*cs_base + env->eip); + *flags = env->hflags | + (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK)); +} + +#ifndef VBOX +void apic_init_reset(CPUState *env); +void apic_sipi(CPUState *env); +void do_cpu_init(CPUState *env); +void do_cpu_sipi(CPUState *env); +#endif /* !VBOX */ +#endif /* CPU_I386_H */ diff --git a/src/recompiler/target-i386/exec.h b/src/recompiler/target-i386/exec.h new file mode 100644 index 00000000..355599fa --- /dev/null +++ b/src/recompiler/target-i386/exec.h @@ -0,0 +1,370 @@ +/* + * i386 execution defines + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#include "config.h" +#include "dyngen-exec.h" + +/* XXX: factorize this mess */ +#ifdef TARGET_X86_64 +#define TARGET_LONG_BITS 64 +#else +#define TARGET_LONG_BITS 32 +#endif + +#include "cpu-defs.h" + +register struct CPUX86State *env asm(AREG0); + +#include "qemu-common.h" +#include "qemu-log.h" + +#undef EAX +#define EAX (env->regs[R_EAX]) +#undef ECX +#define ECX (env->regs[R_ECX]) +#undef EDX +#define EDX (env->regs[R_EDX]) +#undef EBX +#define EBX (env->regs[R_EBX]) +#undef ESP +#define ESP (env->regs[R_ESP]) +#undef EBP +#define EBP (env->regs[R_EBP]) +#undef ESI +#define ESI (env->regs[R_ESI]) +#undef EDI +#define EDI (env->regs[R_EDI]) +#undef EIP +#define EIP (env->eip) +#define DF (env->df) + +#define CC_SRC (env->cc_src) +#define CC_DST (env->cc_dst) +#define CC_OP (env->cc_op) + +/* float macros */ +#define FT0 (env->ft0) +#define ST0 (env->fpregs[env->fpstt].d) +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) +#define ST1 ST(1) + +#include "cpu.h" +#include "exec-all.h" + +/* op_helper.c */ +void do_interrupt(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw); +void do_interrupt_user(int intno, int is_int, int error_code, + target_ulong next_eip); +void QEMU_NORETURN raise_exception_err(int exception_index, int error_code); +void QEMU_NORETURN raise_exception(int exception_index); +void QEMU_NORETURN raise_exception_env(int exception_index, CPUState *nenv); +void do_smm_enter(void); + +/* n must be a constant to be efficient */ +static inline target_long lshift(target_long x, int n) +{ + if (n >= 0) + return x << n; + else + return x >> (-n); +} + +#include "helper.h" + +static inline void svm_check_intercept(uint32_t type) +{ + helper_svm_check_intercept_param(type, 0); +} + +#if !defined(CONFIG_USER_ONLY) + +#include "softmmu_exec.h" + +#endif /* !defined(CONFIG_USER_ONLY) */ + +#ifdef USE_X86LDOUBLE +/* use long double functions */ +#define floatx_to_int32 floatx80_to_int32 +#define floatx_to_int64 floatx80_to_int64 +#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero +#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero +#define int32_to_floatx int32_to_floatx80 +#define int64_to_floatx int64_to_floatx80 +#define float32_to_floatx float32_to_floatx80 +#define float64_to_floatx float64_to_floatx80 +#define floatx_to_float32 floatx80_to_float32 +#define floatx_to_float64 floatx80_to_float64 +#define floatx_abs floatx80_abs +#define floatx_chs floatx80_chs +#define floatx_round_to_int floatx80_round_to_int +#define floatx_compare floatx80_compare +#define floatx_compare_quiet floatx80_compare_quiet +#else +#define floatx_to_int32 float64_to_int32 +#define floatx_to_int64 float64_to_int64 +#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero +#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero +#define int32_to_floatx int32_to_float64 +#define int64_to_floatx int64_to_float64 +#define float32_to_floatx float32_to_float64 +#define float64_to_floatx(x, e) (x) +#define floatx_to_float32 float64_to_float32 +#define floatx_to_float64(x, e) (x) +#define floatx_abs float64_abs +#define floatx_chs float64_chs +#define floatx_round_to_int float64_round_to_int +#define floatx_compare float64_compare +#define floatx_compare_quiet float64_compare_quiet +#endif + +#ifdef VBOX +# ifdef IPRT_NO_CRT +# undef sin +# undef cos +# undef sqrt +# undef pow +# undef log +# undef tan +# undef atan2 +# undef floor +# undef ceil +# undef ldexp +# define sin sinl +# define cos cosl +# define sqrt 
sqrtl +# define pow powl +# define log logl +# define tan tanl +# define atan2 atan2l +# define floor floorl +# define ceil ceill +# define ldexp ldexpl +# endif +#endif + +#define RC_MASK 0xc00 +#define RC_NEAR 0x000 +#define RC_DOWN 0x400 +#define RC_UP 0x800 +#define RC_CHOP 0xc00 + +#define MAXTAN 9223372036854775808.0 + +#ifdef USE_X86LDOUBLE + +/* only for x86 */ +typedef union { + long double d; + struct { + unsigned long long lower; + unsigned short upper; + } l; +} CPU86_LDoubleU; + +/* the following deal with x86 long double-precision numbers */ +#define MAXEXPD 0x7fff +#define EXPBIAS 16383 +#define EXPD(fp) (fp.l.upper & 0x7fff) +#define SIGND(fp) ((fp.l.upper) & 0x8000) +#define MANTD(fp) (fp.l.lower) +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS + +#else + +/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */ +typedef union { + double d; +#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__) + struct { + uint32_t lower; + int32_t upper; + } l; +#else + struct { + int32_t upper; + uint32_t lower; + } l; +#endif +#ifndef __arm__ + int64_t ll; +#endif +} CPU86_LDoubleU; + +/* the following deal with IEEE double-precision numbers */ +#define MAXEXPD 0x7ff +#define EXPBIAS 1023 +#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF) +#define SIGND(fp) ((fp.l.upper) & 0x80000000) +#ifdef __arm__ +#define MANTD(fp) (fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32)) +#else +#define MANTD(fp) (fp.ll & ((1LL << 52) - 1)) +#endif +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20) +#endif + +static inline void fpush(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fptags[env->fpstt] = 0; /* validate stack entry */ +} + +static inline void fpop(void) +{ + env->fptags[env->fpstt] = 1; /* invvalidate stack entry */ + env->fpstt = (env->fpstt + 1) & 7; +} + +#ifndef USE_X86LDOUBLE +static inline CPU86_LDouble helper_fldt(target_ulong ptr) +{ + CPU86_LDoubleU temp; + int upper, e; + uint64_t ll; + + /* mantissa */ + upper = lduw(ptr + 8); + /* XXX: handle overflow ? 
*/ + e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ + e |= (upper >> 4) & 0x800; /* sign */ + ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1); +#ifdef __arm__ + temp.l.upper = (e << 20) | (ll >> 32); + temp.l.lower = ll; +#else + temp.ll = ll | ((uint64_t)e << 52); +#endif + return temp.d; +} + +static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) +{ + CPU86_LDoubleU temp; + int e; + + temp.d = f; + /* mantissa */ + stq(ptr, (MANTD(temp) << 11) | (1LL << 63)); + /* exponent + sign */ + e = EXPD(temp) - EXPBIAS + 16383; + e |= SIGND(temp) >> 16; + stw(ptr + 8, e); +} +#else + +/* we use memory access macros */ + +static inline CPU86_LDouble helper_fldt(target_ulong ptr) +{ + CPU86_LDoubleU temp; + + temp.l.lower = ldq(ptr); + temp.l.upper = lduw(ptr + 8); + return temp.d; +} + +static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) +{ + CPU86_LDoubleU temp; + + temp.d = f; + stq(ptr, temp.l.lower); + stw(ptr + 8, temp.l.upper); +} + +#endif /* USE_X86LDOUBLE */ + +#define FPUS_IE (1 << 0) +#define FPUS_DE (1 << 1) +#define FPUS_ZE (1 << 2) +#define FPUS_OE (1 << 3) +#define FPUS_UE (1 << 4) +#define FPUS_PE (1 << 5) +#define FPUS_SF (1 << 6) +#define FPUS_SE (1 << 7) +#define FPUS_B (1 << 15) + +#define FPUC_EM 0x3f + +static inline uint32_t compute_eflags(void) +{ + return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); +} + +/* NOTE: CC_OP must be modified manually to CC_OP_EFLAGS */ +static inline void load_eflags(int eflags, int update_mask) +{ + CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((eflags >> 10) & 1)); + env->eflags = (env->eflags & ~update_mask) | + (eflags & update_mask) | 0x2; +} + +static inline int cpu_has_work(CPUState *env) +{ + int work; + + work = (env->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK); + work |= env->interrupt_request & CPU_INTERRUPT_NMI; + work |= env->interrupt_request & CPU_INTERRUPT_INIT; + work |= env->interrupt_request & CPU_INTERRUPT_SIPI; + + return work; +} + +static inline int cpu_halted(CPUState *env) { + /* handle exit of HALTED state */ + if (!env->halted) + return 0; + /* disable halt condition */ + if (cpu_has_work(env)) { + env->halted = 0; + return 0; + } + return EXCP_HALTED; +} + +/* load efer and update the corresponding hflags. XXX: do consistency + checks with cpuid bits ? */ +static inline void cpu_load_efer(CPUState *env, uint64_t val) +{ + env->efer = val; + env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK); + if (env->efer & MSR_EFER_LMA) + env->hflags |= HF_LMA_MASK; + if (env->efer & MSR_EFER_SVME) + env->hflags |= HF_SVME_MASK; +} + +static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) +{ + env->eip = tb->pc - tb->cs_base; +} + diff --git a/src/recompiler/target-i386/helper.c b/src/recompiler/target-i386/helper.c new file mode 100644 index 00000000..a6f37ff6 --- /dev/null +++ b/src/recompiler/target-i386/helper.c @@ -0,0 +1,1227 @@ +/* + * i386 helpers (without register variable usage) + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifndef VBOX +#include <inttypes.h> +#include <signal.h> +#endif /* !VBOX */ + +#include "cpu.h" +#include "exec-all.h" +#include "qemu-common.h" +#include "kvm.h" + +//#define DEBUG_MMU + +/* NOTE: must be called outside the CPU execute loop */ +void cpu_reset(CPUX86State *env) +{ + int i; + + if (qemu_loglevel_mask(CPU_LOG_RESET)) { + qemu_log("CPU Reset (CPU %d)\n", env->cpu_index); + log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP); + } + + memset(env, 0, offsetof(CPUX86State, breakpoints)); + + tlb_flush(env, 1); + + env->old_exception = -1; + + /* init to reset state */ + +#ifdef CONFIG_SOFTMMU + env->hflags |= HF_SOFTMMU_MASK; +#endif + env->hflags2 |= HF2_GIF_MASK; + + cpu_x86_update_cr0(env, 0x60000010); + env->a20_mask = ~0x0; + env->smbase = 0x30000; + + env->idt.limit = 0xffff; + env->gdt.limit = 0xffff; + env->ldt.limit = 0xffff; + env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT); + env->tr.limit = 0xffff; + env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT); + + cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK); + + env->eip = 0xfff0; +#ifndef VBOX /* We'll get the right value from CPUM. 
*/ + env->regs[R_EDX] = env->cpuid_version; +#endif + + env->eflags = 0x2; + + /* FPU init */ + for(i = 0;i < 8; i++) + env->fptags[i] = 1; + env->fpuc = 0x37f; + + env->mxcsr = 0x1f80; + + memset(env->dr, 0, sizeof(env->dr)); + env->dr[6] = DR6_FIXED_1; + env->dr[7] = DR7_FIXED_1; + cpu_breakpoint_remove_all(env, BP_CPU); + cpu_watchpoint_remove_all(env, BP_CPU); + +#ifndef VBOX + env->mcg_status = 0; +#endif +} + +void cpu_x86_close(CPUX86State *env) +{ +#ifndef VBOX + qemu_free(env); +#endif +} + +/***********************************************************/ +/* x86 debug */ + +static const char *cc_op_str[] = { + "DYNAMIC", + "EFLAGS", + + "MULB", + "MULW", + "MULL", + "MULQ", + + "ADDB", + "ADDW", + "ADDL", + "ADDQ", + + "ADCB", + "ADCW", + "ADCL", + "ADCQ", + + "SUBB", + "SUBW", + "SUBL", + "SUBQ", + + "SBBB", + "SBBW", + "SBBL", + "SBBQ", + + "LOGICB", + "LOGICW", + "LOGICL", + "LOGICQ", + + "INCB", + "INCW", + "INCL", + "INCQ", + + "DECB", + "DECW", + "DECL", + "DECQ", + + "SHLB", + "SHLW", + "SHLL", + "SHLQ", + + "SARB", + "SARW", + "SARL", + "SARQ", +}; + +static void +cpu_x86_dump_seg_cache(CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + const char *name, struct SegmentCache *sc) +{ +#ifdef VBOX +# define cpu_fprintf(f, ...) RTLogPrintf(__VA_ARGS__) +#endif +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name, + sc->selector, sc->base, sc->limit, sc->flags); + } else +#endif + { + cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector, + (uint32_t)sc->base, sc->limit, sc->flags); + } + + if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK)) + goto done; + + cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT); + if (sc->flags & DESC_S_MASK) { + if (sc->flags & DESC_CS_MASK) { + cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" : + ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16")); + cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-', + (sc->flags & DESC_R_MASK) ? 'R' : '-'); + } else { + cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS " : "DS16"); + cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-', + (sc->flags & DESC_W_MASK) ? 'W' : '-'); + } + cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-'); + } else { + static const char *sys_type_name[2][16] = { + { /* 32 bit mode */ + "Reserved", "TSS16-avl", "LDT", "TSS16-busy", + "CallGate16", "TaskGate", "IntGate16", "TrapGate16", + "Reserved", "TSS32-avl", "Reserved", "TSS32-busy", + "CallGate32", "Reserved", "IntGate32", "TrapGate32" + }, + { /* 64 bit mode */ + "<hiword>", "Reserved", "LDT", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "Reserved", + "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64", + "Reserved", "IntGate64", "TrapGate64" + } + }; + cpu_fprintf(f, "%s", + sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0] + [(sc->flags & DESC_TYPE_MASK) + >> DESC_TYPE_SHIFT]); + } +done: + cpu_fprintf(f, "\n"); +#ifdef VBOX +# undef cpu_fprintf +#endif +} + +void cpu_dump_state(CPUState *env, FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...), + int flags) +{ + int eflags, i, nb; + char cc_op_name[32]; + static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; + +#ifdef VBOX +# define cpu_fprintf(f, ...) 
RTLogPrintf(__VA_ARGS__) +#endif + cpu_synchronize_state(env); + + eflags = env->eflags; +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, + "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n" + "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n" + "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n" + "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n" + "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP], + env->regs[R_ESP], + env->regs[8], + env->regs[9], + env->regs[10], + env->regs[11], + env->regs[12], + env->regs[13], + env->regs[14], + env->regs[15], + env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + env->halted); + } else +#endif + { + cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n" + "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n" + "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + (uint32_t)env->regs[R_EAX], + (uint32_t)env->regs[R_EBX], + (uint32_t)env->regs[R_ECX], + (uint32_t)env->regs[R_EDX], + (uint32_t)env->regs[R_ESI], + (uint32_t)env->regs[R_EDI], + (uint32_t)env->regs[R_EBP], + (uint32_t)env->regs[R_ESP], + (uint32_t)env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 
'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + env->halted); + } + + for(i = 0; i < 6; i++) { + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i], + &env->segs[i]); + } + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt); + cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr); + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n", + env->gdt.base, env->gdt.limit); + cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n", + env->idt.base, env->idt.limit); + cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n", + (uint32_t)env->cr[0], + env->cr[2], + env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) + cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]); + cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n", + env->dr[6], env->dr[7]); + } else +#endif + { + cpu_fprintf(f, "GDT= %08x %08x\n", + (uint32_t)env->gdt.base, env->gdt.limit); + cpu_fprintf(f, "IDT= %08x %08x\n", + (uint32_t)env->idt.base, env->idt.limit); + cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n", + (uint32_t)env->cr[0], + (uint32_t)env->cr[2], + (uint32_t)env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) + cpu_fprintf(f, "DR%d=%08x ", i, env->dr[i]); + cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n", env->dr[6], env->dr[7]); + } + if (flags & X86_DUMP_CCOP) { + if ((unsigned)env->cc_op < CC_OP_NB) + snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]); + else + snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n", + env->cc_src, env->cc_dst, + cc_op_name); + } else +#endif + { + cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n", + (uint32_t)env->cc_src, (uint32_t)env->cc_dst, + cc_op_name); + } + } + cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer); + if (flags & X86_DUMP_FPU) { + int fptag; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= ((!env->fptags[i]) << i); + } + cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n", + env->fpuc, + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11, + env->fpstt, + fptag, + env->mxcsr); + for(i=0;i<8;i++) { +#if defined(USE_X86LDOUBLE) + union { + long double d; + struct { + uint64_t lower; + uint16_t upper; + } l; + } tmp; + tmp.d = env->fpregs[i].d; + cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x", + i, tmp.l.lower, tmp.l.upper); +#else + cpu_fprintf(f, "FPR%d=%016" PRIx64, + i, env->fpregs[i].mmx.q); +#endif + if ((i & 1) == 1) + cpu_fprintf(f, "\n"); + else + cpu_fprintf(f, " "); + } + if (env->hflags & HF_CS64_MASK) + nb = 16; + else + nb = 8; + for(i=0;i<nb;i++) { + cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x", + i, + env->xmm_regs[i].XMM_L(3), + env->xmm_regs[i].XMM_L(2), + env->xmm_regs[i].XMM_L(1), + env->xmm_regs[i].XMM_L(0)); + if ((i & 1) == 1) + cpu_fprintf(f, "\n"); + else + cpu_fprintf(f, " "); + } + } +#ifdef VBOX +# undef cpu_fprintf +#endif +} + +/***********************************************************/ +/* x86 mmu */ +/* XXX: add PGE support */ + +void cpu_x86_set_a20(CPUX86State *env, int a20_state) +{ + a20_state = (a20_state != 0); + if (a20_state != ((env->a20_mask >> 20) & 1)) { +#if defined(DEBUG_MMU) + printf("A20 update: a20=%d\n", a20_state); +#endif + /* if the cpu is currently executing code, we must unlink it and + all the potentially executing TB */ + cpu_interrupt(env, 
CPU_INTERRUPT_EXITTB); + + /* when a20 is changed, all the MMU mappings are invalid, so + we must flush everything */ + tlb_flush(env, 1); + env->a20_mask = ~(1 << 20) | (a20_state << 20); + } +} + +void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) +{ + int pe_state; + +#if defined(DEBUG_MMU) + printf("CR0 update: CR0=0x%08x\n", new_cr0); +#endif + if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) != + (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) { + tlb_flush(env, 1); + } + +#ifdef TARGET_X86_64 + if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) && + (env->efer & MSR_EFER_LME)) { + /* enter in long mode */ + /* XXX: generate an exception */ + if (!(env->cr[4] & CR4_PAE_MASK)) + return; + env->efer |= MSR_EFER_LMA; + env->hflags |= HF_LMA_MASK; + } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) && + (env->efer & MSR_EFER_LMA)) { + /* exit long mode */ + env->efer &= ~MSR_EFER_LMA; + env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK); + env->eip &= 0xffffffff; + } +#endif + env->cr[0] = new_cr0 | CR0_ET_MASK; + + /* update PE flag in hidden flags */ + pe_state = (env->cr[0] & CR0_PE_MASK); + env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT); + /* ensure that ADDSEG is always set in real mode */ + env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT); + /* update FPU flags */ + env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) | + ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)); +#ifdef VBOX + remR3ChangeCpuMode(env); +#endif +} + +/* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in + the PDPT */ +void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) +{ + env->cr[3] = new_cr3; + if (env->cr[0] & CR0_PG_MASK) { +#if defined(DEBUG_MMU) + printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); +#endif + tlb_flush(env, 0); + } +} + +void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) +{ +#if defined(DEBUG_MMU) + printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]); +#endif + if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) != + (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) { + tlb_flush(env, 1); + } + /* SSE handling */ + if (!(env->cpuid_features & CPUID_SSE)) + new_cr4 &= ~CR4_OSFXSR_MASK; + if (new_cr4 & CR4_OSFXSR_MASK) + env->hflags |= HF_OSFXSR_MASK; + else + env->hflags &= ~HF_OSFXSR_MASK; + + env->cr[4] = new_cr4; +#ifdef VBOX + remR3ChangeCpuMode(env); +#endif +} + +#if defined(CONFIG_USER_ONLY) + +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write, int mmu_idx, int is_softmmu) +{ + /* user mode only emulation */ + is_write &= 1; + env->cr[2] = addr; + env->error_code = (is_write << PG_ERROR_W_BIT); + env->error_code |= PG_ERROR_U_MASK; + env->exception_index = EXCP0E_PAGE; + return 1; +} + +#else + +/* XXX: This value should match the one returned by CPUID + * and in exec.c */ +# if defined(TARGET_X86_64) +# define PHYS_ADDR_MASK 0xfffffff000LL +# else +# define PHYS_ADDR_MASK 0xffffff000LL +# endif + +/* return value: + -1 = cannot handle fault + 0 = nothing more to do + 1 = generate PF fault +*/ +int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, + int is_write1, int mmu_idx, int is_softmmu) +{ + uint64_t ptep, pte; + target_ulong pde_addr, pte_addr; + int error_code, is_dirty, prot, page_size, is_write, is_user; + target_phys_addr_t paddr; + uint32_t page_offset; + target_ulong vaddr, virt_addr; + + is_user = mmu_idx == MMU_USER_IDX; +#if defined(DEBUG_MMU) + 
printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n", + addr, is_write1, is_user, env->eip); +#endif + is_write = is_write1 & 1; + + if (!(env->cr[0] & CR0_PG_MASK)) { + pte = addr; + virt_addr = addr & TARGET_PAGE_MASK; + prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + page_size = 4096; + goto do_mapping; + } + + if (env->cr[4] & CR4_PAE_MASK) { + uint64_t pde, pdpe; + target_ulong pdpe_addr; + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint64_t pml4e_addr, pml4e; + int32_t sext; + + /* test virtual address sign extension */ + sext = (int64_t)addr >> 47; + if (sext != 0 && sext != -1) { + env->error_code = 0; + env->exception_index = EXCP0D_GPF; + return 1; + } + + pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & + env->a20_mask; + pml4e = ldq_phys(pml4e_addr); + if (!(pml4e & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + if (!(pml4e & PG_ACCESSED_MASK)) { + pml4e |= PG_ACCESSED_MASK; + stl_phys_notdirty(pml4e_addr, pml4e); + } + ptep = pml4e ^ PG_NX_MASK; + pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + ptep &= pdpe ^ PG_NX_MASK; + if (!(pdpe & PG_ACCESSED_MASK)) { + pdpe |= PG_ACCESSED_MASK; + stl_phys_notdirty(pdpe_addr, pdpe); + } + } else +#endif + { + /* XXX: load them when cr3 is loaded ? */ + pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) & + env->a20_mask; + pde = ldq_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + ptep &= pde ^ PG_NX_MASK; + if (pde & PG_PSE_MASK) { + /* 2 MB page */ + page_size = 2048 * 1024; + ptep ^= PG_NX_MASK; + if ((ptep & PG_NX_MASK) && is_write1 == 2) + goto do_fault_protect; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pde & PG_DIRTY_MASK); + if (!(pde & PG_ACCESSED_MASK) || is_dirty) { + pde |= PG_ACCESSED_MASK; + if (is_dirty) + pde |= PG_DIRTY_MASK; + stl_phys_notdirty(pde_addr, pde); + } + /* align to page_size */ + pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff); + virt_addr = addr & ~(page_size - 1); + } else { + /* 4 KB page */ + if (!(pde & PG_ACCESSED_MASK)) { + pde |= PG_ACCESSED_MASK; + stl_phys_notdirty(pde_addr, pde); + } + pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) & + env->a20_mask; + pte = ldq_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) { + error_code = PG_ERROR_RSVD_MASK; + goto do_fault; + } + /* combine pde and pte nx, user and rw protections */ + ptep &= pte ^ PG_NX_MASK; + ptep ^= PG_NX_MASK; + if ((ptep & 
PG_NX_MASK) && is_write1 == 2) + goto do_fault_protect; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pte & PG_DIRTY_MASK); + if (!(pte & PG_ACCESSED_MASK) || is_dirty) { + pte |= PG_ACCESSED_MASK; + if (is_dirty) + pte |= PG_DIRTY_MASK; + stl_phys_notdirty(pte_addr, pte); + } + page_size = 4096; + virt_addr = addr & ~0xfff; + pte = pte & (PHYS_ADDR_MASK | 0xfff); + } + } else { + uint32_t pde; + + /* page directory entry */ + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & + env->a20_mask; + pde = ldl_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + /* if PSE bit is set, then we use a 4MB page */ + if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + page_size = 4096 * 1024; + if (is_user) { + if (!(pde & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(pde & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(pde & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pde & PG_DIRTY_MASK); + if (!(pde & PG_ACCESSED_MASK) || is_dirty) { + pde |= PG_ACCESSED_MASK; + if (is_dirty) + pde |= PG_DIRTY_MASK; + stl_phys_notdirty(pde_addr, pde); + } + + pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */ + ptep = pte; + virt_addr = addr & ~(page_size - 1); + } else { + if (!(pde & PG_ACCESSED_MASK)) { + pde |= PG_ACCESSED_MASK; + stl_phys_notdirty(pde_addr, pde); + } + + /* page directory entry */ + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & + env->a20_mask; + pte = ldl_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) { + error_code = 0; + goto do_fault; + } + /* combine pde and pte user and rw protections */ + ptep = pte & pde; + if (is_user) { + if (!(ptep & PG_USER_MASK)) + goto do_fault_protect; + if (is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } else { + if ((env->cr[0] & CR0_WP_MASK) && + is_write && !(ptep & PG_RW_MASK)) + goto do_fault_protect; + } + is_dirty = is_write && !(pte & PG_DIRTY_MASK); + if (!(pte & PG_ACCESSED_MASK) || is_dirty) { + pte |= PG_ACCESSED_MASK; + if (is_dirty) + pte |= PG_DIRTY_MASK; + stl_phys_notdirty(pte_addr, pte); + } + page_size = 4096; + virt_addr = addr & ~0xfff; + } + } + /* the page can be put in the TLB */ + prot = PAGE_READ; + if (!(ptep & PG_NX_MASK)) + prot |= PAGE_EXEC; + if (pte & PG_DIRTY_MASK) { + /* only set write access if already dirty... 
otherwise wait + for dirty access */ + if (is_user) { + if (ptep & PG_RW_MASK) + prot |= PAGE_WRITE; + } else { + if (!(env->cr[0] & CR0_WP_MASK) || + (ptep & PG_RW_MASK)) + prot |= PAGE_WRITE; + } + } + do_mapping: +#ifndef VBOX + pte = pte & env->a20_mask; +#endif + + /* Even if 4MB pages, we map only one 4KB page in the cache to + avoid filling it too fast */ + page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1); + paddr = (pte & TARGET_PAGE_MASK) + page_offset; +#ifdef VBOX + paddr &= env->a20_mask; +#endif + vaddr = virt_addr + page_offset; + + tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size); + return 0; + do_fault_protect: + error_code = PG_ERROR_P_MASK; + do_fault: + error_code |= (is_write << PG_ERROR_W_BIT); + if (is_user) + error_code |= PG_ERROR_U_MASK; + if (is_write1 == 2 && + (env->efer & MSR_EFER_NXE) && + (env->cr[4] & CR4_PAE_MASK)) + error_code |= PG_ERROR_I_D_MASK; + if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) { + /* cr2 is not modified in case of exceptions */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + addr); + } else { + env->cr[2] = addr; + } + env->error_code = error_code; + env->exception_index = EXCP0E_PAGE; + return 1; +} + +target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr) +{ + target_ulong pde_addr, pte_addr; + uint64_t pte; + target_phys_addr_t paddr; + uint32_t page_offset; + int page_size; + + if (env->cr[4] & CR4_PAE_MASK) { + target_ulong pdpe_addr; + uint64_t pde, pdpe; + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint64_t pml4e_addr, pml4e; + int32_t sext; + + /* test virtual address sign extension */ + sext = (int64_t)addr >> 47; + if (sext != 0 && sext != -1) + return -1; + + pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & + env->a20_mask; + pml4e = ldq_phys(pml4e_addr); + if (!(pml4e & PG_PRESENT_MASK)) + return -1; + + pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) + return -1; + } else +#endif + { + pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & + env->a20_mask; + pdpe = ldq_phys(pdpe_addr); + if (!(pdpe & PG_PRESENT_MASK)) + return -1; + } + + pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) & + env->a20_mask; + pde = ldq_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) { + return -1; + } + if (pde & PG_PSE_MASK) { + /* 2 MB page */ + page_size = 2048 * 1024; + pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */ + } else { + /* 4 KB page */ + pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) & + env->a20_mask; + page_size = 4096; + pte = ldq_phys(pte_addr); + } + if (!(pte & PG_PRESENT_MASK)) + return -1; + } else { + uint32_t pde; + + if (!(env->cr[0] & CR0_PG_MASK)) { + pte = addr; + page_size = 4096; + } else { + /* page directory entry */ + pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; + pde = ldl_phys(pde_addr); + if (!(pde & PG_PRESENT_MASK)) + return -1; + if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + pte = pde & ~0x003ff000; /* align to 4MB */ + page_size = 4096 * 1024; + } else { + /* page directory entry */ + pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; + pte = ldl_phys(pte_addr); + if (!(pte & PG_PRESENT_MASK)) + return -1; + page_size = 4096; + } + } + pte = pte & env->a20_mask; + } + + page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1); + paddr = (pte & TARGET_PAGE_MASK) + page_offset; + return 
paddr; +} + +void hw_breakpoint_insert(CPUState *env, int index) +{ + int type, err = 0; + + switch (hw_breakpoint_type(env->dr[7], index)) { + case 0: + if (hw_breakpoint_enabled(env->dr[7], index)) + err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU, + &env->cpu_breakpoint[index]); + break; + case 1: + type = BP_CPU | BP_MEM_WRITE; + goto insert_wp; + case 2: + /* No support for I/O watchpoints yet */ + break; + case 3: + type = BP_CPU | BP_MEM_ACCESS; + insert_wp: + err = cpu_watchpoint_insert(env, env->dr[index], + hw_breakpoint_len(env->dr[7], index), + type, &env->cpu_watchpoint[index]); + break; + } + if (err) + env->cpu_breakpoint[index] = NULL; +} + +void hw_breakpoint_remove(CPUState *env, int index) +{ + if (!env->cpu_breakpoint[index]) + return; + switch (hw_breakpoint_type(env->dr[7], index)) { + case 0: + if (hw_breakpoint_enabled(env->dr[7], index)) + cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]); + break; + case 1: + case 3: + cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]); + break; + case 2: + /* No support for I/O watchpoints yet */ + break; + } +} + +int check_hw_breakpoints(CPUState *env, int force_dr6_update) +{ + target_ulong dr6; + int reg, type; + int hit_enabled = 0; + + dr6 = env->dr[6] & ~0xf; + for (reg = 0; reg < 4; reg++) { + type = hw_breakpoint_type(env->dr[7], reg); + if ((type == 0 && env->dr[reg] == env->eip) || + ((type & 1) && env->cpu_watchpoint[reg] && + (env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT))) { + dr6 |= 1 << reg; + if (hw_breakpoint_enabled(env->dr[7], reg)) + hit_enabled = 1; + } + } + if (hit_enabled || force_dr6_update) + env->dr[6] = dr6; + return hit_enabled; +} + +static CPUDebugExcpHandler *prev_debug_excp_handler; + +void raise_exception_env(int exception_index, CPUState *env); + +static void breakpoint_handler(CPUState *env) +{ + CPUBreakpoint *bp; + + if (env->watchpoint_hit) { + if (env->watchpoint_hit->flags & BP_CPU) { + env->watchpoint_hit = NULL; + if (check_hw_breakpoints(env, 0)) + raise_exception_env(EXCP01_DB, env); + else + cpu_resume_from_signal(env, NULL); + } + } else { + QTAILQ_FOREACH(bp, &env->breakpoints, entry) + if (bp->pc == env->eip) { + if (bp->flags & BP_CPU) { + check_hw_breakpoints(env, 1); + raise_exception_env(EXCP01_DB, env); + } + break; + } + } + if (prev_debug_excp_handler) + prev_debug_excp_handler(env); +} + +#ifndef VBOX +/* This should come from sysemu.h - if we could include it here... 
*/ +void qemu_system_reset_request(void); + +void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, + uint64_t mcg_status, uint64_t addr, uint64_t misc) +{ + uint64_t mcg_cap = cenv->mcg_cap; + unsigned bank_num = mcg_cap & 0xff; + uint64_t *banks = cenv->mce_banks; + + if (bank >= bank_num || !(status & MCI_STATUS_VAL)) + return; + + /* + * if MSR_MCG_CTL is not all 1s, the uncorrected error + * reporting is disabled + */ + if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && + cenv->mcg_ctl != ~(uint64_t)0) + return; + banks += 4 * bank; + /* + * if MSR_MCi_CTL is not all 1s, the uncorrected error + * reporting is disabled for the bank + */ + if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0) + return; + if (status & MCI_STATUS_UC) { + if ((cenv->mcg_status & MCG_STATUS_MCIP) || + !(cenv->cr[4] & CR4_MCE_MASK)) { + fprintf(stderr, "injects mce exception while previous " + "one is in progress!\n"); + qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + qemu_system_reset_request(); + return; + } + if (banks[1] & MCI_STATUS_VAL) + status |= MCI_STATUS_OVER; + banks[2] = addr; + banks[3] = misc; + cenv->mcg_status = mcg_status; + banks[1] = status; + cpu_interrupt(cenv, CPU_INTERRUPT_MCE); + } else if (!(banks[1] & MCI_STATUS_VAL) + || !(banks[1] & MCI_STATUS_UC)) { + if (banks[1] & MCI_STATUS_VAL) + status |= MCI_STATUS_OVER; + banks[2] = addr; + banks[3] = misc; + banks[1] = status; + } else + banks[1] |= MCI_STATUS_OVER; +} +#endif /* !VBOX */ +#endif /* !CONFIG_USER_ONLY */ + +#ifndef VBOX + +static void mce_init(CPUX86State *cenv) +{ + unsigned int bank, bank_num; + + if (((cenv->cpuid_version >> 8)&0xf) >= 6 + && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) { + cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF; + cenv->mcg_ctl = ~(uint64_t)0; + bank_num = MCE_BANKS_DEF; + for (bank = 0; bank < bank_num; bank++) + cenv->mce_banks[bank*4] = ~(uint64_t)0; + } +} + +int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, + target_ulong *base, unsigned int *limit, + unsigned int *flags) +{ + SegmentCache *dt; + target_ulong ptr; + uint32_t e1, e2; + int index; + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + ptr = dt->base + index; + if ((index + 7) > dt->limit + || cpu_memory_rw_debug(env, ptr, (uint8_t *)&e1, sizeof(e1), 0) != 0 + || cpu_memory_rw_debug(env, ptr+4, (uint8_t *)&e2, sizeof(e2), 0) != 0) + return 0; + + *base = ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000)); + *limit = (e1 & 0xffff) | (e2 & 0x000f0000); + if (e2 & DESC_G_MASK) + *limit = (*limit << 12) | 0xfff; + *flags = e2; + + return 1; +} + +#endif /* !VBOX */ + +#ifndef VBOX +CPUX86State *cpu_x86_init(const char *cpu_model) +#else +CPUX86State *cpu_x86_init(CPUX86State *env, const char *cpu_model) +#endif +{ +#ifndef VBOX + CPUX86State *env; +#endif + static int inited; + +#ifndef VBOX + env = qemu_mallocz(sizeof(CPUX86State)); +#endif + cpu_exec_init(env); + env->cpu_model_str = cpu_model; + + /* init various static tables */ + if (!inited) { + inited = 1; + optimize_flags_init(); +#ifndef CONFIG_USER_ONLY + prev_debug_excp_handler = + cpu_set_debug_excp_handler(breakpoint_handler); +#endif + } +#ifndef VBOX + if (cpu_x86_register(env, cpu_model) < 0) { + cpu_x86_close(env); + return NULL; + } + mce_init(env); +#endif + + qemu_init_vcpu(env); + + return env; +} + +#ifndef VBOX +#if !defined(CONFIG_USER_ONLY) +void do_cpu_init(CPUState *env) +{ + int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI; + 
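/* An INIT must not lose a latched SIPI: save the SIPI bit, reset the CPU and the local APIC, then restore it so the BSP can still start this AP. */ +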
cpu_reset(env); + env->interrupt_request = sipi; + apic_init_reset(env->apic_state); + env->halted = !cpu_is_bsp(env); +} + +void do_cpu_sipi(CPUState *env) +{ + apic_sipi(env->apic_state); +} +#else +void do_cpu_init(CPUState *env) +{ +} +void do_cpu_sipi(CPUState *env) +{ +} +#endif +#endif /* !VBOX */ diff --git a/src/recompiler/target-i386/helper.h b/src/recompiler/target-i386/helper.h new file mode 100644 index 00000000..8307304a --- /dev/null +++ b/src/recompiler/target-i386/helper.h @@ -0,0 +1,253 @@ +#include "def-helper.h" + +DEF_HELPER_FLAGS_1(cc_compute_all, TCG_CALL_PURE, i32, int) +DEF_HELPER_FLAGS_1(cc_compute_c, TCG_CALL_PURE, i32, int) + +DEF_HELPER_0(lock, void) +DEF_HELPER_0(unlock, void) +DEF_HELPER_2(write_eflags, void, tl, i32) +DEF_HELPER_0(read_eflags, tl) +DEF_HELPER_1(divb_AL, void, tl) +DEF_HELPER_1(idivb_AL, void, tl) +DEF_HELPER_1(divw_AX, void, tl) +DEF_HELPER_1(idivw_AX, void, tl) +DEF_HELPER_1(divl_EAX, void, tl) +DEF_HELPER_1(idivl_EAX, void, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_1(mulq_EAX_T0, void, tl) +DEF_HELPER_1(imulq_EAX_T0, void, tl) +DEF_HELPER_2(imulq_T0_T1, tl, tl, tl) +DEF_HELPER_1(divq_EAX, void, tl) +DEF_HELPER_1(idivq_EAX, void, tl) +#endif + +DEF_HELPER_1(aam, void, int) +DEF_HELPER_1(aad, void, int) +DEF_HELPER_0(aaa, void) +DEF_HELPER_0(aas, void) +DEF_HELPER_0(daa, void) +DEF_HELPER_0(das, void) + +DEF_HELPER_1(lsl, tl, tl) +DEF_HELPER_1(lar, tl, tl) +DEF_HELPER_1(verr, void, tl) +DEF_HELPER_1(verw, void, tl) +DEF_HELPER_1(lldt, void, int) +DEF_HELPER_1(ltr, void, int) +DEF_HELPER_2(load_seg, void, int, int) +DEF_HELPER_3(ljmp_protected, void, int, tl, int) +DEF_HELPER_4(lcall_real, void, int, tl, int, int) +DEF_HELPER_4(lcall_protected, void, int, tl, int, int) +DEF_HELPER_1(iret_real, void, int) +DEF_HELPER_2(iret_protected, void, int, int) +DEF_HELPER_2(lret_protected, void, int, int) +DEF_HELPER_1(read_crN, tl, int) +DEF_HELPER_2(write_crN, void, int, tl) +DEF_HELPER_1(lmsw, void, tl) +DEF_HELPER_0(clts, void) +DEF_HELPER_2(movl_drN_T0, void, int, tl) +DEF_HELPER_1(invlpg, void, tl) + +DEF_HELPER_3(enter_level, void, int, int, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_3(enter64_level, void, int, int, tl) +#endif +DEF_HELPER_0(sysenter, void) +DEF_HELPER_1(sysexit, void, int) +#ifdef TARGET_X86_64 +DEF_HELPER_1(syscall, void, int) +DEF_HELPER_1(sysret, void, int) +#endif +DEF_HELPER_1(hlt, void, int) +DEF_HELPER_1(monitor, void, tl) +DEF_HELPER_1(mwait, void, int) +DEF_HELPER_0(debug, void) +DEF_HELPER_0(reset_rf, void) +DEF_HELPER_2(raise_interrupt, void, int, int) +DEF_HELPER_1(raise_exception, void, int) +DEF_HELPER_0(cli, void) +DEF_HELPER_0(sti, void) +DEF_HELPER_0(set_inhibit_irq, void) +DEF_HELPER_0(reset_inhibit_irq, void) +DEF_HELPER_2(boundw, void, tl, int) +DEF_HELPER_2(boundl, void, tl, int) +DEF_HELPER_0(rsm, void) +DEF_HELPER_1(into, void, int) +DEF_HELPER_1(cmpxchg8b, void, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cmpxchg16b, void, tl) +#endif +DEF_HELPER_0(single_step, void) +DEF_HELPER_0(cpuid, void) +DEF_HELPER_0(rdtsc, void) +DEF_HELPER_0(rdtscp, void) +DEF_HELPER_0(rdpmc, void) +DEF_HELPER_0(rdmsr, void) +DEF_HELPER_0(wrmsr, void) + +DEF_HELPER_1(check_iob, void, i32) +DEF_HELPER_1(check_iow, void, i32) +DEF_HELPER_1(check_iol, void, i32) +DEF_HELPER_2(outb, void, i32, i32) +DEF_HELPER_1(inb, tl, i32) +DEF_HELPER_2(outw, void, i32, i32) +DEF_HELPER_1(inw, tl, i32) +DEF_HELPER_2(outl, void, i32, i32) +DEF_HELPER_1(inl, tl, i32) + +DEF_HELPER_2(svm_check_intercept_param, void, i32, i64) +DEF_HELPER_2(vmexit, void, i32, i64) 
+DEF_HELPER_3(svm_check_io, void, i32, i32, i32) +DEF_HELPER_2(vmrun, void, int, int) +DEF_HELPER_0(vmmcall, void) +DEF_HELPER_1(vmload, void, int) +DEF_HELPER_1(vmsave, void, int) +DEF_HELPER_0(stgi, void) +DEF_HELPER_0(clgi, void) +DEF_HELPER_0(skinit, void) +DEF_HELPER_1(invlpga, void, int) + +/* x86 FPU */ + +DEF_HELPER_1(flds_FT0, void, i32) +DEF_HELPER_1(fldl_FT0, void, i64) +DEF_HELPER_1(fildl_FT0, void, s32) +DEF_HELPER_1(flds_ST0, void, i32) +DEF_HELPER_1(fldl_ST0, void, i64) +DEF_HELPER_1(fildl_ST0, void, s32) +DEF_HELPER_1(fildll_ST0, void, s64) +#ifndef VBOX +DEF_HELPER_0(fsts_ST0, i32) +DEF_HELPER_0(fstl_ST0, i64) +DEF_HELPER_0(fist_ST0, s32) +DEF_HELPER_0(fistl_ST0, s32) +DEF_HELPER_0(fistll_ST0, s64) +DEF_HELPER_0(fistt_ST0, s32) +DEF_HELPER_0(fisttl_ST0, s32) +DEF_HELPER_0(fisttll_ST0, s64) +#else /* VBOX */ +DEF_HELPER_0(fsts_ST0, RTCCUINTREG) +DEF_HELPER_0(fstl_ST0, i64) +DEF_HELPER_0(fist_ST0, RTCCINTREG) +DEF_HELPER_0(fistl_ST0, RTCCINTREG) +DEF_HELPER_0(fistll_ST0, s64) +DEF_HELPER_0(fistt_ST0, RTCCINTREG) +DEF_HELPER_0(fisttl_ST0, RTCCINTREG) +DEF_HELPER_0(fisttll_ST0, s64) +#endif /* VBOX */ +DEF_HELPER_1(fldt_ST0, void, tl) +DEF_HELPER_1(fstt_ST0, void, tl) +DEF_HELPER_0(fpush, void) +DEF_HELPER_0(fpop, void) +DEF_HELPER_0(fdecstp, void) +DEF_HELPER_0(fincstp, void) +DEF_HELPER_1(ffree_STN, void, int) +DEF_HELPER_0(fmov_ST0_FT0, void) +DEF_HELPER_1(fmov_FT0_STN, void, int) +DEF_HELPER_1(fmov_ST0_STN, void, int) +DEF_HELPER_1(fmov_STN_ST0, void, int) +DEF_HELPER_1(fxchg_ST0_STN, void, int) +DEF_HELPER_0(fcom_ST0_FT0, void) +DEF_HELPER_0(fucom_ST0_FT0, void) +DEF_HELPER_0(fcomi_ST0_FT0, void) +DEF_HELPER_0(fucomi_ST0_FT0, void) +DEF_HELPER_0(fadd_ST0_FT0, void) +DEF_HELPER_0(fmul_ST0_FT0, void) +DEF_HELPER_0(fsub_ST0_FT0, void) +DEF_HELPER_0(fsubr_ST0_FT0, void) +DEF_HELPER_0(fdiv_ST0_FT0, void) +DEF_HELPER_0(fdivr_ST0_FT0, void) +DEF_HELPER_1(fadd_STN_ST0, void, int) +DEF_HELPER_1(fmul_STN_ST0, void, int) +DEF_HELPER_1(fsub_STN_ST0, void, int) +DEF_HELPER_1(fsubr_STN_ST0, void, int) +DEF_HELPER_1(fdiv_STN_ST0, void, int) +DEF_HELPER_1(fdivr_STN_ST0, void, int) +DEF_HELPER_0(fchs_ST0, void) +DEF_HELPER_0(fabs_ST0, void) +DEF_HELPER_0(fxam_ST0, void) +DEF_HELPER_0(fld1_ST0, void) +DEF_HELPER_0(fldl2t_ST0, void) +DEF_HELPER_0(fldl2e_ST0, void) +DEF_HELPER_0(fldpi_ST0, void) +DEF_HELPER_0(fldlg2_ST0, void) +DEF_HELPER_0(fldln2_ST0, void) +DEF_HELPER_0(fldz_ST0, void) +DEF_HELPER_0(fldz_FT0, void) +#ifndef VBOX +DEF_HELPER_0(fnstsw, i32) +DEF_HELPER_0(fnstcw, i32) +#else /* VBOX */ +DEF_HELPER_0(fnstsw, RTCCUINTREG) +DEF_HELPER_0(fnstcw, RTCCUINTREG) +#endif /* VBOX */ +DEF_HELPER_1(fldcw, void, i32) +DEF_HELPER_0(fclex, void) +DEF_HELPER_0(fwait, void) +DEF_HELPER_0(fninit, void) +DEF_HELPER_1(fbld_ST0, void, tl) +DEF_HELPER_1(fbst_ST0, void, tl) +DEF_HELPER_0(f2xm1, void) +DEF_HELPER_0(fyl2x, void) +DEF_HELPER_0(fptan, void) +DEF_HELPER_0(fpatan, void) +DEF_HELPER_0(fxtract, void) +DEF_HELPER_0(fprem1, void) +DEF_HELPER_0(fprem, void) +DEF_HELPER_0(fyl2xp1, void) +DEF_HELPER_0(fsqrt, void) +DEF_HELPER_0(fsincos, void) +DEF_HELPER_0(frndint, void) +DEF_HELPER_0(fscale, void) +DEF_HELPER_0(fsin, void) +DEF_HELPER_0(fcos, void) +DEF_HELPER_2(fstenv, void, tl, int) +DEF_HELPER_2(fldenv, void, tl, int) +DEF_HELPER_2(fsave, void, tl, int) +DEF_HELPER_2(frstor, void, tl, int) +DEF_HELPER_2(fxsave, void, tl, int) +DEF_HELPER_2(fxrstor, void, tl, int) +DEF_HELPER_1(bsf, tl, tl) +DEF_HELPER_1(bsr, tl, tl) +DEF_HELPER_2(lzcnt, tl, tl, int) + +/* MMX/SSE */ + 
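+/* Most MMX/SSE helper prototypes are generated by including ops_sse_header.h twice below: SHIFT 0 declares the 64-bit MMX variants, SHIFT 1 the 128-bit SSE variants. */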
+DEF_HELPER_0(enter_mmx, void) +DEF_HELPER_0(emms, void) +DEF_HELPER_2(movq, void, ptr, ptr) + +#define SHIFT 0 +#include "ops_sse_header.h" +#define SHIFT 1 +#include "ops_sse_header.h" + +DEF_HELPER_2(rclb, tl, tl, tl) +DEF_HELPER_2(rclw, tl, tl, tl) +DEF_HELPER_2(rcll, tl, tl, tl) +DEF_HELPER_2(rcrb, tl, tl, tl) +DEF_HELPER_2(rcrw, tl, tl, tl) +DEF_HELPER_2(rcrl, tl, tl, tl) +#ifdef TARGET_X86_64 +DEF_HELPER_2(rclq, tl, tl, tl) +DEF_HELPER_2(rcrq, tl, tl, tl) +#endif + +#ifdef VBOX +DEF_HELPER_1(write_eflags_vme, void, tl) +DEF_HELPER_0(read_eflags_vme, tl) +DEF_HELPER_0(cli_vme, void) +DEF_HELPER_0(sti_vme, void) +DEF_HELPER_0(check_external_event, void) +DEF_HELPER_0(dump_state, void) +DEF_HELPER_1(sync_seg, void, i32) + +void helper_external_event(void); +void helper_record_call(void); + +/* in op_helper.c */ +void sync_seg(CPUX86State *env1, int seg_reg, int selector); +void sync_ldtr(CPUX86State *env1, int selector); +#endif /* VBOX */ + +#include "def-helper.h" diff --git a/src/recompiler/target-i386/helper_template.h b/src/recompiler/target-i386/helper_template.h new file mode 100644 index 00000000..193b3274 --- /dev/null +++ b/src/recompiler/target-i386/helper_template.h @@ -0,0 +1,344 @@ +/* + * i386 helpers + * + * Copyright (c) 2008 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#define DATA_BITS (1 << (3 + SHIFT)) +#define SHIFT_MASK (DATA_BITS - 1) +#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) +#if DATA_BITS <= 32 +#define SHIFT1_MASK 0x1f +#else +#define SHIFT1_MASK 0x3f +#endif + +#if DATA_BITS == 8 +#define SUFFIX b +#define DATA_TYPE uint8_t +#define DATA_STYPE int8_t +#define DATA_MASK 0xff +#elif DATA_BITS == 16 +#define SUFFIX w +#define DATA_TYPE uint16_t +#define DATA_STYPE int16_t +#define DATA_MASK 0xffff +#elif DATA_BITS == 32 +#define SUFFIX l +#define DATA_TYPE uint32_t +#define DATA_STYPE int32_t +#define DATA_MASK 0xffffffff +#elif DATA_BITS == 64 +#define SUFFIX q +#define DATA_TYPE uint64_t +#define DATA_STYPE int64_t +#define DATA_MASK 0xffffffffffffffffULL +#else +#error unhandled operand size +#endif + +/* dynamic flags computation */ + +static int glue(compute_all_add, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_add, SUFFIX)(void) +{ + int cf; + target_long src1; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_adc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC - 1; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_adc, SUFFIX)(void) +{ + int cf; + target_long src1; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_sub, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + CC_SRC; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sub, SUFFIX)(void) +{ + int cf; + target_long src1, src2; + src1 = CC_DST + CC_SRC; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + return cf; +} + +static int glue(compute_all_sbb, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + CC_SRC + 1; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sbb, SUFFIX)(void) +{ + int cf; + target_long src1, src2; + src1 = CC_DST + CC_SRC + 1; + src2 = CC_SRC; + cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; + return cf; +} + +static int glue(compute_all_logic, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = 0; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; + zf = 
((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = 0; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_logic, SUFFIX)(void) +{ + return 0; +} + +static int glue(compute_all_inc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST - 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11; + return cf | pf | af | zf | sf | of; +} + +#if DATA_BITS == 32 +static int glue(compute_c_inc, SUFFIX)(void) +{ + return CC_SRC; +} +#endif + +static int glue(compute_all_dec, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + target_long src1, src2; + src1 = CC_DST + 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_all_shl, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + /* of is defined if shift count == 1 */ + of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_shl, SUFFIX)(void) +{ + return (CC_SRC >> (DATA_BITS - 1)) & CC_C; +} + +#if DATA_BITS == 32 +static int glue(compute_c_sar, SUFFIX)(void) +{ + return CC_SRC & 1; +} +#endif + +static int glue(compute_all_sar, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = CC_SRC & 1; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + /* of is defined if shift count == 1 */ + of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +#if DATA_BITS == 32 +static int glue(compute_c_mul, SUFFIX)(void) +{ + int cf; + cf = (CC_SRC != 0); + return cf; +} +#endif + +/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and + CF are modified and it is slower to do that. 
*/ +static int glue(compute_all_mul, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = (CC_SRC != 0); + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST == 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = cf << 11; + return cf | pf | af | zf | sf | of; +} + +/* shifts */ + +target_ulong glue(helper_rcl, SUFFIX)(target_ulong t0, target_ulong t1) +{ + int count, eflags; + target_ulong src; + target_long res; + + count = t1 & SHIFT1_MASK; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = helper_cc_compute_all(CC_OP); + t0 &= DATA_MASK; + src = t0; + res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); + if (count > 1) + res |= t0 >> (DATA_BITS + 1 - count); + t0 = res; + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | + ((src >> (DATA_BITS - count)) & CC_C); + } else { + env->cc_tmp = -1; + } + return t0; +} + +target_ulong glue(helper_rcr, SUFFIX)(target_ulong t0, target_ulong t1) +{ + int count, eflags; + target_ulong src; + target_long res; + + count = t1 & SHIFT1_MASK; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = helper_cc_compute_all(CC_OP); + t0 &= DATA_MASK; + src = t0; + res = (t0 >> count) | ((target_ulong)(eflags & CC_C) << (DATA_BITS - count)); + if (count > 1) + res |= t0 << (DATA_BITS + 1 - count); + t0 = res; + env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | + ((src >> (count - 1)) & CC_C); + } else { + env->cc_tmp = -1; + } + return t0; +} + +#undef DATA_BITS +#undef SHIFT_MASK +#undef SHIFT1_MASK +#undef SIGN_MASK +#undef DATA_TYPE +#undef DATA_STYPE +#undef DATA_MASK +#undef SUFFIX diff --git a/src/recompiler/target-i386/op_helper.c b/src/recompiler/target-i386/op_helper.c new file mode 100644 index 00000000..07b58f8d --- /dev/null +++ b/src/recompiler/target-i386/op_helper.c @@ -0,0 +1,7164 @@ +/* + * i386 helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. 
+ */ + +#include "exec.h" +#include "exec-all.h" +#include "host-utils.h" +#include "ioport.h" + +#ifdef VBOX +# include "qemu-common.h" +# include <math.h> +# include "tcg.h" +# include <VBox/err.h> +#endif /* VBOX */ + +//#define DEBUG_PCALL + + +#ifdef DEBUG_PCALL +# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__) +# define LOG_PCALL_STATE(env) \ + log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP) +#else +# define LOG_PCALL(...) do { } while (0) +# define LOG_PCALL_STATE(env) do { } while (0) +#endif + + +#if 0 +#define raise_exception_err(a, b)\ +do {\ + qemu_log("raise_exception line=%d\n", __LINE__);\ + (raise_exception_err)(a, b);\ +} while (0) +#endif + +static const uint8_t parity_table[256] = { + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, +}; + +/* modulo 17 table */ +static const uint8_t rclw_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9,10,11,12,13,14,15, + 16, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9,10,11,12,13,14, +}; + +/* modulo 9 table */ +static const uint8_t rclb_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 0, 1, 2, 3, 4, +}; + +static const CPU86_LDouble f15rk[7] = +{ + 0.00000000000000000000L, + 1.00000000000000000000L, + 3.14159265358979323851L, /*pi*/ + 0.30102999566398119523L, /*lg2*/ + 0.69314718055994530943L, /*ln2*/ + 1.44269504088896340739L, /*l2e*/ + 3.32192809488736234781L, /*l2t*/ +}; + +/* broken thread support */ + +static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED; + +void helper_lock(void) +{ + spin_lock(&global_cpu_lock); +} + +void helper_unlock(void) +{ + spin_unlock(&global_cpu_lock); +} + +void helper_write_eflags(target_ulong t0, uint32_t update_mask) +{ + load_eflags(t0, update_mask); +} + +target_ulong helper_read_eflags(void) +{ + uint32_t eflags; + eflags = helper_cc_compute_all(CC_OP); + eflags |= (DF & DF_MASK); + eflags |= env->eflags & ~(VM_MASK | RF_MASK); + return eflags; +} + +#ifdef VBOX + +void helper_write_eflags_vme(target_ulong t0) +{ + unsigned int new_eflags = t0; + + assert(env->eflags & (1<<VM_SHIFT)); + + /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */ + /* if TF will be set -> #GP */ + if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK)) + || (new_eflags & TF_MASK)) { + raise_exception(EXCP0D_GPF); + } else { + load_eflags(new_eflags, + (TF_MASK | AC_MASK | 
ID_MASK | NT_MASK) & 0xffff); + + if (new_eflags & IF_MASK) { + env->eflags |= VIF_MASK; + } else { + env->eflags &= ~VIF_MASK; + } + } +} + +target_ulong helper_read_eflags_vme(void) +{ + uint32_t eflags; + eflags = helper_cc_compute_all(CC_OP); + eflags |= (DF & DF_MASK); + eflags |= env->eflags & ~(VM_MASK | RF_MASK); + if (env->eflags & VIF_MASK) + eflags |= IF_MASK; + else + eflags &= ~IF_MASK; + + /* According to AMD manual, should be read with IOPL == 3 */ + eflags |= (3 << IOPL_SHIFT); + + /* We only use helper_read_eflags_vme() in 16-bits mode */ + return eflags & 0xffff; +} + +void helper_dump_state() +{ + LogRel(("CS:EIP=%08x:%08x, FLAGS=%08x\n", env->segs[R_CS].base, env->eip, env->eflags)); + LogRel(("EAX=%08x\tECX=%08x\tEDX=%08x\tEBX=%08x\n", + (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX], + (uint32_t)env->regs[R_EDX], (uint32_t)env->regs[R_EBX])); + LogRel(("ESP=%08x\tEBP=%08x\tESI=%08x\tEDI=%08x\n", + (uint32_t)env->regs[R_ESP], (uint32_t)env->regs[R_EBP], + (uint32_t)env->regs[R_ESI], (uint32_t)env->regs[R_EDI])); +} + +/** + * Updates e2 with the DESC_A_MASK, writes it to the descriptor table, and + * returns the updated e2. + * + * @returns e2 with A set. + * @param e2 The 2nd selector DWORD. + */ +static uint32_t set_segment_accessed(int selector, uint32_t e2) +{ + SegmentCache *dt = selector & X86_SEL_LDT ? &env->ldt : &env->gdt; + target_ulong ptr = dt->base + (selector & X86_SEL_MASK); + + e2 |= DESC_A_MASK; + stl_kernel(ptr + 4, e2); + return e2; +} + +#endif /* VBOX */ + +/* return non zero if error */ +static inline int load_segment(uint32_t *e1_ptr, uint32_t *e2_ptr, + int selector) +{ + SegmentCache *dt; + int index; + target_ulong ptr; + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + if ((index + 7) > dt->limit) + return -1; + ptr = dt->base + index; + *e1_ptr = ldl_kernel(ptr); + *e2_ptr = ldl_kernel(ptr + 4); + return 0; +} + +static inline unsigned int get_seg_limit(uint32_t e1, uint32_t e2) +{ + unsigned int limit; + limit = (e1 & 0xffff) | (e2 & 0x000f0000); + if (e2 & DESC_G_MASK) + limit = (limit << 12) | 0xfff; + return limit; +} + +static inline uint32_t get_seg_base(uint32_t e1, uint32_t e2) +{ + return ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000)); +} + +static inline void load_seg_cache_raw_dt(SegmentCache *sc, uint32_t e1, uint32_t e2) +{ + sc->base = get_seg_base(e1, e2); + sc->limit = get_seg_limit(e1, e2); +#ifndef VBOX + sc->flags = e2; +#else + sc->flags = e2 & DESC_RAW_FLAG_BITS; + sc->newselector = 0; + sc->fVBoxFlags = CPUMSELREG_FLAGS_VALID; +#endif +} + +/* init the segment cache in vm86 mode. */ +static inline void load_seg_vm(int seg, int selector) +{ + selector &= 0xffff; +#ifdef VBOX + /* flags must be 0xf3; expand-up read/write accessed data segment with DPL=3. 
(VT-x) */ + unsigned flags = DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK; + flags |= (3 << DESC_DPL_SHIFT); + + cpu_x86_load_seg_cache(env, seg, selector, + (selector << 4), 0xffff, flags); +#else /* VBOX */ + cpu_x86_load_seg_cache(env, seg, selector, + (selector << 4), 0xffff, 0); +#endif /* VBOX */ +} + +static inline void get_ss_esp_from_tss(uint32_t *ss_ptr, + uint32_t *esp_ptr, int dpl) +{ +#ifndef VBOX + int type, index, shift; +#else + unsigned int type, index, shift; +#endif + +#if 0 + { + int i; + printf("TR: base=%p limit=%x\n", env->tr.base, env->tr.limit); + for(i=0;i<env->tr.limit;i++) { + printf("%02x ", env->tr.base[i]); + if ((i & 7) == 7) printf("\n"); + } + printf("\n"); + } +#endif + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 3) + cpu_abort(env, "invalid tss type"); + shift = type >> 3; + index = (dpl * 4 + 2) << shift; + if (index + (4 << shift) - 1 > env->tr.limit) + raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + if (shift == 0) { + *esp_ptr = lduw_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 2); + } else { + *esp_ptr = ldl_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 4); + } +} + +/* XXX: merge with load_seg() */ +static void tss_load_seg(int seg_reg, int selector) +{ + uint32_t e1, e2; + int rpl, dpl, cpl; + +#ifdef VBOX + e1 = e2 = 0; /* gcc warning? */ + cpl = env->hflags & HF_CPL_MASK; + /* Trying to load a selector with CPL=1? */ + if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0)) + { + Log(("RPL 1 -> sel %04X -> %04X (tss_load_seg)\n", selector, selector & 0xfffc)); + selector = selector & 0xfffc; + } +#endif /* VBOX */ + + if ((selector & 0xfffc) != 0) { + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if (!(e2 & DESC_S_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (seg_reg == R_CS) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + /* XXX: is it correct ? 
*/ + if (dpl != rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if ((e2 & DESC_C_MASK) && dpl > rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } else if (seg_reg == R_SS) { + /* SS must be writable data */ + if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + if (dpl != cpl || dpl != rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } else { + /* not readable code */ + if ((e2 & DESC_CS_MASK) && !(e2 & DESC_R_MASK)) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + /* if data or non conforming code, checks the rights */ + if (((e2 >> DESC_TYPE_SHIFT) & 0xf) < 12) { + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); + } + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + cpu_x86_load_seg_cache(env, seg_reg, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + } else { + if (seg_reg == R_SS || seg_reg == R_CS) + raise_exception_err(EXCP0A_TSS, selector & 0xfffc); +#ifdef VBOX +# if 0 /** @todo now we ignore loading 0 selectors, need to check what is correct once */ + cpu_x86_load_seg_cache(env, seg_reg, selector, + 0, 0, 0); +# endif +#endif /* VBOX */ + } +} + +#define SWITCH_TSS_JMP 0 +#define SWITCH_TSS_IRET 1 +#define SWITCH_TSS_CALL 2 + +/* XXX: restore CPU state in registers (PowerPC case) */ +static void switch_tss(int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip) +{ + int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i; + target_ulong tss_base; + uint32_t new_regs[8], new_segs[6]; + uint32_t new_eflags, new_eip, new_cr3, new_ldt, new_trap; + uint32_t old_eflags, eflags_mask; + SegmentCache *dt; +#ifndef VBOX + int index; +#else + unsigned int index; +#endif + target_ulong ptr; + + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + LOG_PCALL("switch_tss: sel=0x%04x type=%d src=%d\n", tss_selector, type, source); + + /* if task gate, we read the TSS segment and we load it */ + if (type == 5) { + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc); + tss_selector = e1 >> 16; + if (tss_selector & 4) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + if (load_segment(&e1, &e2, tss_selector) != 0) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + if (e2 & DESC_S_MASK) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 1) + raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); + } + + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc); + + if (type & 8) + tss_limit_max = 103; + else + tss_limit_max = 43; + tss_limit = get_seg_limit(e1, e2); + tss_base = get_seg_base(e1, e2); + if ((tss_selector & 4) != 0 || + tss_limit < tss_limit_max) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + old_type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if (old_type & 8) + old_tss_limit_max = 103; + else + old_tss_limit_max = 43; + +#ifndef VBOX /* The old TSS is written first... 
*/ + /* read all the registers from the new TSS */ + if (type & 8) { + /* 32 bit */ + new_cr3 = ldl_kernel(tss_base + 0x1c); + new_eip = ldl_kernel(tss_base + 0x20); + new_eflags = ldl_kernel(tss_base + 0x24); + for(i = 0; i < 8; i++) + new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4)); + for(i = 0; i < 6; i++) + new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x60); + new_trap = ldl_kernel(tss_base + 0x64); + } else { + /* 16 bit */ + new_cr3 = 0; + new_eip = lduw_kernel(tss_base + 0x0e); + new_eflags = lduw_kernel(tss_base + 0x10); + for(i = 0; i < 8; i++) + new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000; + for(i = 0; i < 4; i++) + new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x2a); + new_segs[R_FS] = 0; + new_segs[R_GS] = 0; + new_trap = 0; + } +#endif + + /* NOTE: we must avoid memory exceptions during the task switch, + so we make dummy accesses before */ + /* XXX: it can still fail in some cases, so a bigger hack is + necessary to valid the TLB after having done the accesses */ + + v1 = ldub_kernel(env->tr.base); + v2 = ldub_kernel(env->tr.base + old_tss_limit_max); + stb_kernel(env->tr.base, v1); + stb_kernel(env->tr.base + old_tss_limit_max, v2); + + /* clear busy bit (it is restartable) */ + if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { + target_ulong ptr; + uint32_t e2; + ptr = env->gdt.base + (env->tr.selector & ~7); + e2 = ldl_kernel(ptr + 4); + e2 &= ~DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + old_eflags = compute_eflags(); + if (source == SWITCH_TSS_IRET) + old_eflags &= ~NT_MASK; + + /* save the current state in the old TSS */ + if (type & 8) { + /* 32 bit */ + stl_kernel(env->tr.base + 0x20, next_eip); + stl_kernel(env->tr.base + 0x24, old_eflags); + stl_kernel(env->tr.base + (0x28 + 0 * 4), EAX); + stl_kernel(env->tr.base + (0x28 + 1 * 4), ECX); + stl_kernel(env->tr.base + (0x28 + 2 * 4), EDX); + stl_kernel(env->tr.base + (0x28 + 3 * 4), EBX); + stl_kernel(env->tr.base + (0x28 + 4 * 4), ESP); + stl_kernel(env->tr.base + (0x28 + 5 * 4), EBP); + stl_kernel(env->tr.base + (0x28 + 6 * 4), ESI); + stl_kernel(env->tr.base + (0x28 + 7 * 4), EDI); + for(i = 0; i < 6; i++) + stw_kernel(env->tr.base + (0x48 + i * 4), env->segs[i].selector); +#if defined(VBOX) && defined(DEBUG) + printf("TSS 32 bits switch\n"); + printf("Saving CS=%08X\n", env->segs[R_CS].selector); +#endif + } else { + /* 16 bit */ + stw_kernel(env->tr.base + 0x0e, next_eip); + stw_kernel(env->tr.base + 0x10, old_eflags); + stw_kernel(env->tr.base + (0x12 + 0 * 2), EAX); + stw_kernel(env->tr.base + (0x12 + 1 * 2), ECX); + stw_kernel(env->tr.base + (0x12 + 2 * 2), EDX); + stw_kernel(env->tr.base + (0x12 + 3 * 2), EBX); + stw_kernel(env->tr.base + (0x12 + 4 * 2), ESP); + stw_kernel(env->tr.base + (0x12 + 5 * 2), EBP); + stw_kernel(env->tr.base + (0x12 + 6 * 2), ESI); + stw_kernel(env->tr.base + (0x12 + 7 * 2), EDI); + for(i = 0; i < 4; i++) + stw_kernel(env->tr.base + (0x22 + i * 2), env->segs[i].selector); + } + +#ifdef VBOX + /* read all the registers from the new TSS - may be the same as the old one */ + if (type & 8) { + /* 32 bit */ + new_cr3 = ldl_kernel(tss_base + 0x1c); + new_eip = ldl_kernel(tss_base + 0x20); + new_eflags = ldl_kernel(tss_base + 0x24); + for(i = 0; i < 8; i++) + new_regs[i] = ldl_kernel(tss_base + (0x28 + i * 4)); + for(i = 0; i < 6; i++) + new_segs[i] = lduw_kernel(tss_base + (0x48 + i * 4)); + new_ldt = lduw_kernel(tss_base + 0x60); + new_trap = 
ldl_kernel(tss_base + 0x64); + } else { + /* 16 bit */ + new_cr3 = 0; + new_eip = lduw_kernel(tss_base + 0x0e); + new_eflags = lduw_kernel(tss_base + 0x10); + for(i = 0; i < 8; i++) + new_regs[i] = lduw_kernel(tss_base + (0x12 + i * 2)) | 0xffff0000; + for(i = 0; i < 4; i++) + new_segs[i] = lduw_kernel(tss_base + (0x22 + i * 2)); + new_ldt = lduw_kernel(tss_base + 0x2a); + new_segs[R_FS] = 0; + new_segs[R_GS] = 0; + new_trap = 0; + } +#endif + + /* now if an exception occurs, it will occurs in the next task + context */ + + if (source == SWITCH_TSS_CALL) { + stw_kernel(tss_base, env->tr.selector); + new_eflags |= NT_MASK; + } + + /* set busy bit */ + if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) { + target_ulong ptr; + uint32_t e2; + ptr = env->gdt.base + (tss_selector & ~7); + e2 = ldl_kernel(ptr + 4); + e2 |= DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + + /* set the new CPU state */ + /* from this point, any exception which occurs can give problems */ + env->cr[0] |= CR0_TS_MASK; + env->hflags |= HF_TS_MASK; + env->tr.selector = tss_selector; + env->tr.base = tss_base; + env->tr.limit = tss_limit; +#ifndef VBOX + env->tr.flags = e2 & ~DESC_TSS_BUSY_MASK; +#else + env->tr.flags = (e2 | DESC_TSS_BUSY_MASK) & DESC_RAW_FLAG_BITS; + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + if ((type & 8) && (env->cr[0] & CR0_PG_MASK)) { + cpu_x86_update_cr3(env, new_cr3); + } + + /* load all registers without an exception, then reload them with + possible exception */ + env->eip = new_eip; + eflags_mask = TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK; + if (!(type & 8)) + eflags_mask &= 0xffff; + load_eflags(new_eflags, eflags_mask); + /* XXX: what to do in 16 bit case ? 
*/ + EAX = new_regs[0]; + ECX = new_regs[1]; + EDX = new_regs[2]; + EBX = new_regs[3]; + ESP = new_regs[4]; + EBP = new_regs[5]; + ESI = new_regs[6]; + EDI = new_regs[7]; + if (new_eflags & VM_MASK) { + for(i = 0; i < 6; i++) + load_seg_vm(i, new_segs[i]); + /* in vm86, CPL is always 3 */ + cpu_x86_set_cpl(env, 3); + } else { + /* CPL is set the RPL of CS */ + cpu_x86_set_cpl(env, new_segs[R_CS] & 3); + /* first just selectors as the rest may trigger exceptions */ + for(i = 0; i < 6; i++) + cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0); + } + + env->ldt.selector = new_ldt & ~4; + env->ldt.base = 0; + env->ldt.limit = 0; + env->ldt.flags = 0; +#ifdef VBOX + env->ldt.flags = DESC_INTEL_UNUSABLE; + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + /* load the LDT */ + if (new_ldt & 4) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + + if ((new_ldt & 0xfffc) != 0) { + dt = &env->gdt; + index = new_ldt & ~7; + if ((index + 7) > dt->limit) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0A_TSS, new_ldt & 0xfffc); + load_seg_cache_raw_dt(&env->ldt, e1, e2); + } + + /* load the segments */ + if (!(new_eflags & VM_MASK)) { + tss_load_seg(R_CS, new_segs[R_CS]); + tss_load_seg(R_SS, new_segs[R_SS]); + tss_load_seg(R_ES, new_segs[R_ES]); + tss_load_seg(R_DS, new_segs[R_DS]); + tss_load_seg(R_FS, new_segs[R_FS]); + tss_load_seg(R_GS, new_segs[R_GS]); + } + + /* check that EIP is in the CS segment limits */ + if (new_eip > env->segs[R_CS].limit) { + /* XXX: different exception if CALL ? 
*/ + raise_exception_err(EXCP0D_GPF, 0); + } + +#ifndef CONFIG_USER_ONLY + /* reset local breakpoints */ + if (env->dr[7] & 0x55) { + for (i = 0; i < 4; i++) { + if (hw_breakpoint_enabled(env->dr[7], i) == 0x1) + hw_breakpoint_remove(env, i); + } + env->dr[7] &= ~0x55; + } +#endif +} + +/* check if Port I/O is allowed in TSS */ +static inline void check_io(int addr, int size) +{ +#ifndef VBOX + int io_offset, val, mask; +#else + int val, mask; + unsigned int io_offset; +#endif /* VBOX */ + + /* TSS must be a valid 32 bit one */ + if (!(env->tr.flags & DESC_P_MASK) || + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 || + env->tr.limit < 103) + goto fail; + io_offset = lduw_kernel(env->tr.base + 0x66); + io_offset += (addr >> 3); + /* Note: the check needs two bytes */ + if ((io_offset + 1) > env->tr.limit) + goto fail; + val = lduw_kernel(env->tr.base + io_offset); + val >>= (addr & 7); + mask = (1 << size) - 1; + /* all bits must be zero to allow the I/O */ + if ((val & mask) != 0) { + fail: + raise_exception_err(EXCP0D_GPF, 0); + } +} + +#ifdef VBOX + +/* Keep in sync with gen_check_external_event() */ +void helper_check_external_event() +{ + if ( (env->interrupt_request & ( CPU_INTERRUPT_EXTERNAL_FLUSH_TLB + | CPU_INTERRUPT_EXTERNAL_EXIT + | CPU_INTERRUPT_EXTERNAL_TIMER + | CPU_INTERRUPT_EXTERNAL_DMA)) + || ( (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) + && (env->eflags & IF_MASK) + && !(env->hflags & HF_INHIBIT_IRQ_MASK) ) ) + { + helper_external_event(); + } + +} + +void helper_sync_seg(uint32_t reg) +{ + if (env->segs[reg].newselector) + sync_seg(env, reg, env->segs[reg].newselector); +} + +#endif /* VBOX */ + +void helper_check_iob(uint32_t t0) +{ + check_io(t0, 1); +} + +void helper_check_iow(uint32_t t0) +{ + check_io(t0, 2); +} + +void helper_check_iol(uint32_t t0) +{ + check_io(t0, 4); +} + +void helper_outb(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outb(port, data & 0xff); +#else + cpu_outb(env, port, data & 0xff); +#endif +} + +target_ulong helper_inb(uint32_t port) +{ +#ifndef VBOX + return cpu_inb(port); +#else + return cpu_inb(env, port); +#endif +} + +void helper_outw(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outw(port, data & 0xffff); +#else + cpu_outw(env, port, data & 0xffff); +#endif +} + +target_ulong helper_inw(uint32_t port) +{ +#ifndef VBOX + return cpu_inw(port); +#else + return cpu_inw(env, port); +#endif +} + +void helper_outl(uint32_t port, uint32_t data) +{ +#ifndef VBOX + cpu_outl(port, data); +#else + cpu_outl(env, port, data); +#endif +} + +target_ulong helper_inl(uint32_t port) +{ +#ifndef VBOX + return cpu_inl(port); +#else + return cpu_inl(env, port); +#endif +} + +static inline unsigned int get_sp_mask(unsigned int e2) +{ + if (e2 & DESC_B_MASK) + return 0xffffffff; + else + return 0xffff; +} + +static int exeption_has_error_code(int intno) +{ + switch(intno) { + case 8: + case 10: + case 11: + case 12: + case 13: + case 14: + case 17: + return 1; + } + return 0; +} + +#ifdef TARGET_X86_64 +#define SET_ESP(val, sp_mask)\ +do {\ + if ((sp_mask) == 0xffff)\ + ESP = (ESP & ~0xffff) | ((val) & 0xffff);\ + else if ((sp_mask) == 0xffffffffLL)\ + ESP = (uint32_t)(val);\ + else\ + ESP = (val);\ +} while (0) +#else +#define SET_ESP(val, sp_mask) ESP = (ESP & ~(sp_mask)) | ((val) & (sp_mask)) +#endif + +/* in 64-bit machines, this can overflow. 
So this segment addition macro + * can be used to trim the value to 32-bit whenever needed */ +#define SEG_ADDL(ssp, sp, sp_mask) ((uint32_t)((ssp) + (sp & (sp_mask)))) + +/* XXX: add a is_user flag to have proper security support */ +#define PUSHW(ssp, sp, sp_mask, val)\ +{\ + sp -= 2;\ + stw_kernel((ssp) + (sp & (sp_mask)), (val));\ +} + +#define PUSHL(ssp, sp, sp_mask, val)\ +{\ + sp -= 4;\ + stl_kernel(SEG_ADDL(ssp, sp, sp_mask), (uint32_t)(val));\ +} + +#define POPW(ssp, sp, sp_mask, val)\ +{\ + val = lduw_kernel((ssp) + (sp & (sp_mask)));\ + sp += 2;\ +} + +#define POPL(ssp, sp, sp_mask, val)\ +{\ + val = (uint32_t)ldl_kernel(SEG_ADDL(ssp, sp, sp_mask));\ + sp += 4;\ +} + +/* protected mode interrupt */ +static void do_interrupt_protected(int intno, int is_int, int error_code, + unsigned int next_eip, int is_hw) +{ + SegmentCache *dt; + target_ulong ptr, ssp; + int type, dpl, selector, ss_dpl, cpl; + int has_error_code, new_stack, shift; + uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0; + uint32_t old_eip, sp_mask; + +#ifdef VBOX + if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) + cpu_loop_exit(); +#endif + + has_error_code = 0; + if (!is_int && !is_hw) + has_error_code = exeption_has_error_code(intno); + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + + dt = &env->idt; +#ifndef VBOX + if (intno * 8 + 7 > dt->limit) +#else + if ((unsigned)intno * 8 + 7 > dt->limit) +#endif + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + ptr = dt->base + intno * 8; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + switch(type) { + case 5: /* task gate */ +#ifdef VBOX + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); +#endif + /* must do that check here to return the correct error code */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2); + switch_tss(intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); + if (has_error_code) { + int type; + uint32_t mask; + /* push the error code */ + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + shift = type >> 3; + if (env->segs[R_SS].flags & DESC_B_MASK) + mask = 0xffffffff; + else + mask = 0xffff; + esp = (ESP - (2 << shift)) & mask; + ssp = env->segs[R_SS].base + esp; + if (shift) + stl_kernel(ssp, error_code); + else + stw_kernel(ssp, error_code); + SET_ESP(esp, mask); + } + return; + case 6: /* 286 interrupt gate */ + case 7: /* 286 trap gate */ + case 14: /* 386 interrupt gate */ + case 15: /* 386 trap gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + break; + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 8 + 2); + selector = e1 >> 16; + offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); +#ifdef VBOX /** @todo figure out when this is done one day... 
*/ + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(selector, e2); +#endif + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + if (!(e2 & DESC_C_MASK) && dpl < cpl) { + /* to inner privilege */ + get_ss_esp_from_tss(&ss, &esp, dpl); + if ((ss & 0xfffc) == 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if ((ss & 3) != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (load_segment(&ss_e1, &ss_e2, ss) != 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#ifdef VBOX /** @todo figure out when this is done one day... */ + if (!(ss_e2 & DESC_A_MASK)) + ss_e2 = set_segment_accessed(ss, ss_e2); +#endif + ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (ss_dpl != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-477 of 253666.pdf */ + raise_exception_err(EXCP0C_STACK, ss & 0xfffc); +#else + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#endif + new_stack = 1; + sp_mask = get_sp_mask(ss_e2); + ssp = get_seg_base(ss_e1, ss_e2); +#if defined(VBOX) && defined(DEBUG) + printf("new stack %04X:%08X gate dpl=%d\n", ss, esp, dpl); +#endif + } else if ((e2 & DESC_C_MASK) || dpl == cpl) { + /* to same privilege */ + if (env->eflags & VM_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + esp = ESP; + dpl = cpl; + } else { + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; /* avoid warning */ + sp_mask = 0; /* avoid warning */ + ssp = 0; /* avoid warning */ + esp = 0; /* avoid warning */ + } + + shift = type >> 3; + +#if 0 + /* XXX: check that enough room is available */ + push_size = 6 + (new_stack << 2) + (has_error_code << 1); + if (env->eflags & VM_MASK) + push_size += 8; + push_size <<= shift; +#endif + if (shift == 1) { + if (new_stack) { + if (env->eflags & VM_MASK) { + PUSHL(ssp, esp, sp_mask, env->segs[R_GS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_FS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_DS].selector); + PUSHL(ssp, esp, sp_mask, env->segs[R_ES].selector); + } + PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector); + PUSHL(ssp, esp, sp_mask, ESP); + } + PUSHL(ssp, esp, sp_mask, compute_eflags()); + PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, esp, sp_mask, old_eip); + if (has_error_code) { + PUSHL(ssp, esp, sp_mask, error_code); + } + } else { + if (new_stack) { + if (env->eflags & VM_MASK) { + PUSHW(ssp, esp, sp_mask, env->segs[R_GS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_FS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_DS].selector); + PUSHW(ssp, esp, sp_mask, env->segs[R_ES].selector); + } + PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector); + PUSHW(ssp, esp, sp_mask, ESP); + } + PUSHW(ssp, esp, sp_mask, compute_eflags()); + PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, esp, sp_mask, old_eip); + if (has_error_code) { + PUSHW(ssp, esp, sp_mask, error_code); + } + } + + if (new_stack) { + if (env->eflags & VM_MASK) { + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, R_FS, 0, 
0, 0, 0); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0, 0); + } + ss = (ss & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_SS, ss, + ssp, get_seg_limit(ss_e1, ss_e2), ss_e2); + } + SET_ESP(esp, sp_mask); + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + env->eip = offset; + + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } +#ifndef VBOX + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); +#else + /* + * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD + * gets confused by seemingly changed EFLAGS. See #3491 and + * public bug #2341. + */ + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK); +#endif +} + +#ifdef VBOX + +/* check if VME interrupt redirection is enabled in TSS */ +DECLINLINE(bool) is_vme_irq_redirected(int intno) +{ + unsigned int io_offset, intredir_offset; + unsigned char val, mask; + + /* TSS must be a valid 32 bit one */ + if (!(env->tr.flags & DESC_P_MASK) || + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 11 || + env->tr.limit < 103) + goto fail; + io_offset = lduw_kernel(env->tr.base + 0x66); + /* Make sure the io bitmap offset is valid; anything less than sizeof(VBOXTSS) means there's none. */ + if (io_offset < 0x68 + 0x20) + io_offset = 0x68 + 0x20; + /* the virtual interrupt redirection bitmap is located below the io bitmap */ + intredir_offset = io_offset - 0x20; + + intredir_offset += (intno >> 3); + if ((intredir_offset) > env->tr.limit) + goto fail; + + val = ldub_kernel(env->tr.base + intredir_offset); + mask = 1 << (unsigned char)(intno & 7); + + /* bit set means no redirection. */ + if ((val & mask) != 0) { + return false; + } + return true; + +fail: + raise_exception_err(EXCP0D_GPF, 0); + return true; +} + +/* V86 mode software interrupt with CR4.VME=1 */ +static void do_soft_interrupt_vme(int intno, int error_code, unsigned int next_eip) +{ + target_ulong ptr, ssp; + int selector; + uint32_t offset, esp; + uint32_t old_cs, old_eflags; + uint32_t iopl; + + iopl = ((env->eflags >> IOPL_SHIFT) & 3); + + if (!is_vme_irq_redirected(intno)) + { + if (iopl == 3) + { + do_interrupt_protected(intno, 1, error_code, next_eip, 0); + return; + } + else + raise_exception_err(EXCP0D_GPF, 0); + } + + /* virtual mode idt is at linear address 0 */ + ptr = 0 + intno * 4; + offset = lduw_kernel(ptr); + selector = lduw_kernel(ptr + 2); + esp = ESP; + ssp = env->segs[R_SS].base; + old_cs = env->segs[R_CS].selector; + + old_eflags = compute_eflags(); + if (iopl < 3) + { + /* copy VIF into IF and set IOPL to 3 */ + if (env->eflags & VIF_MASK) + old_eflags |= IF_MASK; + else + old_eflags &= ~IF_MASK; + + old_eflags |= (3 << IOPL_SHIFT); + } + + /* XXX: use SS segment size ? 
*/ + PUSHW(ssp, esp, 0xffff, old_eflags); + PUSHW(ssp, esp, 0xffff, old_cs); + PUSHW(ssp, esp, 0xffff, next_eip); + + /* update processor state */ + ESP = (ESP & ~0xffff) | (esp & 0xffff); + env->eip = offset; + env->segs[R_CS].selector = selector; + env->segs[R_CS].base = (selector << 4); + env->eflags &= ~(TF_MASK | RF_MASK); + + if (iopl < 3) + env->eflags &= ~VIF_MASK; + else + env->eflags &= ~IF_MASK; +} + +#endif /* VBOX */ + +#ifdef TARGET_X86_64 + +#define PUSHQ(sp, val)\ +{\ + sp -= 8;\ + stq_kernel(sp, (val));\ +} + +#define POPQ(sp, val)\ +{\ + val = ldq_kernel(sp);\ + sp += 8;\ +} + +static inline target_ulong get_rsp_from_tss(int level) +{ + int index; + +#if 0 + printf("TR: base=" TARGET_FMT_lx " limit=%x\n", + env->tr.base, env->tr.limit); +#endif + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + index = 8 * level + 4; + if ((index + 7) > env->tr.limit) + raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + return ldq_kernel(env->tr.base + index); +} + +/* 64 bit interrupt */ +static void do_interrupt64(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw) +{ + SegmentCache *dt; + target_ulong ptr; + int type, dpl, selector, cpl, ist; + int has_error_code, new_stack; + uint32_t e1, e2, e3, ss; + target_ulong old_eip, esp, offset; + +#ifdef VBOX + if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) + cpu_loop_exit(); +#endif + + has_error_code = 0; + if (!is_int && !is_hw) + has_error_code = exeption_has_error_code(intno); + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + + dt = &env->idt; + if (intno * 16 + 15 > dt->limit) + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + ptr = dt->base + intno * 16; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + e3 = ldl_kernel(ptr + 8); + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + switch(type) { + case 14: /* 386 interrupt gate */ + case 15: /* 386 trap gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + break; + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, intno * 16 + 2); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, intno * 16 + 2); + selector = e1 >> 16; + offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff); + ist = e2 & 7; + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + if (!(e2 & DESC_L_MASK) || (e2 & DESC_B_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) { + /* to inner privilege */ + if (ist != 0) + esp = get_rsp_from_tss(ist + 3); + else + esp = get_rsp_from_tss(dpl); + esp &= ~0xfLL; /* align stack */ + ss = 0; + new_stack = 1; + } else if ((e2 & DESC_C_MASK) || dpl == cpl) { + /* to same privilege */ + if (env->eflags & VM_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; + if (ist != 0) + esp = get_rsp_from_tss(ist + 3); + else + esp = ESP; + esp &= ~0xfLL; /* align stack */ + dpl = 
cpl; + } else { + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + new_stack = 0; /* avoid warning */ + esp = 0; /* avoid warning */ + } + + PUSHQ(esp, env->segs[R_SS].selector); + PUSHQ(esp, ESP); + PUSHQ(esp, compute_eflags()); + PUSHQ(esp, env->segs[R_CS].selector); + PUSHQ(esp, old_eip); + if (has_error_code) { + PUSHQ(esp, error_code); + } + + if (new_stack) { + ss = 0 | dpl; +#ifndef VBOX + cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0); +#else + cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, dpl << DESC_DPL_SHIFT); +#endif + } + ESP = esp; + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + env->eip = offset; + + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } +#ifndef VBOX + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); +#else /* VBOX */ + /* + * We must clear VIP/VIF too on interrupt entry, as otherwise FreeBSD + * gets confused by seemingly changed EFLAGS. See #3491 and + * public bug #2341. + */ + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK | VIF_MASK | VIP_MASK); +#endif /* VBOX */ +} +#endif + +#ifdef TARGET_X86_64 +#if defined(CONFIG_USER_ONLY) +void helper_syscall(int next_eip_addend) +{ + env->exception_index = EXCP_SYSCALL; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit(); +} +#else +void helper_syscall(int next_eip_addend) +{ + int selector; + + if (!(env->efer & MSR_EFER_SCE)) { + raise_exception_err(EXCP06_ILLOP, 0); + } + selector = (env->star >> 32) & 0xffff; + if (env->hflags & HF_LMA_MASK) { + int code64; + + ECX = env->eip + next_eip_addend; + env->regs[11] = compute_eflags(); + + code64 = env->hflags & HF_CS64_MASK; + + cpu_x86_set_cpl(env, 0); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + env->eflags &= ~env->fmask; + load_eflags(env->eflags, 0); + if (code64) + env->eip = env->lstar; + else + env->eip = env->cstar; + } else { + ECX = (uint32_t)(env->eip + next_eip_addend); + + cpu_x86_set_cpl(env, 0); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); + env->eip = (uint32_t)env->star; + } +} +#endif +#endif + +#ifdef TARGET_X86_64 +void helper_sysret(int dflag) +{ + int cpl, selector; + + if (!(env->efer & MSR_EFER_SCE)) { + raise_exception_err(EXCP06_ILLOP, 0); + } + cpl = env->hflags & HF_CPL_MASK; + if (!(env->cr[0] & CR0_PE_MASK) || cpl != 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + selector = (env->star >> 48) & 0xffff; + if (env->hflags & HF_LMA_MASK) { + if (dflag == 2) { + cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | + DESC_L_MASK); + env->eip = ECX; + } else { + cpu_x86_load_seg_cache(env, R_CS, selector | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + 
DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + env->eip = (uint32_t)ECX; + } + cpu_x86_load_seg_cache(env, R_SS, selector + 8, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + load_eflags((uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK); + cpu_x86_set_cpl(env, 3); + } else { + cpu_x86_load_seg_cache(env, R_CS, selector | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + env->eip = (uint32_t)ECX; + cpu_x86_load_seg_cache(env, R_SS, selector + 8, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + env->eflags |= IF_MASK; + cpu_x86_set_cpl(env, 3); + } +} +#endif + +#ifdef VBOX + +/** + * Checks and processes external VMM events. + * Called by op_check_external_event() when any of the flags is set and can be serviced. + */ +void helper_external_event(void) +{ +# if defined(RT_OS_DARWIN) && defined(VBOX_STRICT) + uintptr_t uSP; +# ifdef RT_ARCH_AMD64 + __asm__ __volatile__("movq %%rsp, %0" : "=r" (uSP)); +# else + __asm__ __volatile__("movl %%esp, %0" : "=r" (uSP)); +# endif + AssertMsg(!(uSP & 15), ("xSP=%#p\n", uSP)); +# endif + /* Keep in sync with flags checked by gen_check_external_event() */ + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_HARD); + cpu_interrupt(env, CPU_INTERRUPT_HARD); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_EXIT) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_EXIT); + cpu_exit(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_DMA) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_DMA); + remR3DmaRun(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_TIMER) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_TIMER); + remR3TimersRun(env); + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) + { + ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, + ~CPU_INTERRUPT_EXTERNAL_HARD); + cpu_interrupt(env, CPU_INTERRUPT_HARD); + } +} + +/* helper for recording call instruction addresses for later scanning */ +void helper_record_call() +{ + if ( !(env->state & CPU_RAW_RING0) + && (env->cr[0] & CR0_PG_MASK) + && !(env->eflags & X86_EFL_IF)) + remR3RecordCall(env); +} + +#endif /* VBOX */ + +/* real mode interrupt */ +static void do_interrupt_real(int intno, int is_int, int error_code, + unsigned int next_eip) +{ + SegmentCache *dt; + target_ulong ptr, ssp; + int selector; + uint32_t offset, esp; + uint32_t old_cs, old_eip; + + /* real mode (simpler !) */ + dt = &env->idt; +#ifndef VBOX + if (intno * 4 + 3 > dt->limit) +#else + if ((unsigned)intno * 4 + 3 > dt->limit) +#endif + raise_exception_err(EXCP0D_GPF, intno * 8 + 2); + ptr = dt->base + intno * 4; + offset = lduw_kernel(ptr); + selector = lduw_kernel(ptr + 2); + esp = ESP; + ssp = env->segs[R_SS].base; + if (is_int) + old_eip = next_eip; + else + old_eip = env->eip; + old_cs = env->segs[R_CS].selector; + /* XXX: use SS segment size ? 
*/ + PUSHW(ssp, esp, 0xffff, compute_eflags()); + PUSHW(ssp, esp, 0xffff, old_cs); + PUSHW(ssp, esp, 0xffff, old_eip); + + /* update processor state */ + ESP = (ESP & ~0xffff) | (esp & 0xffff); + env->eip = offset; + env->segs[R_CS].selector = selector; + env->segs[R_CS].base = (selector << 4); + env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK); +} + +/* fake user mode interrupt */ +void do_interrupt_user(int intno, int is_int, int error_code, + target_ulong next_eip) +{ + SegmentCache *dt; + target_ulong ptr; + int dpl, cpl, shift; + uint32_t e2; + + dt = &env->idt; + if (env->hflags & HF_LMA_MASK) { + shift = 4; + } else { + shift = 3; + } + ptr = dt->base + (intno << shift); + e2 = ldl_kernel(ptr + 4); + + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) + raise_exception_err(EXCP0D_GPF, (intno << shift) + 2); + + /* Since we emulate only user space, we cannot do more than + exiting the emulation with the suitable exception and error + code */ + if (is_int) + EIP = next_eip; +} + +#if !defined(CONFIG_USER_ONLY) +static void handle_even_inj(int intno, int is_int, int error_code, + int is_hw, int rm) +{ + uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + if (!(event_inj & SVM_EVTINJ_VALID)) { + int type; + if (is_int) + type = SVM_EVTINJ_TYPE_SOFT; + else + type = SVM_EVTINJ_TYPE_EXEPT; + event_inj = intno | type | SVM_EVTINJ_VALID; + if (!rm && exeption_has_error_code(intno)) { + event_inj |= SVM_EVTINJ_VALID_ERR; + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code); + } + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj); + } +} +#endif + +/* + * Begin execution of an interruption. is_int is TRUE if coming from + * the int instruction. next_eip is the EIP value AFTER the interrupt + * instruction. It is only relevant if is_int is TRUE. + */ +void do_interrupt(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw) +{ + if (qemu_loglevel_mask(CPU_LOG_INT)) { + if ((env->cr[0] & CR0_PE_MASK)) { + static int count; + qemu_log("%6d: v=%02x e=%04x i=%d cpl=%d IP=%04x:" TARGET_FMT_lx " pc=" TARGET_FMT_lx " SP=%04x:" TARGET_FMT_lx, + count, intno, error_code, is_int, + env->hflags & HF_CPL_MASK, + env->segs[R_CS].selector, EIP, + (int)env->segs[R_CS].base + EIP, + env->segs[R_SS].selector, ESP); + if (intno == 0x0e) { + qemu_log(" CR2=" TARGET_FMT_lx, env->cr[2]); + } else { + qemu_log(" EAX=" TARGET_FMT_lx, EAX); + } + qemu_log("\n"); + log_cpu_state(env, X86_DUMP_CCOP); +#if 0 + { + int i; + uint8_t *ptr; + qemu_log(" code="); + ptr = env->segs[R_CS].base + env->eip; + for(i = 0; i < 16; i++) { + qemu_log(" %02x", ldub(ptr + i)); + } + qemu_log("\n"); + } +#endif + count++; + } + } +#ifdef VBOX + if (RT_UNLIKELY(env->state & CPU_EMULATE_SINGLE_STEP)) { + if (is_int) { + RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv%s\n", + intno, error_code, (RTGCPTR)env->eip, is_hw ? " hw" : ""); + } else { + RTLogPrintf("do_interrupt: %#04x err=%#x pc=%#RGv next=%#RGv%s\n", + intno, error_code, (RTGCPTR)env->eip, (RTGCPTR)next_eip, is_hw ? 
" hw" : ""); + } + } +#endif + if (env->cr[0] & CR0_PE_MASK) { +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) + handle_even_inj(intno, is_int, error_code, is_hw, 0); +#endif +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + do_interrupt64(intno, is_int, error_code, next_eip, is_hw); + } else +#endif + { +#ifdef VBOX + /* int xx *, v86 code and VME enabled? */ + if ( (env->eflags & VM_MASK) + && (env->cr[4] & CR4_VME_MASK) + && is_int + && !is_hw + && env->eip + 1 != next_eip /* single byte int 3 goes straight to the protected mode handler */ + ) + do_soft_interrupt_vme(intno, error_code, next_eip); + else +#endif /* VBOX */ + do_interrupt_protected(intno, is_int, error_code, next_eip, is_hw); + } + } else { +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) + handle_even_inj(intno, is_int, error_code, is_hw, 1); +#endif + do_interrupt_real(intno, is_int, error_code, next_eip); + } + +#if !defined(CONFIG_USER_ONLY) + if (env->hflags & HF_SVMI_MASK) { + uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID); + } +#endif +} + +/* This should come from sysemu.h - if we could include it here... */ +void qemu_system_reset_request(void); + +/* + * Check nested exceptions and change to double or triple fault if + * needed. It should only be called, if this is not an interrupt. + * Returns the new exception number. + */ +static int check_exception(int intno, int *error_code) +{ + int first_contributory = env->old_exception == 0 || + (env->old_exception >= 10 && + env->old_exception <= 13); + int second_contributory = intno == 0 || + (intno >= 10 && intno <= 13); + + qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n", + env->old_exception, intno); + +#if !defined(CONFIG_USER_ONLY) + if (env->old_exception == EXCP08_DBLE) { + if (env->hflags & HF_SVMI_MASK) + helper_vmexit(SVM_EXIT_SHUTDOWN, 0); /* does not return */ + + qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + +# ifndef VBOX + qemu_system_reset_request(); + return EXCP_HLT; +# else + remR3RaiseRC(env->pVM, VINF_EM_TRIPLE_FAULT); + return EXCP_RC; +# endif + } +#endif + + if ((first_contributory && second_contributory) + || (env->old_exception == EXCP0E_PAGE && + (second_contributory || (intno == EXCP0E_PAGE)))) { + intno = EXCP08_DBLE; + *error_code = 0; + } + + if (second_contributory || (intno == EXCP0E_PAGE) || + (intno == EXCP08_DBLE)) + env->old_exception = intno; + + return intno; +} + +/* + * Signal an interruption. It is executed in the main CPU loop. + * is_int is TRUE if coming from the int instruction. next_eip is the + * EIP value AFTER the interrupt instruction. It is only relevant if + * is_int is TRUE. 
+ */ +static void QEMU_NORETURN raise_interrupt(int intno, int is_int, int error_code, + int next_eip_addend) +{ +#if defined(VBOX) && defined(DEBUG) + Log2(("raise_interrupt: %x %x %x %RGv\n", intno, is_int, error_code, (RTGCPTR)env->eip + next_eip_addend)); +#endif + if (!is_int) { + helper_svm_check_intercept_param(SVM_EXIT_EXCP_BASE + intno, error_code); + intno = check_exception(intno, &error_code); + } else { + helper_svm_check_intercept_param(SVM_EXIT_SWINT, 0); + } + + env->exception_index = intno; + env->error_code = error_code; + env->exception_is_int = is_int; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit(); +} + +/* shortcuts to generate exceptions */ + +void raise_exception_err(int exception_index, int error_code) +{ + raise_interrupt(exception_index, 0, error_code, 0); +} + +void raise_exception(int exception_index) +{ + raise_interrupt(exception_index, 0, 0, 0); +} + +void raise_exception_env(int exception_index, CPUState *nenv) +{ + env = nenv; + raise_exception(exception_index); +} +/* SMM support */ + +#if defined(CONFIG_USER_ONLY) + +void do_smm_enter(void) +{ +} + +void helper_rsm(void) +{ +} + +#else + +#ifdef TARGET_X86_64 +#define SMM_REVISION_ID 0x00020064 +#else +#define SMM_REVISION_ID 0x00020000 +#endif + +void do_smm_enter(void) +{ + target_ulong sm_state; + SegmentCache *dt; + int i, offset; + + qemu_log_mask(CPU_LOG_INT, "SMM: enter\n"); + log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP); + + env->hflags |= HF_SMM_MASK; + cpu_smm_update(env); + + sm_state = env->smbase + 0x8000; + +#ifdef TARGET_X86_64 + for(i = 0; i < 6; i++) { + dt = &env->segs[i]; + offset = 0x7e00 + i * 16; + stw_phys(sm_state + offset, dt->selector); + stw_phys(sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff); + stl_phys(sm_state + offset + 4, dt->limit); + stq_phys(sm_state + offset + 8, dt->base); + } + + stq_phys(sm_state + 0x7e68, env->gdt.base); + stl_phys(sm_state + 0x7e64, env->gdt.limit); + + stw_phys(sm_state + 0x7e70, env->ldt.selector); + stq_phys(sm_state + 0x7e78, env->ldt.base); + stl_phys(sm_state + 0x7e74, env->ldt.limit); + stw_phys(sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff); + + stq_phys(sm_state + 0x7e88, env->idt.base); + stl_phys(sm_state + 0x7e84, env->idt.limit); + + stw_phys(sm_state + 0x7e90, env->tr.selector); + stq_phys(sm_state + 0x7e98, env->tr.base); + stl_phys(sm_state + 0x7e94, env->tr.limit); + stw_phys(sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff); + + stq_phys(sm_state + 0x7ed0, env->efer); + + stq_phys(sm_state + 0x7ff8, EAX); + stq_phys(sm_state + 0x7ff0, ECX); + stq_phys(sm_state + 0x7fe8, EDX); + stq_phys(sm_state + 0x7fe0, EBX); + stq_phys(sm_state + 0x7fd8, ESP); + stq_phys(sm_state + 0x7fd0, EBP); + stq_phys(sm_state + 0x7fc8, ESI); + stq_phys(sm_state + 0x7fc0, EDI); + for(i = 8; i < 16; i++) + stq_phys(sm_state + 0x7ff8 - i * 8, env->regs[i]); + stq_phys(sm_state + 0x7f78, env->eip); + stl_phys(sm_state + 0x7f70, compute_eflags()); + stl_phys(sm_state + 0x7f68, env->dr[6]); + stl_phys(sm_state + 0x7f60, env->dr[7]); + + stl_phys(sm_state + 0x7f48, env->cr[4]); + stl_phys(sm_state + 0x7f50, env->cr[3]); + stl_phys(sm_state + 0x7f58, env->cr[0]); + + stl_phys(sm_state + 0x7efc, SMM_REVISION_ID); + stl_phys(sm_state + 0x7f00, env->smbase); +#else + stl_phys(sm_state + 0x7ffc, env->cr[0]); + stl_phys(sm_state + 0x7ff8, env->cr[3]); + stl_phys(sm_state + 0x7ff4, compute_eflags()); + stl_phys(sm_state + 0x7ff0, env->eip); + stl_phys(sm_state + 0x7fec, EDI); + stl_phys(sm_state + 0x7fe8, ESI); + 
stl_phys(sm_state + 0x7fe4, EBP); + stl_phys(sm_state + 0x7fe0, ESP); + stl_phys(sm_state + 0x7fdc, EBX); + stl_phys(sm_state + 0x7fd8, EDX); + stl_phys(sm_state + 0x7fd4, ECX); + stl_phys(sm_state + 0x7fd0, EAX); + stl_phys(sm_state + 0x7fcc, env->dr[6]); + stl_phys(sm_state + 0x7fc8, env->dr[7]); + + stl_phys(sm_state + 0x7fc4, env->tr.selector); + stl_phys(sm_state + 0x7f64, env->tr.base); + stl_phys(sm_state + 0x7f60, env->tr.limit); + stl_phys(sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff); + + stl_phys(sm_state + 0x7fc0, env->ldt.selector); + stl_phys(sm_state + 0x7f80, env->ldt.base); + stl_phys(sm_state + 0x7f7c, env->ldt.limit); + stl_phys(sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff); + + stl_phys(sm_state + 0x7f74, env->gdt.base); + stl_phys(sm_state + 0x7f70, env->gdt.limit); + + stl_phys(sm_state + 0x7f58, env->idt.base); + stl_phys(sm_state + 0x7f54, env->idt.limit); + + for(i = 0; i < 6; i++) { + dt = &env->segs[i]; + if (i < 3) + offset = 0x7f84 + i * 12; + else + offset = 0x7f2c + (i - 3) * 12; + stl_phys(sm_state + 0x7fa8 + i * 4, dt->selector); + stl_phys(sm_state + offset + 8, dt->base); + stl_phys(sm_state + offset + 4, dt->limit); + stl_phys(sm_state + offset, (dt->flags >> 8) & 0xf0ff); + } + stl_phys(sm_state + 0x7f14, env->cr[4]); + + stl_phys(sm_state + 0x7efc, SMM_REVISION_ID); + stl_phys(sm_state + 0x7ef8, env->smbase); +#endif + /* init SMM cpu state */ + +#ifdef TARGET_X86_64 + cpu_load_efer(env, 0); +#endif + load_eflags(0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->eip = 0x00008000; + cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, + 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0); + cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0); + + cpu_x86_update_cr0(env, + env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | CR0_PG_MASK)); + cpu_x86_update_cr4(env, 0); + env->dr[7] = 0x00000400; + CC_OP = CC_OP_EFLAGS; +} + +void helper_rsm(void) +{ +#ifdef VBOX + cpu_abort(env, "helper_rsm"); +#else /* !VBOX */ + target_ulong sm_state; + int i, offset; + uint32_t val; + + sm_state = env->smbase + 0x8000; +#ifdef TARGET_X86_64 + cpu_load_efer(env, ldq_phys(sm_state + 0x7ed0)); + + for(i = 0; i < 6; i++) { + offset = 0x7e00 + i * 16; + cpu_x86_load_seg_cache(env, i, + lduw_phys(sm_state + offset), + ldq_phys(sm_state + offset + 8), + ldl_phys(sm_state + offset + 4), + (lduw_phys(sm_state + offset + 2) & 0xf0ff) << 8); + } + + env->gdt.base = ldq_phys(sm_state + 0x7e68); + env->gdt.limit = ldl_phys(sm_state + 0x7e64); + + env->ldt.selector = lduw_phys(sm_state + 0x7e70); + env->ldt.base = ldq_phys(sm_state + 0x7e78); + env->ldt.limit = ldl_phys(sm_state + 0x7e74); + env->ldt.flags = (lduw_phys(sm_state + 0x7e72) & 0xf0ff) << 8; +#ifdef VBOX + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + env->idt.base = ldq_phys(sm_state + 0x7e88); + env->idt.limit = ldl_phys(sm_state + 0x7e84); + + env->tr.selector = lduw_phys(sm_state + 0x7e90); + env->tr.base = ldq_phys(sm_state + 0x7e98); + env->tr.limit = ldl_phys(sm_state + 0x7e94); + env->tr.flags = (lduw_phys(sm_state + 0x7e92) & 0xf0ff) << 8; +#ifdef VBOX + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + EAX = ldq_phys(sm_state + 0x7ff8); + ECX = ldq_phys(sm_state + 0x7ff0); + EDX = 
ldq_phys(sm_state + 0x7fe8); + EBX = ldq_phys(sm_state + 0x7fe0); + ESP = ldq_phys(sm_state + 0x7fd8); + EBP = ldq_phys(sm_state + 0x7fd0); + ESI = ldq_phys(sm_state + 0x7fc8); + EDI = ldq_phys(sm_state + 0x7fc0); + for(i = 8; i < 16; i++) + env->regs[i] = ldq_phys(sm_state + 0x7ff8 - i * 8); + env->eip = ldq_phys(sm_state + 0x7f78); + load_eflags(ldl_phys(sm_state + 0x7f70), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->dr[6] = ldl_phys(sm_state + 0x7f68); + env->dr[7] = ldl_phys(sm_state + 0x7f60); + + cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f48)); + cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7f50)); + cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7f58)); + + val = ldl_phys(sm_state + 0x7efc); /* revision ID */ + if (val & 0x20000) { + env->smbase = ldl_phys(sm_state + 0x7f00) & ~0x7fff; + } +#else + cpu_x86_update_cr0(env, ldl_phys(sm_state + 0x7ffc)); + cpu_x86_update_cr3(env, ldl_phys(sm_state + 0x7ff8)); + load_eflags(ldl_phys(sm_state + 0x7ff4), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + env->eip = ldl_phys(sm_state + 0x7ff0); + EDI = ldl_phys(sm_state + 0x7fec); + ESI = ldl_phys(sm_state + 0x7fe8); + EBP = ldl_phys(sm_state + 0x7fe4); + ESP = ldl_phys(sm_state + 0x7fe0); + EBX = ldl_phys(sm_state + 0x7fdc); + EDX = ldl_phys(sm_state + 0x7fd8); + ECX = ldl_phys(sm_state + 0x7fd4); + EAX = ldl_phys(sm_state + 0x7fd0); + env->dr[6] = ldl_phys(sm_state + 0x7fcc); + env->dr[7] = ldl_phys(sm_state + 0x7fc8); + + env->tr.selector = ldl_phys(sm_state + 0x7fc4) & 0xffff; + env->tr.base = ldl_phys(sm_state + 0x7f64); + env->tr.limit = ldl_phys(sm_state + 0x7f60); + env->tr.flags = (ldl_phys(sm_state + 0x7f5c) & 0xf0ff) << 8; +#ifdef VBOX + env->tr.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->tr.newselector = 0; +#endif + + env->ldt.selector = ldl_phys(sm_state + 0x7fc0) & 0xffff; + env->ldt.base = ldl_phys(sm_state + 0x7f80); + env->ldt.limit = ldl_phys(sm_state + 0x7f7c); + env->ldt.flags = (ldl_phys(sm_state + 0x7f78) & 0xf0ff) << 8; +#ifdef VBOX + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + + env->gdt.base = ldl_phys(sm_state + 0x7f74); + env->gdt.limit = ldl_phys(sm_state + 0x7f70); + + env->idt.base = ldl_phys(sm_state + 0x7f58); + env->idt.limit = ldl_phys(sm_state + 0x7f54); + + for(i = 0; i < 6; i++) { + if (i < 3) + offset = 0x7f84 + i * 12; + else + offset = 0x7f2c + (i - 3) * 12; + cpu_x86_load_seg_cache(env, i, + ldl_phys(sm_state + 0x7fa8 + i * 4) & 0xffff, + ldl_phys(sm_state + offset + 8), + ldl_phys(sm_state + offset + 4), + (ldl_phys(sm_state + offset) & 0xf0ff) << 8); + } + cpu_x86_update_cr4(env, ldl_phys(sm_state + 0x7f14)); + + val = ldl_phys(sm_state + 0x7efc); /* revision ID */ + if (val & 0x20000) { + env->smbase = ldl_phys(sm_state + 0x7ef8) & ~0x7fff; + } +#endif + CC_OP = CC_OP_EFLAGS; + env->hflags &= ~HF_SMM_MASK; + cpu_smm_update(env); + + qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); + log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP); +#endif /* !VBOX */ +} + +#endif /* !CONFIG_USER_ONLY */ + + +/* division, flags are undefined */ + +void helper_divb_AL(target_ulong t0) +{ + unsigned int num, den, q, r; + + num = (EAX & 0xffff); + den = (t0 & 0xff); + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q > 0xff) + raise_exception(EXCP00_DIVZ); + q &= 0xff; + r = (num % den) & 0xff; + EAX = (EAX & ~0xffff) | (r << 8) | q; +} + +void helper_idivb_AL(target_ulong t0) +{ + int num, den, q, r; + + num = (int16_t)EAX; + den = (int8_t)t0; + if (den == 0) { 
+ raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q != (int8_t)q) + raise_exception(EXCP00_DIVZ); + q &= 0xff; + r = (num % den) & 0xff; + EAX = (EAX & ~0xffff) | (r << 8) | q; +} + +void helper_divw_AX(target_ulong t0) +{ + unsigned int num, den, q, r; + + num = (EAX & 0xffff) | ((EDX & 0xffff) << 16); + den = (t0 & 0xffff); + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q > 0xffff) + raise_exception(EXCP00_DIVZ); + q &= 0xffff; + r = (num % den) & 0xffff; + EAX = (EAX & ~0xffff) | q; + EDX = (EDX & ~0xffff) | r; +} + +void helper_idivw_AX(target_ulong t0) +{ + int num, den, q, r; + + num = (EAX & 0xffff) | ((EDX & 0xffff) << 16); + den = (int16_t)t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + if (q != (int16_t)q) + raise_exception(EXCP00_DIVZ); + q &= 0xffff; + r = (num % den) & 0xffff; + EAX = (EAX & ~0xffff) | q; + EDX = (EDX & ~0xffff) | r; +} + +void helper_divl_EAX(target_ulong t0) +{ + unsigned int den, r; + uint64_t num, q; + + num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + den = t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + r = (num % den); + if (q > 0xffffffff) + raise_exception(EXCP00_DIVZ); + EAX = (uint32_t)q; + EDX = (uint32_t)r; +} + +void helper_idivl_EAX(target_ulong t0) +{ + int den, r; + int64_t num, q; + + num = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + den = t0; + if (den == 0) { + raise_exception(EXCP00_DIVZ); + } + q = (num / den); + r = (num % den); + if (q != (int32_t)q) + raise_exception(EXCP00_DIVZ); + EAX = (uint32_t)q; + EDX = (uint32_t)r; +} + +/* bcd */ + +/* XXX: exception */ +void helper_aam(int base) +{ + int al, ah; + al = EAX & 0xff; + ah = al / base; + al = al % base; + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_DST = al; +} + +void helper_aad(int base) +{ + int al, ah; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + al = ((ah * base) + al) & 0xff; + EAX = (EAX & ~0xffff) | al; + CC_DST = al; +} + +void helper_aaa(void) +{ + int icarry; + int al, ah, af; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + af = eflags & CC_A; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + + icarry = (al > 0xf9); + if (((al & 0x0f) > 9 ) || af) { + al = (al + 6) & 0x0f; + ah = (ah + 1 + icarry) & 0xff; + eflags |= CC_C | CC_A; + } else { + eflags &= ~(CC_C | CC_A); + al &= 0x0f; + } + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_SRC = eflags; +} + +void helper_aas(void) +{ + int icarry; + int al, ah, af; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + af = eflags & CC_A; + al = EAX & 0xff; + ah = (EAX >> 8) & 0xff; + + icarry = (al < 6); + if (((al & 0x0f) > 9 ) || af) { + al = (al - 6) & 0x0f; + ah = (ah - 1 - icarry) & 0xff; + eflags |= CC_C | CC_A; + } else { + eflags &= ~(CC_C | CC_A); + al &= 0x0f; + } + EAX = (EAX & ~0xffff) | al | (ah << 8); + CC_SRC = eflags; +} + +void helper_daa(void) +{ + int al, af, cf; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + cf = eflags & CC_C; + af = eflags & CC_A; + al = EAX & 0xff; + + eflags = 0; + if (((al & 0x0f) > 9 ) || af) { + al = (al + 6) & 0xff; + eflags |= CC_A; + } + if ((al > 0x9f) || cf) { + al = (al + 0x60) & 0xff; + eflags |= CC_C; + } + EAX = (EAX & ~0xff) | al; + /* well, speed is not an issue here, so we compute the flags by hand */ + eflags |= (al == 0) << 6; /* zf */ + eflags |= parity_table[al]; /* pf */ + eflags |= (al & 0x80); /* sf */ + CC_SRC = eflags; +} + +void helper_das(void) +{ + int al, al1, af, cf; + int eflags; + + eflags = 
helper_cc_compute_all(CC_OP); + cf = eflags & CC_C; + af = eflags & CC_A; + al = EAX & 0xff; + + eflags = 0; + al1 = al; + if (((al & 0x0f) > 9 ) || af) { + eflags |= CC_A; + if (al < 6 || cf) + eflags |= CC_C; + al = (al - 6) & 0xff; + } + if ((al1 > 0x99) || cf) { + al = (al - 0x60) & 0xff; + eflags |= CC_C; + } + EAX = (EAX & ~0xff) | al; + /* well, speed is not an issue here, so we compute the flags by hand */ + eflags |= (al == 0) << 6; /* zf */ + eflags |= parity_table[al]; /* pf */ + eflags |= (al & 0x80); /* sf */ + CC_SRC = eflags; +} + +void helper_into(int next_eip_addend) +{ + int eflags; + eflags = helper_cc_compute_all(CC_OP); + if (eflags & CC_O) { + raise_interrupt(EXCP04_INTO, 1, 0, next_eip_addend); + } +} + +void helper_cmpxchg8b(target_ulong a0) +{ + uint64_t d; + int eflags; + + eflags = helper_cc_compute_all(CC_OP); + d = ldq(a0); + if (d == (((uint64_t)EDX << 32) | (uint32_t)EAX)) { + stq(a0, ((uint64_t)ECX << 32) | (uint32_t)EBX); + eflags |= CC_Z; + } else { + /* always do the store */ + stq(a0, d); + EDX = (uint32_t)(d >> 32); + EAX = (uint32_t)d; + eflags &= ~CC_Z; + } + CC_SRC = eflags; +} + +#ifdef TARGET_X86_64 +void helper_cmpxchg16b(target_ulong a0) +{ + uint64_t d0, d1; + int eflags; + + if ((a0 & 0xf) != 0) + raise_exception(EXCP0D_GPF); + eflags = helper_cc_compute_all(CC_OP); + d0 = ldq(a0); + d1 = ldq(a0 + 8); + if (d0 == EAX && d1 == EDX) { + stq(a0, EBX); + stq(a0 + 8, ECX); + eflags |= CC_Z; + } else { + /* always do the store */ + stq(a0, d0); + stq(a0 + 8, d1); + EDX = d1; + EAX = d0; + eflags &= ~CC_Z; + } + CC_SRC = eflags; +} +#endif + +void helper_single_step(void) +{ +#ifndef CONFIG_USER_ONLY + check_hw_breakpoints(env, 1); + env->dr[6] |= DR6_BS; +#endif + raise_exception(EXCP01_DB); +} + +void helper_cpuid(void) +{ + uint32_t eax, ebx, ecx, edx; + + helper_svm_check_intercept_param(SVM_EXIT_CPUID, 0); + + cpu_x86_cpuid(env, (uint32_t)EAX, (uint32_t)ECX, &eax, &ebx, &ecx, &edx); + EAX = eax; + EBX = ebx; + ECX = ecx; + EDX = edx; +} + +void helper_enter_level(int level, int data32, target_ulong t1) +{ + target_ulong ssp; + uint32_t esp_mask, esp, ebp; + + esp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + ebp = EBP; + esp = ESP; + if (data32) { + /* 32 bit */ + esp -= 4; + while (--level) { + esp -= 4; + ebp -= 4; + stl(ssp + (esp & esp_mask), ldl(ssp + (ebp & esp_mask))); + } + esp -= 4; + stl(ssp + (esp & esp_mask), t1); + } else { + /* 16 bit */ + esp -= 2; + while (--level) { + esp -= 2; + ebp -= 2; + stw(ssp + (esp & esp_mask), lduw(ssp + (ebp & esp_mask))); + } + esp -= 2; + stw(ssp + (esp & esp_mask), t1); + } +} + +#ifdef TARGET_X86_64 +void helper_enter64_level(int level, int data64, target_ulong t1) +{ + target_ulong esp, ebp; + ebp = EBP; + esp = ESP; + + if (data64) { + /* 64 bit */ + esp -= 8; + while (--level) { + esp -= 8; + ebp -= 8; + stq(esp, ldq(ebp)); + } + esp -= 8; + stq(esp, t1); + } else { + /* 16 bit */ + esp -= 2; + while (--level) { + esp -= 2; + ebp -= 2; + stw(esp, lduw(ebp)); + } + esp -= 2; + stw(esp, t1); + } +} +#endif + +void helper_lldt(int selector) +{ + SegmentCache *dt; + uint32_t e1, e2; +#ifndef VBOX + int index, entry_limit; +#else + unsigned int index, entry_limit; +#endif + target_ulong ptr; + +#ifdef VBOX + Log(("helper_lldt_T0: old ldtr=%RTsel {.base=%RGv, .limit=%RGv} new=%RTsel\n", + (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit, (RTSEL)(selector & 0xffff))); +#endif + + selector &= 0xffff; + if ((selector & 0xfffc) == 0) { + /* XXX: NULL 
selector case: invalid LDT */ + env->ldt.base = 0; + env->ldt.limit = 0; +#ifdef VBOX + env->ldt.flags = DESC_INTEL_UNUSABLE; + env->ldt.fVBoxFlags = CPUMSELREG_FLAGS_VALID; + env->ldt.newselector = 0; +#endif + } else { + if (selector & 0x4) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dt = &env->gdt; + index = selector & ~7; +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + entry_limit = 15; + else +#endif + entry_limit = 7; + if ((index + entry_limit) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint32_t e3; + e3 = ldl_kernel(ptr + 8); + load_seg_cache_raw_dt(&env->ldt, e1, e2); + env->ldt.base |= (target_ulong)e3 << 32; + } else +#endif + { + load_seg_cache_raw_dt(&env->ldt, e1, e2); + } + } + env->ldt.selector = selector; +#ifdef VBOX + Log(("helper_lldt_T0: new ldtr=%RTsel {.base=%RGv, .limit=%RGv}\n", + (RTSEL)env->ldt.selector, (RTGCPTR)env->ldt.base, (RTGCPTR)env->ldt.limit)); +#endif +} + +void helper_ltr(int selector) +{ + SegmentCache *dt; + uint32_t e1, e2; +#ifndef VBOX + int index, type, entry_limit; +#else + unsigned int index; + int type, entry_limit; +#endif + target_ulong ptr; + +#ifdef VBOX + Log(("helper_ltr: pc=%RGv old tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n", + (RTGCPTR)env->eip, (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit, + env->tr.flags, (RTSEL)(selector & 0xffff))); +#endif + selector &= 0xffff; + if ((selector & 0xfffc) == 0) { + /* NULL selector case: invalid TR */ +#ifdef VBOX + raise_exception_err(EXCP0A_TSS, 0); +#else + env->tr.base = 0; + env->tr.limit = 0; + env->tr.flags = 0; +#endif + } else { + if (selector & 0x4) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dt = &env->gdt; + index = selector & ~7; +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + entry_limit = 15; + else +#endif + entry_limit = 7; + if ((index + entry_limit) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + if ((e2 & DESC_S_MASK) || + (type != 1 && type != 9)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + uint32_t e3, e4; + e3 = ldl_kernel(ptr + 8); + e4 = ldl_kernel(ptr + 12); + if ((e4 >> DESC_TYPE_SHIFT) & 0xf) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + load_seg_cache_raw_dt(&env->tr, e1, e2); + env->tr.base |= (target_ulong)e3 << 32; + } else +#endif + { + load_seg_cache_raw_dt(&env->tr, e1, e2); + } + env->tr.flags |= DESC_TSS_BUSY_MASK; + e2 |= DESC_TSS_BUSY_MASK; + stl_kernel(ptr + 4, e2); + } + env->tr.selector = selector; +#ifdef VBOX + Log(("helper_ltr: new tr=%RTsel {.base=%RGv, .limit=%RGv, .flags=%RX32} new=%RTsel\n", + (RTSEL)env->tr.selector, (RTGCPTR)env->tr.base, (RTGCPTR)env->tr.limit, + env->tr.flags, (RTSEL)(selector & 0xffff))); +#endif +} + +/* only works if protected mode and not VM86. 
seg_reg must be != R_CS */ +void helper_load_seg(int seg_reg, int selector) +{ + uint32_t e1, e2; + int cpl, dpl, rpl; + SegmentCache *dt; +#ifndef VBOX + int index; +#else + unsigned int index; +#endif + target_ulong ptr; + + selector &= 0xffff; + cpl = env->hflags & HF_CPL_MASK; +#ifdef VBOX + + /* Trying to load a selector with CPL=1? */ + if (cpl == 0 && (selector & 3) == 1 && (env->state & CPU_RAW_RING0)) + { + Log(("RPL 1 -> sel %04X -> %04X (helper_load_seg)\n", selector, selector & 0xfffc)); + selector = selector & 0xfffc; + } +#endif /* VBOX */ + if ((selector & 0xfffc) == 0) { + /* null selector case */ +#ifndef VBOX + if (seg_reg == R_SS +#ifdef TARGET_X86_64 + && (!(env->hflags & HF_CS64_MASK) || cpl == 3) +#endif + ) + raise_exception_err(EXCP0D_GPF, 0); + cpu_x86_load_seg_cache(env, seg_reg, selector, 0, 0, 0); +#else + if (seg_reg == R_SS) { + if (!(env->hflags & HF_CS64_MASK) || cpl == 3) + raise_exception_err(EXCP0D_GPF, 0); + e2 = (cpl << DESC_DPL_SHIFT) | DESC_INTEL_UNUSABLE; + } else { + e2 = DESC_INTEL_UNUSABLE; + } + cpu_x86_load_seg_cache_with_clean_flags(env, seg_reg, selector, 0, 0, e2); +#endif + } else { + + if (selector & 0x4) + dt = &env->ldt; + else + dt = &env->gdt; + index = selector & ~7; + if ((index + 7) > dt->limit) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + ptr = dt->base + index; + e1 = ldl_kernel(ptr); + e2 = ldl_kernel(ptr + 4); + + if (!(e2 & DESC_S_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (seg_reg == R_SS) { + /* must be writable segment */ + if ((e2 & DESC_CS_MASK) || !(e2 & DESC_W_MASK)) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (rpl != cpl || dpl != cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + } else { + /* must be readable segment */ + if ((e2 & (DESC_CS_MASK | DESC_R_MASK)) == DESC_CS_MASK) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + + if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { + /* if not conforming code, test rights */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + } + } + + if (!(e2 & DESC_P_MASK)) { + if (seg_reg == R_SS) + raise_exception_err(EXCP0C_STACK, selector & 0xfffc); + else + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + } + + /* set the access bit if not already set */ + if (!(e2 & DESC_A_MASK)) { + e2 |= DESC_A_MASK; + stl_kernel(ptr + 4, e2); + } + + cpu_x86_load_seg_cache(env, seg_reg, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); +#if 0 + qemu_log("load_seg: sel=0x%04x base=0x%08lx limit=0x%08lx flags=%08x\n", + selector, (unsigned long)sc->base, sc->limit, sc->flags); +#endif + } +} + +/* protected mode jump */ +void helper_ljmp_protected(int new_cs, target_ulong new_eip, + int next_eip_addend) +{ + int gate_cs, type; + uint32_t e1, e2, cpl, dpl, rpl, limit; + target_ulong next_eip; + +#ifdef VBOX /** @todo Why do we do this? 
*/ + e1 = e2 = 0; +#endif + if ((new_cs & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + if (load_segment(&e1, &e2, new_cs) != 0) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (e2 & DESC_C_MASK) { + /* conforming code segment */ + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } else { + /* non conforming code segment */ + rpl = new_cs & 3; + if (rpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (dpl != cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + limit = get_seg_limit(e1, e2); + if (new_eip > limit && + !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + } else { + /* jump to call or task gate */ + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + rpl = new_cs & 3; + cpl = env->hflags & HF_CPL_MASK; + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: /* 286 TSS */ + case 9: /* 386 TSS */ + case 5: /* task gate */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + next_eip = env->eip + next_eip_addend; + switch_tss(new_cs, e1, e2, SWITCH_TSS_JMP, next_eip); + CC_OP = CC_OP_EFLAGS; + break; + case 4: /* 286 call gate */ + case 12: /* 386 call gate */ + if ((dpl < cpl) || (dpl < rpl)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + gate_cs = e1 >> 16; + new_eip = (e1 & 0xffff); + if (type == 12) + new_eip |= (e2 & 0xffff0000); + if (load_segment(&e1, &e2, gate_cs) != 0) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + /* must be code segment */ + if (((e2 & (DESC_S_MASK | DESC_CS_MASK)) != + (DESC_S_MASK | DESC_CS_MASK))) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + if (((e2 & DESC_C_MASK) && (dpl > cpl)) || + (!(e2 & DESC_C_MASK) && (dpl != cpl))) + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); + if (!(e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-514 of 253666.pdf */ + raise_exception_err(EXCP0B_NOSEG, gate_cs & 0xfffc); +#else + raise_exception_err(EXCP0D_GPF, gate_cs & 0xfffc); +#endif + limit = get_seg_limit(e1, e2); + if (new_eip > limit) + raise_exception_err(EXCP0D_GPF, 0); + cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + break; + default: + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + break; + } + } +} + +/* real mode call */ +void helper_lcall_real(int new_cs, target_ulong new_eip1, + int shift, int next_eip) +{ + int new_eip; + uint32_t esp, esp_mask; + target_ulong ssp; + + new_eip = new_eip1; + esp = ESP; + esp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + if (shift) { + PUSHL(ssp, esp, esp_mask, env->segs[R_CS].selector); + PUSHL(ssp, esp, esp_mask, next_eip); + } else { + PUSHW(ssp, esp, esp_mask, env->segs[R_CS].selector); + PUSHW(ssp, esp, esp_mask, next_eip); + } + + SET_ESP(esp, esp_mask); + env->eip = new_eip; + env->segs[R_CS].selector = new_cs; + env->segs[R_CS].base = (new_cs << 4); +} + +/* protected mode call 
*/ +void helper_lcall_protected(int new_cs, target_ulong new_eip, + int shift, int next_eip_addend) +{ + int new_stack, i; + uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count; + uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask; + uint32_t val, limit, old_sp_mask; + target_ulong ssp, old_ssp, next_eip; + +#ifdef VBOX /** @todo Why do we do this? */ + e1 = e2 = 0; +#endif + next_eip = env->eip + next_eip_addend; + LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift); + LOG_PCALL_STATE(env); + if ((new_cs & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + if (load_segment(&e1, &e2, new_cs) != 0) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + cpl = env->hflags & HF_CPL_MASK; + LOG_PCALL("desc=%08x:%08x\n", e1, e2); + if (e2 & DESC_S_MASK) { + if (!(e2 & DESC_CS_MASK)) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (e2 & DESC_C_MASK) { + /* conforming code segment */ + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } else { + /* non conforming code segment */ + rpl = new_cs & 3; + if (rpl > cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + if (dpl != cpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + +#ifdef TARGET_X86_64 + /* XXX: check 16/32 bit cases in long mode */ + if (shift == 2) { + target_ulong rsp; + /* 64 bit case */ + rsp = ESP; + PUSHQ(rsp, env->segs[R_CS].selector); + PUSHQ(rsp, next_eip); + /* from this point, not restartable */ + ESP = rsp; + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), e2); + EIP = new_eip; + } else +#endif + { + sp = ESP; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, sp, sp_mask, next_eip); + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, sp, sp_mask, next_eip); + } + + limit = get_seg_limit(e1, e2); + if (new_eip > limit) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + /* from this point, not restartable */ + SET_ESP(sp, sp_mask); + cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl, + get_seg_base(e1, e2), limit, e2); + EIP = new_eip; + } + } else { + /* check gate type */ + type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + rpl = new_cs & 3; + switch(type) { + case 1: /* available 286 TSS */ + case 9: /* available 386 TSS */ + case 5: /* task gate */ + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + switch_tss(new_cs, e1, e2, SWITCH_TSS_CALL, next_eip); + CC_OP = CC_OP_EFLAGS; + return; + case 4: /* 286 call gate */ + case 12: /* 386 call gate */ + break; + default: + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + break; + } + shift = type >> 3; + + if (dpl < cpl || dpl < rpl) + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + /* check valid bit */ + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + selector = e1 >> 16; + offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff); + param_count = e2 & 0x1f; + if ((selector & 0xfffc) == 0) + raise_exception_err(EXCP0D_GPF, 0); + + if (load_segment(&e1, &e2, selector) != 0) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_S_MASK) || !(e2 & (DESC_CS_MASK))) + 
raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (dpl > cpl) + raise_exception_err(EXCP0D_GPF, selector & 0xfffc); + if (!(e2 & DESC_P_MASK)) + raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc); + + if (!(e2 & DESC_C_MASK) && dpl < cpl) { + /* to inner privilege */ + get_ss_esp_from_tss(&ss, &sp, dpl); + LOG_PCALL("new ss:esp=%04x:%08x param_count=%d ESP=" TARGET_FMT_lx "\n", + ss, sp, param_count, ESP); + if ((ss & 0xfffc) == 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if ((ss & 3) != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (load_segment(&ss_e1, &ss_e2, ss) != 0) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (ss_dpl != dpl) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); + if (!(ss_e2 & DESC_P_MASK)) +#ifdef VBOX /* See page 3-99 of 253666.pdf */ + raise_exception_err(EXCP0C_STACK, ss & 0xfffc); +#else + raise_exception_err(EXCP0A_TSS, ss & 0xfffc); +#endif + + // push_size = ((param_count * 2) + 8) << shift; + + old_sp_mask = get_sp_mask(env->segs[R_SS].flags); + old_ssp = env->segs[R_SS].base; + + sp_mask = get_sp_mask(ss_e2); + ssp = get_seg_base(ss_e1, ss_e2); + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_SS].selector); + PUSHL(ssp, sp, sp_mask, ESP); + for(i = param_count - 1; i >= 0; i--) { + val = ldl_kernel(old_ssp + ((ESP + i * 4) & old_sp_mask)); + PUSHL(ssp, sp, sp_mask, val); + } + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_SS].selector); + PUSHW(ssp, sp, sp_mask, ESP); + for(i = param_count - 1; i >= 0; i--) { + val = lduw_kernel(old_ssp + ((ESP + i * 2) & old_sp_mask)); + PUSHW(ssp, sp, sp_mask, val); + } + } + new_stack = 1; + } else { + /* to same privilege */ + sp = ESP; + sp_mask = get_sp_mask(env->segs[R_SS].flags); + ssp = env->segs[R_SS].base; + // push_size = (4 << shift); + new_stack = 0; + } + + if (shift) { + PUSHL(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHL(ssp, sp, sp_mask, next_eip); + } else { + PUSHW(ssp, sp, sp_mask, env->segs[R_CS].selector); + PUSHW(ssp, sp, sp_mask, next_eip); + } + + /* from this point, not restartable */ + + if (new_stack) { + ss = (ss & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_SS, ss, + ssp, + get_seg_limit(ss_e1, ss_e2), + ss_e2); + } + + selector = (selector & ~3) | dpl; + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, dpl); + SET_ESP(sp, sp_mask); + EIP = offset; + } +} + +/* real and vm86 mode iret */ +void helper_iret_real(int shift) +{ + uint32_t sp, new_cs, new_eip, new_eflags, sp_mask; + target_ulong ssp; + int eflags_mask; +#ifdef VBOX + bool fVME = false; + + remR3TrapClear(env->pVM); +#endif /* VBOX */ + + sp_mask = 0xffff; /* XXXX: use SS segment size ? 
*/ + sp = ESP; + ssp = env->segs[R_SS].base; + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_eip); + POPL(ssp, sp, sp_mask, new_cs); + new_cs &= 0xffff; + POPL(ssp, sp, sp_mask, new_eflags); + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_eip); + POPW(ssp, sp, sp_mask, new_cs); + POPW(ssp, sp, sp_mask, new_eflags); + } +#ifdef VBOX + if ( (env->eflags & VM_MASK) + && ((env->eflags >> IOPL_SHIFT) & 3) != 3 + && (env->cr[4] & CR4_VME_MASK)) /* implied or else we would fault earlier */ + { + fVME = true; + /* if virtual interrupt pending and (virtual) interrupts will be enabled -> #GP */ + /* if TF will be set -> #GP */ + if ( ((new_eflags & IF_MASK) && (env->eflags & VIP_MASK)) + || (new_eflags & TF_MASK)) + raise_exception(EXCP0D_GPF); + } +#endif /* VBOX */ + ESP = (ESP & ~sp_mask) | (sp & sp_mask); + env->segs[R_CS].selector = new_cs; + env->segs[R_CS].base = (new_cs << 4); + env->eip = new_eip; +#ifdef VBOX + if (fVME) + eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK; + else +#endif + if (env->eflags & VM_MASK) + eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | RF_MASK | NT_MASK; + else + eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | RF_MASK | NT_MASK; + if (shift == 0) + eflags_mask &= 0xffff; + load_eflags(new_eflags, eflags_mask); + env->hflags2 &= ~HF2_NMI_MASK; +#ifdef VBOX + if (fVME) + { + if (new_eflags & IF_MASK) + env->eflags |= VIF_MASK; + else + env->eflags &= ~VIF_MASK; + } +#endif /* VBOX */ +} + +static inline void validate_seg(int seg_reg, int cpl) +{ + int dpl; + uint32_t e2; + + /* XXX: on x86_64, we do not want to nullify FS and GS because + they may still contain a valid base. I would be interested to + know how a real x86_64 CPU behaves */ + if ((seg_reg == R_FS || seg_reg == R_GS) && + (env->segs[seg_reg].selector & 0xfffc) == 0) + return; + + e2 = env->segs[seg_reg].flags; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { + /* data or non conforming code segment */ + if (dpl < cpl) { + cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0); + } + } +} + +/* protected mode iret */ +static inline void helper_ret_protected(int shift, int is_iret, int addend) +{ + uint32_t new_cs, new_eflags, new_ss; + uint32_t new_es, new_ds, new_fs, new_gs; + uint32_t e1, e2, ss_e1, ss_e2; + int cpl, dpl, rpl, eflags_mask, iopl; + target_ulong ssp, sp, new_eip, new_esp, sp_mask; + +#ifdef VBOX /** @todo Why do we do this? 
*/ + ss_e1 = ss_e2 = e1 = e2 = 0; +#endif + +#ifdef TARGET_X86_64 + if (shift == 2) + sp_mask = -1; + else +#endif + sp_mask = get_sp_mask(env->segs[R_SS].flags); + sp = ESP; + ssp = env->segs[R_SS].base; + new_eflags = 0; /* avoid warning */ +#ifdef TARGET_X86_64 + if (shift == 2) { + POPQ(sp, new_eip); + POPQ(sp, new_cs); + new_cs &= 0xffff; + if (is_iret) { + POPQ(sp, new_eflags); + } + } else +#endif + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_eip); + POPL(ssp, sp, sp_mask, new_cs); + new_cs &= 0xffff; + if (is_iret) { + POPL(ssp, sp, sp_mask, new_eflags); +#define LOG_GROUP LOG_GROUP_REM +#if defined(VBOX) && defined(DEBUG) + Log(("iret: new CS %04X (old=%x)\n", new_cs, env->segs[R_CS].selector)); + Log(("iret: new EIP %08X\n", (uint32_t)new_eip)); + Log(("iret: new EFLAGS %08X\n", new_eflags)); + Log(("iret: EAX=%08x\n", (uint32_t)EAX)); +#endif + if (new_eflags & VM_MASK) + goto return_to_vm86; + } +#ifdef VBOX + if ((new_cs & 0x3) == 1 && (env->state & CPU_RAW_RING0)) + { + if ( !EMIsRawRing1Enabled(env->pVM) + || env->segs[R_CS].selector == (new_cs & 0xfffc)) + { + Log(("RPL 1 -> new_cs %04X -> %04X\n", new_cs, new_cs & 0xfffc)); + new_cs = new_cs & 0xfffc; + } + else + { + /* Ugly assumption: assume a genuine switch to ring-1. */ + Log(("Genuine switch to ring-1 (iret)\n")); + } + } + else if ((new_cs & 0x3) == 2 && (env->state & CPU_RAW_RING0) && EMIsRawRing1Enabled(env->pVM)) + { + Log(("RPL 2 -> new_cs %04X -> %04X\n", new_cs, (new_cs & 0xfffc) | 1)); + new_cs = (new_cs & 0xfffc) | 1; + } +#endif + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_eip); + POPW(ssp, sp, sp_mask, new_cs); + if (is_iret) + POPW(ssp, sp, sp_mask, new_eflags); + } + LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n", + new_cs, new_eip, shift, addend); + LOG_PCALL_STATE(env); + if ((new_cs & 0xfffc) == 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_cs & 0xfffc) == 0\n")); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (load_segment(&e1, &e2, new_cs) != 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("load_segment failed\n")); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + if (!(e2 & DESC_S_MASK) || + !(e2 & DESC_CS_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("e2 mask %08x\n", e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + cpl = env->hflags & HF_CPL_MASK; + rpl = new_cs & 3; + if (rpl < cpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("rpl < cpl (%d vs %d)\n", rpl, cpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + + if (e2 & DESC_C_MASK) { + if (dpl > rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("dpl > rpl (%d vs %d)\n", dpl, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + } else { + if (dpl != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("dpl != rpl (%d vs %d) e1=%x e2=%x\n", dpl, rpl, e1, e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc); + } + } + if (!(e2 & DESC_P_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("DESC_P_MASK e2=%08x\n", e2)); +#endif + raise_exception_err(EXCP0B_NOSEG, new_cs & 0xfffc); + } + + sp += addend; + if (rpl == cpl && (!(env->hflags & HF_CS64_MASK) || + ((env->hflags & HF_CS64_MASK) && !is_iret))) { + /* return to same privilege level */ +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); +#endif + cpu_x86_load_seg_cache(env, R_CS, new_cs, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); 
+ } else { + /* return to different privilege level */ +#ifdef TARGET_X86_64 + if (shift == 2) { + POPQ(sp, new_esp); + POPQ(sp, new_ss); + new_ss &= 0xffff; + } else +#endif + if (shift == 1) { + /* 32 bits */ + POPL(ssp, sp, sp_mask, new_esp); + POPL(ssp, sp, sp_mask, new_ss); + new_ss &= 0xffff; + } else { + /* 16 bits */ + POPW(ssp, sp, sp_mask, new_esp); + POPW(ssp, sp, sp_mask, new_ss); + } + LOG_PCALL("new ss:esp=%04x:" TARGET_FMT_lx "\n", + new_ss, new_esp); + if ((new_ss & 0xfffc) == 0) { +#ifdef TARGET_X86_64 + /* NULL ss is allowed in long mode if cpl != 3*/ +# ifndef VBOX + /* XXX: test CS64 ? */ + if ((env->hflags & HF_LMA_MASK) && rpl != 3) { + cpu_x86_load_seg_cache(env, R_SS, new_ss, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (rpl << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + ss_e2 = DESC_B_MASK; /* XXX: should not be needed ? */ + } else +# else /* VBOX */ + if ((env->hflags & HF_LMA_MASK) && rpl != 3 && (e2 & DESC_L_MASK)) { + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); + cpu_x86_load_seg_cache_with_clean_flags(env, R_SS, new_ss, + 0, 0xffffffff, + DESC_INTEL_UNUSABLE | (rpl << DESC_DPL_SHIFT) ); + ss_e2 = DESC_B_MASK; /* not really used */ + } else +# endif +#endif + { +#if defined(VBOX) && defined(DEBUG) + Log(("NULL ss, rpl=%d\n", rpl)); +#endif + raise_exception_err(EXCP0D_GPF, 0); + } + } else { + if ((new_ss & 3) != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x != rpl=%d\n", new_ss, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (load_segment(&ss_e1, &ss_e2, new_ss) != 0) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x load error\n", new_ss)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (!(ss_e2 & DESC_S_MASK) || + (ss_e2 & DESC_CS_MASK) || + !(ss_e2 & DESC_W_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%x ss_e2=%#x bad type\n", new_ss, ss_e2)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3; + if (dpl != rpl) + { +#if defined(VBOX) && defined(DEBUG) + Log(("SS.dpl=%u != rpl=%u\n", dpl, rpl)); +#endif + raise_exception_err(EXCP0D_GPF, new_ss & 0xfffc); + } + if (!(ss_e2 & DESC_P_MASK)) + { +#if defined(VBOX) && defined(DEBUG) + Log(("new_ss=%#x #NP\n", new_ss)); +#endif + raise_exception_err(EXCP0B_NOSEG, new_ss & 0xfffc); + } +#ifdef VBOX + if (!(e2 & DESC_A_MASK)) + e2 = set_segment_accessed(new_cs, e2); + if (!(ss_e2 & DESC_A_MASK)) + ss_e2 = set_segment_accessed(new_ss, ss_e2); +#endif + cpu_x86_load_seg_cache(env, R_SS, new_ss, + get_seg_base(ss_e1, ss_e2), + get_seg_limit(ss_e1, ss_e2), + ss_e2); + } + + cpu_x86_load_seg_cache(env, R_CS, new_cs, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + cpu_x86_set_cpl(env, rpl); + sp = new_esp; +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) + sp_mask = -1; + else +#endif + sp_mask = get_sp_mask(ss_e2); + + /* validate data segments */ + validate_seg(R_ES, rpl); + validate_seg(R_DS, rpl); + validate_seg(R_FS, rpl); + validate_seg(R_GS, rpl); + + sp += addend; + } + SET_ESP(sp, sp_mask); + env->eip = new_eip; + if (is_iret) { + /* NOTE: 'cpl' is the _old_ CPL */ + eflags_mask = TF_MASK | AC_MASK | ID_MASK | RF_MASK | NT_MASK; + if (cpl == 0) +#ifdef VBOX + eflags_mask |= IOPL_MASK | VIF_MASK | VIP_MASK; +#else + eflags_mask |= IOPL_MASK; +#endif + iopl = (env->eflags >> IOPL_SHIFT) & 3; + if (cpl <= iopl) + eflags_mask |= IF_MASK; + if (shift == 0) + eflags_mask &= 0xffff; + 
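+        /* eflags_mask now holds exactly the bits iret may change here:
+           TF/AC/ID/RF/NT always, IOPL (plus VIF/VIP in the VBox build) only
+           when the old CPL is 0, IF only when CPL <= IOPL, and just the low
+           16 bits for a 16-bit operand size. */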
load_eflags(new_eflags, eflags_mask); + } + return; + + return_to_vm86: + POPL(ssp, sp, sp_mask, new_esp); + POPL(ssp, sp, sp_mask, new_ss); + POPL(ssp, sp, sp_mask, new_es); + POPL(ssp, sp, sp_mask, new_ds); + POPL(ssp, sp, sp_mask, new_fs); + POPL(ssp, sp, sp_mask, new_gs); + + /* modify processor state */ + load_eflags(new_eflags, TF_MASK | AC_MASK | ID_MASK | + IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK); + load_seg_vm(R_CS, new_cs & 0xffff); + cpu_x86_set_cpl(env, 3); + load_seg_vm(R_SS, new_ss & 0xffff); + load_seg_vm(R_ES, new_es & 0xffff); + load_seg_vm(R_DS, new_ds & 0xffff); + load_seg_vm(R_FS, new_fs & 0xffff); + load_seg_vm(R_GS, new_gs & 0xffff); + + env->eip = new_eip & 0xffff; + ESP = new_esp; +} + +void helper_iret_protected(int shift, int next_eip) +{ + int tss_selector, type; + uint32_t e1, e2; + +#ifdef VBOX + Log(("iret (shift=%d new_eip=%#x)\n", shift, next_eip)); + e1 = e2 = 0; /** @todo Why do we do this? */ + remR3TrapClear(env->pVM); +#endif + + /* specific case for TSS */ + if (env->eflags & NT_MASK) { +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) + { +#if defined(VBOX) && defined(DEBUG) + Log(("eflags.NT=1 on iret in long mode\n")); +#endif + raise_exception_err(EXCP0D_GPF, 0); + } +#endif + tss_selector = lduw_kernel(env->tr.base + 0); + if (tss_selector & 4) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + if (load_segment(&e1, &e2, tss_selector) != 0) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + type = (e2 >> DESC_TYPE_SHIFT) & 0x17; + /* NOTE: we check both segment and busy TSS */ + if (type != 3) + raise_exception_err(EXCP0A_TSS, tss_selector & 0xfffc); + switch_tss(tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip); + } else { + helper_ret_protected(shift, 1, 0); + } + env->hflags2 &= ~HF2_NMI_MASK; +} + +void helper_lret_protected(int shift, int addend) +{ + helper_ret_protected(shift, 0, addend); +} + +void helper_sysenter(void) +{ + if (env->sysenter_cs == 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK); + cpu_x86_set_cpl(env, 0); + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + } else +#endif + { + cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + } + cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + ESP = env->sysenter_esp; + EIP = env->sysenter_eip; +} + +void helper_sysexit(int dflag) +{ + int cpl; + + cpl = env->hflags & HF_CPL_MASK; + if (env->sysenter_cs == 0 || cpl != 0) { + raise_exception_err(EXCP0D_GPF, 0); + } + cpu_x86_set_cpl(env, 3); +#ifdef TARGET_X86_64 + if (dflag == 2) { + cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK); + cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 40) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + } else +#endif + { + cpu_x86_load_seg_cache(env, R_CS, 
((env->sysenter_cs + 16) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, ((env->sysenter_cs + 24) & 0xfffc) | 3, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | (3 << DESC_DPL_SHIFT) | + DESC_W_MASK | DESC_A_MASK); + } + ESP = ECX; + EIP = EDX; +} + +#if defined(CONFIG_USER_ONLY) +target_ulong helper_read_crN(int reg) +{ + return 0; +} + +void helper_write_crN(int reg, target_ulong t0) +{ +} + +void helper_movl_drN_T0(int reg, target_ulong t0) +{ +} +#else +target_ulong helper_read_crN(int reg) +{ + target_ulong val; + + helper_svm_check_intercept_param(SVM_EXIT_READ_CR0 + reg, 0); + switch(reg) { + default: + val = env->cr[reg]; + break; + case 8: + if (!(env->hflags2 & HF2_VINTR_MASK)) { +#ifndef VBOX + val = cpu_get_apic_tpr(env->apic_state); +#else /* VBOX */ + val = cpu_get_apic_tpr(env); +#endif /* VBOX */ + } else { + val = env->v_tpr; + } + break; + } + return val; +} + +void helper_write_crN(int reg, target_ulong t0) +{ + helper_svm_check_intercept_param(SVM_EXIT_WRITE_CR0 + reg, 0); + switch(reg) { + case 0: + cpu_x86_update_cr0(env, t0); + break; + case 3: + cpu_x86_update_cr3(env, t0); + break; + case 4: + cpu_x86_update_cr4(env, t0); + break; + case 8: + if (!(env->hflags2 & HF2_VINTR_MASK)) { +#ifndef VBOX + cpu_set_apic_tpr(env->apic_state, t0); +#else /* VBOX */ + cpu_set_apic_tpr(env, t0); +#endif /* VBOX */ + } + env->v_tpr = t0 & 0x0f; + break; + default: + env->cr[reg] = t0; + break; + } +} + +void helper_movl_drN_T0(int reg, target_ulong t0) +{ + int i; + + if (reg < 4) { + hw_breakpoint_remove(env, reg); + env->dr[reg] = t0; + hw_breakpoint_insert(env, reg); +# ifndef VBOX + } else if (reg == 7) { +# else + } else if (reg == 7 || reg == 5) { /* (DR5 is an alias for DR7.) */ + if (t0 & X86_DR7_MBZ_MASK) + raise_exception_err(EXCP0D_GPF, 0); + t0 |= X86_DR7_RA1_MASK; + t0 &= ~X86_DR7_RAZ_MASK; +# endif + for (i = 0; i < 4; i++) + hw_breakpoint_remove(env, i); + env->dr[7] = t0; + for (i = 0; i < 4; i++) + hw_breakpoint_insert(env, i); + } else { +# ifndef VBOX + env->dr[reg] = t0; +# else + if (t0 & X86_DR6_MBZ_MASK) + raise_exception_err(EXCP0D_GPF, 0); + t0 |= X86_DR6_RA1_MASK; + t0 &= ~X86_DR6_RAZ_MASK; + env->dr[6] = t0; /* (DR4 is an alias for DR6.) */ +# endif + } +} +#endif + +void helper_lmsw(target_ulong t0) +{ + /* only 4 lower bits of CR0 are modified. PE cannot be set to zero + if already set to one. */ + t0 = (env->cr[0] & ~0xe) | (t0 & 0xf); + helper_write_crN(0, t0); +} + +void helper_clts(void) +{ + env->cr[0] &= ~CR0_TS_MASK; + env->hflags &= ~HF_TS_MASK; +} + +void helper_invlpg(target_ulong addr) +{ + helper_svm_check_intercept_param(SVM_EXIT_INVLPG, 0); + tlb_flush_page(env, addr); +} + +void helper_rdtsc(void) +{ + uint64_t val; + + if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + helper_svm_check_intercept_param(SVM_EXIT_RDTSC, 0); + + val = cpu_get_tsc(env) + env->tsc_offset; + EAX = (uint32_t)(val); + EDX = (uint32_t)(val >> 32); +} + +void helper_rdtscp(void) +{ + helper_rdtsc(); +#ifndef VBOX + ECX = (uint32_t)(env->tsc_aux); +#else /* VBOX */ + uint64_t val; + if (cpu_rdmsr(env, MSR_K8_TSC_AUX, &val) == 0) + ECX = (uint32_t)(val); + else + ECX = 0; +#endif /* VBOX */ +} + +void helper_rdpmc(void) +{ +#ifdef VBOX + /* If X86_CR4_PCE is *not* set, then CPL must be zero. 
*/ + if (!(env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + /* Just return zero here; rather tricky to properly emulate this, especially as the specs are a mess. */ + EAX = 0; + EDX = 0; +#else /* !VBOX */ + if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception(EXCP0D_GPF); + } + helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0); + + /* currently unimplemented */ + raise_exception_err(EXCP06_ILLOP, 0); +#endif /* !VBOX */ +} + +#if defined(CONFIG_USER_ONLY) +void helper_wrmsr(void) +{ +} + +void helper_rdmsr(void) +{ +} +#else +void helper_wrmsr(void) +{ + uint64_t val; + + helper_svm_check_intercept_param(SVM_EXIT_MSR, 1); + + val = ((uint32_t)EAX) | ((uint64_t)((uint32_t)EDX) << 32); + + switch((uint32_t)ECX) { + case MSR_IA32_SYSENTER_CS: + env->sysenter_cs = val & 0xffff; + break; + case MSR_IA32_SYSENTER_ESP: + env->sysenter_esp = val; + break; + case MSR_IA32_SYSENTER_EIP: + env->sysenter_eip = val; + break; + case MSR_IA32_APICBASE: +# ifndef VBOX /* The CPUMSetGuestMsr call below does this now. */ + cpu_set_apic_base(env->apic_state, val); +# endif + break; + case MSR_EFER: + { + uint64_t update_mask; + update_mask = 0; + if (env->cpuid_ext2_features & CPUID_EXT2_SYSCALL) + update_mask |= MSR_EFER_SCE; + if (env->cpuid_ext2_features & CPUID_EXT2_LM) + update_mask |= MSR_EFER_LME; + if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR) + update_mask |= MSR_EFER_FFXSR; + if (env->cpuid_ext2_features & CPUID_EXT2_NX) + update_mask |= MSR_EFER_NXE; + if (env->cpuid_ext3_features & CPUID_EXT3_SVM) + update_mask |= MSR_EFER_SVME; + if (env->cpuid_ext2_features & CPUID_EXT2_FFXSR) + update_mask |= MSR_EFER_FFXSR; + cpu_load_efer(env, (env->efer & ~update_mask) | + (val & update_mask)); + } + break; + case MSR_STAR: + env->star = val; + break; + case MSR_PAT: + env->pat = val; + break; + case MSR_VM_HSAVE_PA: + env->vm_hsave = val; + break; +#ifdef TARGET_X86_64 + case MSR_LSTAR: + env->lstar = val; + break; + case MSR_CSTAR: + env->cstar = val; + break; + case MSR_FMASK: + env->fmask = val; + break; + case MSR_FSBASE: + env->segs[R_FS].base = val; + break; + case MSR_GSBASE: + env->segs[R_GS].base = val; + break; + case MSR_KERNELGSBASE: + env->kernelgsbase = val; + break; +#endif +# ifndef VBOX + case MSR_MTRRphysBase(0): + case MSR_MTRRphysBase(1): + case MSR_MTRRphysBase(2): + case MSR_MTRRphysBase(3): + case MSR_MTRRphysBase(4): + case MSR_MTRRphysBase(5): + case MSR_MTRRphysBase(6): + case MSR_MTRRphysBase(7): + env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base = val; + break; + case MSR_MTRRphysMask(0): + case MSR_MTRRphysMask(1): + case MSR_MTRRphysMask(2): + case MSR_MTRRphysMask(3): + case MSR_MTRRphysMask(4): + case MSR_MTRRphysMask(5): + case MSR_MTRRphysMask(6): + case MSR_MTRRphysMask(7): + env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask = val; + break; + case MSR_MTRRfix64K_00000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix64K_00000] = val; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1] = val; + break; + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3] = val; + break; + case MSR_MTRRdefType: + env->mtrr_deftype = val; + break; + case MSR_MCG_STATUS: + 
env->mcg_status = val; + break; + case MSR_MCG_CTL: + if ((env->mcg_cap & MCG_CTL_P) + && (val == 0 || val == ~(uint64_t)0)) + env->mcg_ctl = val; + break; + case MSR_TSC_AUX: + env->tsc_aux = val; + break; +# endif /* !VBOX */ + default: +# ifndef VBOX + if ((uint32_t)ECX >= MSR_MC0_CTL + && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { + uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL; + if ((offset & 0x3) != 0 + || (val == 0 || val == ~(uint64_t)0)) + env->mce_banks[offset] = val; + break; + } + /* XXX: exception ? */ +# endif + break; + } + +# ifdef VBOX + /* call CPUM. */ + if (cpu_wrmsr(env, (uint32_t)ECX, val) != 0) + { + /** @todo be a brave man and raise a \#GP(0) here as we should... */ + } +# endif +} + +void helper_rdmsr(void) +{ + uint64_t val; + + helper_svm_check_intercept_param(SVM_EXIT_MSR, 0); + + switch((uint32_t)ECX) { + case MSR_IA32_SYSENTER_CS: + val = env->sysenter_cs; + break; + case MSR_IA32_SYSENTER_ESP: + val = env->sysenter_esp; + break; + case MSR_IA32_SYSENTER_EIP: + val = env->sysenter_eip; + break; + case MSR_IA32_APICBASE: +#ifndef VBOX + val = cpu_get_apic_base(env->apic_state); +#else /* VBOX */ + val = cpu_get_apic_base(env); +#endif /* VBOX */ + break; + case MSR_EFER: + val = env->efer; + break; + case MSR_STAR: + val = env->star; + break; + case MSR_PAT: + val = env->pat; + break; + case MSR_VM_HSAVE_PA: + val = env->vm_hsave; + break; +# ifndef VBOX /* forward to CPUMQueryGuestMsr. */ + case MSR_IA32_PERF_STATUS: + /* tsc_increment_by_tick */ + val = 1000ULL; + /* CPU multiplier */ + val |= (((uint64_t)4ULL) << 40); + break; +# endif /* !VBOX */ +#ifdef TARGET_X86_64 + case MSR_LSTAR: + val = env->lstar; + break; + case MSR_CSTAR: + val = env->cstar; + break; + case MSR_FMASK: + val = env->fmask; + break; + case MSR_FSBASE: + val = env->segs[R_FS].base; + break; + case MSR_GSBASE: + val = env->segs[R_GS].base; + break; + case MSR_KERNELGSBASE: + val = env->kernelgsbase; + break; +# ifndef VBOX + case MSR_TSC_AUX: + val = env->tsc_aux; + break; +# endif /*!VBOX*/ +#endif +# ifndef VBOX + case MSR_MTRRphysBase(0): + case MSR_MTRRphysBase(1): + case MSR_MTRRphysBase(2): + case MSR_MTRRphysBase(3): + case MSR_MTRRphysBase(4): + case MSR_MTRRphysBase(5): + case MSR_MTRRphysBase(6): + case MSR_MTRRphysBase(7): + val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysBase(0)) / 2].base; + break; + case MSR_MTRRphysMask(0): + case MSR_MTRRphysMask(1): + case MSR_MTRRphysMask(2): + case MSR_MTRRphysMask(3): + case MSR_MTRRphysMask(4): + case MSR_MTRRphysMask(5): + case MSR_MTRRphysMask(6): + case MSR_MTRRphysMask(7): + val = env->mtrr_var[((uint32_t)ECX - MSR_MTRRphysMask(0)) / 2].mask; + break; + case MSR_MTRRfix64K_00000: + val = env->mtrr_fixed[0]; + break; + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix16K_80000 + 1]; + break; + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + val = env->mtrr_fixed[(uint32_t)ECX - MSR_MTRRfix4K_C0000 + 3]; + break; + case MSR_MTRRdefType: + val = env->mtrr_deftype; + break; + case MSR_MTRRcap: + if (env->cpuid_features & CPUID_MTRR) + val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT | MSR_MTRRcap_WC_SUPPORTED; + else + /* XXX: exception ? 
*/ + val = 0; + break; + case MSR_MCG_CAP: + val = env->mcg_cap; + break; + case MSR_MCG_CTL: + if (env->mcg_cap & MCG_CTL_P) + val = env->mcg_ctl; + else + val = 0; + break; + case MSR_MCG_STATUS: + val = env->mcg_status; + break; +# endif /* !VBOX */ + default: +# ifndef VBOX + if ((uint32_t)ECX >= MSR_MC0_CTL + && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { + uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL; + val = env->mce_banks[offset]; + break; + } + /* XXX: exception ? */ + val = 0; +# else /* VBOX */ + if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0) + { + /** @todo be a brave man and raise a \#GP(0) here as we should... */ + val = 0; + } +# endif /* VBOX */ + break; + } + EAX = (uint32_t)(val); + EDX = (uint32_t)(val >> 32); + +# ifdef VBOX_STRICT + if ((uint32_t)ECX != MSR_IA32_TSC) { + if (cpu_rdmsr(env, (uint32_t)ECX, &val) != 0) + val = 0; + AssertMsg(val == RT_MAKE_U64(EAX, EDX), ("idMsr=%#x val=%#llx eax:edx=%#llx\n", (uint32_t)ECX, val, RT_MAKE_U64(EAX, EDX))); + } +# endif +} +#endif + +target_ulong helper_lsl(target_ulong selector1) +{ + unsigned int limit; + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl, type; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) { + /* conforming */ + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: + case 2: + case 3: + case 9: + case 11: + break; + default: + goto fail; + } + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return 0; + } + } + limit = get_seg_limit(e1, e2); + CC_SRC = eflags | CC_Z; + return limit; +} + +target_ulong helper_lar(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl, type; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_S_MASK) { + if ((e2 & DESC_CS_MASK) && (e2 & DESC_C_MASK)) { + /* conforming */ + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + type = (e2 >> DESC_TYPE_SHIFT) & 0xf; + switch(type) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 9: + case 11: + case 12: + break; + default: + goto fail; + } + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return 0; + } + } + CC_SRC = eflags | CC_Z; +#ifdef VBOX /* AMD says 0x00ffff00, while intel says 0x00fxff00. Bochs and IEM does like AMD says (x=f). 
*/ + return e2 & 0x00ffff00; +#else + return e2 & 0x00f0ff00; +#endif +} + +void helper_verr(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + if (!(e2 & DESC_S_MASK)) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_CS_MASK) { + if (!(e2 & DESC_R_MASK)) + goto fail; + if (!(e2 & DESC_C_MASK)) { + if (dpl < cpl || dpl < rpl) + goto fail; + } + } else { + if (dpl < cpl || dpl < rpl) { + fail: + CC_SRC = eflags & ~CC_Z; + return; + } + } + CC_SRC = eflags | CC_Z; +} + +void helper_verw(target_ulong selector1) +{ + uint32_t e1, e2, eflags, selector; + int rpl, dpl, cpl; + + selector = selector1 & 0xffff; + eflags = helper_cc_compute_all(CC_OP); + if ((selector & 0xfffc) == 0) + goto fail; + if (load_segment(&e1, &e2, selector) != 0) + goto fail; + if (!(e2 & DESC_S_MASK)) + goto fail; + rpl = selector & 3; + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + if (e2 & DESC_CS_MASK) { + goto fail; + } else { + if (dpl < cpl || dpl < rpl) + goto fail; + if (!(e2 & DESC_W_MASK)) { + fail: + CC_SRC = eflags & ~CC_Z; + return; + } + } + CC_SRC = eflags | CC_Z; +} + +/* x87 FPU helpers */ + +static void fpu_set_exception(int mask) +{ + env->fpus |= mask; + if (env->fpus & (~env->fpuc & FPUC_EM)) + env->fpus |= FPUS_SE | FPUS_B; +} + +static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b) +{ + if (b == 0.0) + fpu_set_exception(FPUS_ZE); + return a / b; +} + +static void fpu_raise_exception(void) +{ + if (env->cr[0] & CR0_NE_MASK) { + raise_exception(EXCP10_COPR); + } +#if !defined(CONFIG_USER_ONLY) + else { + cpu_set_ferr(env); + } +#endif +} + +void helper_flds_FT0(uint32_t val) +{ + union { + float32 f; + uint32_t i; + } u; + u.i = val; + FT0 = float32_to_floatx(u.f, &env->fp_status); +} + +void helper_fldl_FT0(uint64_t val) +{ + union { + float64 f; + uint64_t i; + } u; + u.i = val; + FT0 = float64_to_floatx(u.f, &env->fp_status); +} + +void helper_fildl_FT0(int32_t val) +{ + FT0 = int32_to_floatx(val, &env->fp_status); +} + +void helper_flds_ST0(uint32_t val) +{ + int new_fpstt; + union { + float32 f; + uint32_t i; + } u; + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float32_to_floatx(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fldl_ST0(uint64_t val) +{ + int new_fpstt; + union { + float64 f; + uint64_t i; + } u; + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float64_to_floatx(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildl_ST0(int32_t val) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int32_to_floatx(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildll_ST0(int64_t val) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int64_to_floatx(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +#ifndef VBOX +uint32_t helper_fsts_ST0(void) +#else +RTCCUINTREG helper_fsts_ST0(void) +#endif +{ + union { + float32 f; + uint32_t i; + } u; + u.f = 
floatx_to_float32(ST0, &env->fp_status); + return u.i; +} + +uint64_t helper_fstl_ST0(void) +{ + union { + float64 f; + uint64_t i; + } u; + u.f = floatx_to_float64(ST0, &env->fp_status); + return u.i; +} + +#ifndef VBOX +int32_t helper_fist_ST0(void) +#else +RTCCINTREG helper_fist_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32(ST0, &env->fp_status); + if (val != (int16_t)val) + val = -32768; + return val; +} + +#ifndef VBOX +int32_t helper_fistl_ST0(void) +#else +RTCCINTREG helper_fistl_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32(ST0, &env->fp_status); + return val; +} + +int64_t helper_fistll_ST0(void) +{ + int64_t val; + val = floatx_to_int64(ST0, &env->fp_status); + return val; +} + +#ifndef VBOX +int32_t helper_fistt_ST0(void) +#else +RTCCINTREG helper_fistt_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + if (val != (int16_t)val) + val = -32768; + return val; +} + +#ifndef VBOX +int32_t helper_fisttl_ST0(void) +#else +RTCCINTREG helper_fisttl_ST0(void) +#endif +{ + int32_t val; + val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + return val; +} + +int64_t helper_fisttll_ST0(void) +{ + int64_t val; + val = floatx_to_int64_round_to_zero(ST0, &env->fp_status); + return val; +} + +void helper_fldt_ST0(target_ulong ptr) +{ + int new_fpstt; + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = helper_fldt(ptr); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fstt_ST0(target_ulong ptr) +{ + helper_fstt(ST0, ptr); +} + +void helper_fpush(void) +{ + fpush(); +} + +void helper_fpop(void) +{ + fpop(); +} + +void helper_fdecstp(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fpus &= (~0x4700); +} + +void helper_fincstp(void) +{ + env->fpstt = (env->fpstt + 1) & 7; + env->fpus &= (~0x4700); +} + +/* FPU move */ + +void helper_ffree_STN(int st_index) +{ + env->fptags[(env->fpstt + st_index) & 7] = 1; +} + +void helper_fmov_ST0_FT0(void) +{ + ST0 = FT0; +} + +void helper_fmov_FT0_STN(int st_index) +{ + FT0 = ST(st_index); +} + +void helper_fmov_ST0_STN(int st_index) +{ + ST0 = ST(st_index); +} + +void helper_fmov_STN_ST0(int st_index) +{ + ST(st_index) = ST0; +} + +void helper_fxchg_ST0_STN(int st_index) +{ + CPU86_LDouble tmp; + tmp = ST(st_index); + ST(st_index) = ST0; + ST0 = tmp; +} + +/* FPU operations */ + +static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; + +void helper_fcom_ST0_FT0(void) +{ + int ret; + + ret = floatx_compare(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; +} + +void helper_fucom_ST0_FT0(void) +{ + int ret; + + ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1]; +} + +static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_fcomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx_compare(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fucomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fadd_ST0_FT0(void) +{ + ST0 += FT0; +} + +void helper_fmul_ST0_FT0(void) +{ + ST0 *= FT0; +} + +void helper_fsub_ST0_FT0(void) +{ + ST0 -= FT0; +} + +void 
helper_fsubr_ST0_FT0(void) +{ + ST0 = FT0 - ST0; +} + +void helper_fdiv_ST0_FT0(void) +{ + ST0 = helper_fdiv(ST0, FT0); +} + +void helper_fdivr_ST0_FT0(void) +{ + ST0 = helper_fdiv(FT0, ST0); +} + +/* fp operations between STN and ST0 */ + +void helper_fadd_STN_ST0(int st_index) +{ + ST(st_index) += ST0; +} + +void helper_fmul_STN_ST0(int st_index) +{ + ST(st_index) *= ST0; +} + +void helper_fsub_STN_ST0(int st_index) +{ + ST(st_index) -= ST0; +} + +void helper_fsubr_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = ST0 - *p; +} + +void helper_fdiv_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = helper_fdiv(*p, ST0); +} + +void helper_fdivr_STN_ST0(int st_index) +{ + CPU86_LDouble *p; + p = &ST(st_index); + *p = helper_fdiv(ST0, *p); +} + +/* misc FPU operations */ +void helper_fchs_ST0(void) +{ + ST0 = floatx_chs(ST0); +} + +void helper_fabs_ST0(void) +{ + ST0 = floatx_abs(ST0); +} + +void helper_fld1_ST0(void) +{ + ST0 = f15rk[1]; +} + +void helper_fldl2t_ST0(void) +{ + ST0 = f15rk[6]; +} + +void helper_fldl2e_ST0(void) +{ + ST0 = f15rk[5]; +} + +void helper_fldpi_ST0(void) +{ + ST0 = f15rk[2]; +} + +void helper_fldlg2_ST0(void) +{ + ST0 = f15rk[3]; +} + +void helper_fldln2_ST0(void) +{ + ST0 = f15rk[4]; +} + +void helper_fldz_ST0(void) +{ + ST0 = f15rk[0]; +} + +void helper_fldz_FT0(void) +{ + FT0 = f15rk[0]; +} + +#ifndef VBOX +uint32_t helper_fnstsw(void) +#else +RTCCUINTREG helper_fnstsw(void) +#endif +{ + return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; +} + +#ifndef VBOX +uint32_t helper_fnstcw(void) +#else +RTCCUINTREG helper_fnstcw(void) +#endif +{ + return env->fpuc; +} + +static void update_fp_status(void) +{ + int rnd_type; + + /* set rounding mode */ + switch(env->fpuc & RC_MASK) { + default: + case RC_NEAR: + rnd_type = float_round_nearest_even; + break; + case RC_DOWN: + rnd_type = float_round_down; + break; + case RC_UP: + rnd_type = float_round_up; + break; + case RC_CHOP: + rnd_type = float_round_to_zero; + break; + } + set_float_rounding_mode(rnd_type, &env->fp_status); +#ifdef FLOATX80 + switch((env->fpuc >> 8) & 3) { + case 0: + rnd_type = 32; + break; + case 2: + rnd_type = 64; + break; + case 3: + default: + rnd_type = 80; + break; + } + set_floatx80_rounding_precision(rnd_type, &env->fp_status); +#endif +} + +void helper_fldcw(uint32_t val) +{ + env->fpuc = val; + update_fp_status(); +} + +void helper_fclex(void) +{ + env->fpus &= 0x7f00; +} + +void helper_fwait(void) +{ + if (env->fpus & FPUS_SE) + fpu_raise_exception(); +} + +void helper_fninit(void) +{ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +/* BCD ops */ + +void helper_fbld_ST0(target_ulong ptr) +{ + CPU86_LDouble tmp; + uint64_t val; + unsigned int v; + int i; + + val = 0; + for(i = 8; i >= 0; i--) { + v = ldub(ptr + i); + val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); + } + tmp = val; + if (ldub(ptr + 9) & 0x80) + tmp = -tmp; + fpush(); + ST0 = tmp; +} + +void helper_fbst_ST0(target_ulong ptr) +{ + int v; + target_ulong mem_ref, mem_end; + int64_t val; + + val = floatx_to_int64(ST0, &env->fp_status); + mem_ref = ptr; + mem_end = mem_ref + 9; + if (val < 0) { + stb(mem_end, 0x80); + val = -val; + } else { + stb(mem_end, 0x00); + } + while (mem_ref < mem_end) { + if (val == 0) + break; + v = val % 100; + val = val / 100; + v = ((v / 10) << 4) | (v % 10); + 
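+        /* two packed BCD digits per byte, tens digit in the high nibble:
+           a remainder of 47 is stored as 0x47 */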
stb(mem_ref++, v); + } + while (mem_ref < mem_end) { + stb(mem_ref++, 0); + } +} + +void helper_f2xm1(void) +{ + ST0 = pow(2.0,ST0) - 1.0; +} + +void helper_fyl2x(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if (fptemp>0.0){ + fptemp = log(fptemp)/log(2.0); /* log2(ST) */ + ST1 *= fptemp; + fpop(); + } else { + env->fpus &= (~0x4700); + env->fpus |= 0x400; + } +} + +void helper_fptan(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = tan(fptemp); + fpush(); + ST0 = 1.0; + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**52 only */ + } +} + +void helper_fpatan(void) +{ + CPU86_LDouble fptemp, fpsrcop; + + fpsrcop = ST1; + fptemp = ST0; + ST1 = atan2(fpsrcop,fptemp); + fpop(); +} + +void helper_fxtract(void) +{ + CPU86_LDoubleU temp; + unsigned int expdif; + + temp.d = ST0; + expdif = EXPD(temp) - EXPBIAS; + /*DP exponent bias*/ + ST0 = expdif; + fpush(); + BIASEXPONENT(temp); + ST0 = temp.d; +} + +void helper_fprem1(void) +{ + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + +#ifndef VBOX /* Unfortunately, we cannot handle isinf/isnan easily in wrapper */ + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { +#else + if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { +#endif + ST0 = 0.0 / 0.0; /* NaN */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = ST0; + fptemp = ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if (expdif < 53) { + dblq = fpsrcop / fptemp; + /* round dblq towards nearest integer */ + dblq = rint(dblq); + ST0 = fpsrcop - fptemp * dblq; + + /* convert dblq to q by truncating towards zero */ + if (dblq < 0.0) + q = (signed long long int)(-dblq); + else + q = (signed long long int)dblq; + + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C3,C1) <-- (q2,q1,q0) */ + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, expdif - 50); + fpsrcop = (ST0 / ST1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0) ? + -(floor(fabs(fpsrcop))) : floor(fpsrcop); + ST0 -= (ST1 * fpsrcop * fptemp); + } +} + +void helper_fprem(void) +{ + CPU86_LDouble dblq, fpsrcop, fptemp; + CPU86_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + +#ifndef VBOX /* Unfortunately, we cannot easily handle isinf/isnan in wrapper */ + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { +#else + if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { +#endif + ST0 = 0.0 / 0.0; /* NaN */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = (CPU86_LDouble)ST0; + fptemp = (CPU86_LDouble)ST1; + fpsrcop1.d = fpsrcop; + fptemp1.d = fptemp; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if ( expdif < 53 ) { + dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/; + /* round dblq towards zero */ + dblq = (dblq < 0.0) ? 
ceil(dblq) : floor(dblq); + ST0 = fpsrcop/*ST0*/ - fptemp * dblq; + + /* convert dblq to q by truncating towards zero */ + if (dblq < 0.0) + q = (signed long long int)(-dblq); + else + q = (signed long long int)dblq; + + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C3,C1) <-- (q2,q1,q0) */ + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + int N = 32 + (expdif % 32); /* as per AMD docs */ + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, (double)(expdif - N)); + fpsrcop = (ST0 / ST1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0) ? + -(floor(fabs(fpsrcop))) : floor(fpsrcop); + ST0 -= (ST1 * fpsrcop * fptemp); + } +} + +void helper_fyl2xp1(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp+1.0)>0.0) { + fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */ + ST1 *= fptemp; + fpop(); + } else { + env->fpus &= (~0x4700); + env->fpus |= 0x400; + } +} + +void helper_fsqrt(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if (fptemp<0.0) { + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + env->fpus |= 0x400; + } + ST0 = sqrt(fptemp); +} + +void helper_fsincos(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = sin(fptemp); + fpush(); + ST0 = cos(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +void helper_frndint(void) +{ + ST0 = floatx_round_to_int(ST0, &env->fp_status); +} + +void helper_fscale(void) +{ + ST0 = ldexp (ST0, (int)(ST1)); +} + +void helper_fsin(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = sin(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg| < 2**53 only */ + } +} + +void helper_fcos(void) +{ + CPU86_LDouble fptemp; + + fptemp = ST0; + if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = cos(fptemp); + env->fpus &= (~0x400); /* C2 <-- 0 */ + /* the above code is for |arg5 < 2**63 only */ + } +} + +void helper_fxam_ST0(void) +{ + CPU86_LDoubleU temp; + int expdif; + + temp.d = ST0; + + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ + if (SIGND(temp)) + env->fpus |= 0x200; /* C1 <-- 1 */ + + /* XXX: test fptags too */ + expdif = EXPD(temp); + if (expdif == MAXEXPD) { +#ifdef USE_X86LDOUBLE + if (MANTD(temp) == 0x8000000000000000ULL) +#else + if (MANTD(temp) == 0) +#endif + env->fpus |= 0x500 /*Infinity*/; + else + env->fpus |= 0x100 /*NaN*/; + } else if (expdif == 0) { + if (MANTD(temp) == 0) + env->fpus |= 0x4000 /*Zero*/; + else + env->fpus |= 0x4400 /*Denormal*/; + } else { + env->fpus |= 0x400; + } +} + +void helper_fstenv(target_ulong ptr, int data32) +{ + int fpus, fptag, exp, i; + uint64_t mant; + CPU86_LDoubleU tmp; + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i=7; i>=0; i--) { + fptag <<= 2; + if (env->fptags[i]) { + fptag |= 3; + } else { + tmp.d = env->fpregs[i].d; + exp = EXPD(tmp); + mant = MANTD(tmp); + if (exp == 0 && mant == 0) { + /* zero */ + fptag |= 1; + } else if (exp == 0 || exp == MAXEXPD +#ifdef USE_X86LDOUBLE + || (mant & (1LL << 63)) == 0 +#endif + ) { + /* NaNs, infinity, denormal */ + fptag |= 2; + } + } + } + if (data32) { + /* 32 bit */ + stl(ptr, env->fpuc); + stl(ptr + 4, fpus); + stl(ptr + 8, 
fptag); + stl(ptr + 12, 0); /* fpip */ + stl(ptr + 16, 0); /* fpcs */ + stl(ptr + 20, 0); /* fpoo */ + stl(ptr + 24, 0); /* fpos */ + } else { + /* 16 bit */ + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag); + stw(ptr + 6, 0); + stw(ptr + 8, 0); + stw(ptr + 10, 0); + stw(ptr + 12, 0); + } +} + +void helper_fldenv(target_ulong ptr, int data32) +{ + int i, fpus, fptag; + + if (data32) { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 4); + fptag = lduw(ptr + 8); + } + else { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + } + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } +} + +void helper_fsave(target_ulong ptr, int data32) +{ + CPU86_LDouble tmp; + int i; + + helper_fstenv(ptr, data32); + + ptr += (14 << data32); + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, ptr); + ptr += 10; + } + + /* fninit */ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +void helper_frstor(target_ulong ptr, int data32) +{ + CPU86_LDouble tmp; + int i; + + helper_fldenv(ptr, data32); + ptr += (14 << data32); + + for(i = 0;i < 8; i++) { + tmp = helper_fldt(ptr); + ST(i) = tmp; + ptr += 10; + } +} + +void helper_fxsave(target_ulong ptr, int data64) +{ + int fpus, fptag, i, nb_xmm_regs; + CPU86_LDouble tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(EXCP0D_GPF); + } + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag ^ 0xff); +#ifdef TARGET_X86_64 + if (data64) { + stq(ptr + 0x08, 0); /* rip */ + stq(ptr + 0x10, 0); /* rdp */ + } else +#endif + { + stl(ptr + 0x08, 0); /* eip */ + stl(ptr + 0x0c, 0); /* sel */ + stl(ptr + 0x10, 0); /* dp */ + stl(ptr + 0x14, 0); /* sel */ + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, addr); + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ + stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ + if (env->hflags & HF_CS64_MASK) + nb_xmm_regs = 16; + else + nb_xmm_regs = 8; + addr = ptr + 0xa0; + /* Fast FXSAVE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + for(i = 0; i < nb_xmm_regs; i++) { + stq(addr, env->xmm_regs[i].XMM_Q(0)); + stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); + addr += 16; + } + } + } +} + +void helper_fxrstor(target_ulong ptr, int data64) +{ + int i, fpus, fptag, nb_xmm_regs; + CPU86_LDouble tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(EXCP0D_GPF); + } + + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + fptag ^= 0xff; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = helper_fldt(addr); + ST(i) = tmp; + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + env->mxcsr = ldl(ptr + 0x18); + //ldl(ptr + 0x1c); + if (env->hflags & HF_CS64_MASK) + nb_xmm_regs = 16; + else + 
nb_xmm_regs = 8; + addr = ptr + 0xa0; + /* Fast FXRESTORE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + for(i = 0; i < nb_xmm_regs; i++) { +#if !defined(VBOX) || __GNUC__ < 4 + env->xmm_regs[i].XMM_Q(0) = ldq(addr); + env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); +#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. */ +# if 1 + env->xmm_regs[i].XMM_L(0) = ldl(addr); + env->xmm_regs[i].XMM_L(1) = ldl(addr + 4); + env->xmm_regs[i].XMM_L(2) = ldl(addr + 8); + env->xmm_regs[i].XMM_L(3) = ldl(addr + 12); +# else + /* this works fine on Mac OS X, gcc 4.0.1 */ + uint64_t u64 = ldq(addr); + env->xmm_regs[i].XMM_Q(0); + u64 = ldq(addr + 4); + env->xmm_regs[i].XMM_Q(1) = u64; +# endif +#endif + addr += 16; + } + } + } +} + +#ifndef USE_X86LDOUBLE + +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) +{ + CPU86_LDoubleU temp; + int e; + + temp.d = f; + /* mantissa */ + *pmant = (MANTD(temp) << 11) | (1LL << 63); + /* exponent + sign */ + e = EXPD(temp) - EXPBIAS + 16383; + e |= SIGND(temp) >> 16; + *pexp = e; +} + +CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) +{ + CPU86_LDoubleU temp; + int e; + uint64_t ll; + + /* XXX: handle overflow ? */ + e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ + e |= (upper >> 4) & 0x800; /* sign */ + ll = (mant >> 11) & ((1LL << 52) - 1); +#ifdef __arm__ + temp.l.upper = (e << 20) | (ll >> 32); + temp.l.lower = ll; +#else + temp.ll = ll | ((uint64_t)e << 52); +#endif + return temp.d; +} + +#else + +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) +{ + CPU86_LDoubleU temp; + + temp.d = f; + *pmant = temp.l.lower; + *pexp = temp.l.upper; +} + +CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) +{ + CPU86_LDoubleU temp; + + temp.l.upper = upper; + temp.l.lower = mant; + return temp.d; +} +#endif + +#ifdef TARGET_X86_64 + +//#define DEBUG_MULDIV + +static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + *plow += a; + /* carry test */ + if (*plow < a) + (*phigh)++; + *phigh += b; +} + +static void neg128(uint64_t *plow, uint64_t *phigh) +{ + *plow = ~ *plow; + *phigh = ~ *phigh; + add128(plow, phigh, 1, 0); +} + +/* return TRUE if overflow */ +static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b) +{ + uint64_t q, r, a1, a0; + int i, qb, ab; + + a0 = *plow; + a1 = *phigh; + if (a1 == 0) { + q = a0 / b; + r = a0 % b; + *plow = q; + *phigh = r; + } else { + if (a1 >= b) + return 1; + /* XXX: use a better algorithm */ + for(i = 0; i < 64; i++) { + ab = a1 >> 63; + a1 = (a1 << 1) | (a0 >> 63); + if (ab || a1 >= b) { + a1 -= b; + qb = 1; + } else { + qb = 0; + } + a0 = (a0 << 1) | qb; + } +#if defined(DEBUG_MULDIV) + printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n", + *phigh, *plow, b, a0, a1); +#endif + *plow = a0; + *phigh = a1; + } + return 0; +} + +/* return TRUE if overflow */ +static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b) +{ + int sa, sb; + sa = ((int64_t)*phigh < 0); + if (sa) + neg128(plow, phigh); + sb = (b < 0); + if (sb) + b = -b; + if (div64(plow, phigh, b) != 0) + return 1; + if (sa ^ sb) { + if (*plow > (1ULL << 63)) + return 1; + *plow = - *plow; + } else { + if (*plow >= (1ULL << 63)) + return 1; + } + if (sa) + *phigh = - *phigh; + return 0; +} + +void helper_mulq_EAX_T0(target_ulong t0) +{ + uint64_t r0, r1; + + mulu64(&r0, &r1, EAX, t0); + EAX = r0; + EDX = 
r1; + CC_DST = r0; + CC_SRC = r1; +} + +void helper_imulq_EAX_T0(target_ulong t0) +{ + uint64_t r0, r1; + + muls64(&r0, &r1, EAX, t0); + EAX = r0; + EDX = r1; + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); +} + +target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1) +{ + uint64_t r0, r1; + + muls64(&r0, &r1, t0, t1); + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); + return r0; +} + +void helper_divq_EAX(target_ulong t0) +{ + uint64_t r0, r1; + if (t0 == 0) { + raise_exception(EXCP00_DIVZ); + } + r0 = EAX; + r1 = EDX; + if (div64(&r0, &r1, t0)) + raise_exception(EXCP00_DIVZ); + EAX = r0; + EDX = r1; +} + +void helper_idivq_EAX(target_ulong t0) +{ + uint64_t r0, r1; + if (t0 == 0) { + raise_exception(EXCP00_DIVZ); + } + r0 = EAX; + r1 = EDX; + if (idiv64(&r0, &r1, t0)) + raise_exception(EXCP00_DIVZ); + EAX = r0; + EDX = r1; +} +#endif + +static void do_hlt(void) +{ + env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ + env->halted = 1; + env->exception_index = EXCP_HLT; + cpu_loop_exit(); +} + +void helper_hlt(int next_eip_addend) +{ + helper_svm_check_intercept_param(SVM_EXIT_HLT, 0); + EIP += next_eip_addend; + + do_hlt(); +} + +void helper_monitor(target_ulong ptr) +{ +#ifdef VBOX + if ((uint32_t)ECX > 1) + raise_exception(EXCP0D_GPF); +#else /* !VBOX */ + if ((uint32_t)ECX != 0) + raise_exception(EXCP0D_GPF); +#endif /* !VBOX */ + /* XXX: store address ? */ + helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0); +} + +void helper_mwait(int next_eip_addend) +{ + if ((uint32_t)ECX != 0) + raise_exception(EXCP0D_GPF); +#ifdef VBOX + helper_hlt(next_eip_addend); +#else /* !VBOX */ + helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0); + EIP += next_eip_addend; + + /* XXX: not complete but not completely erroneous */ + if (env->cpu_index != 0 || env->next_cpu != NULL) { + /* more than one CPU: do not sleep because another CPU may + wake this one */ + } else { + do_hlt(); + } +#endif /* !VBOX */ +} + +void helper_debug(void) +{ + env->exception_index = EXCP_DEBUG; + cpu_loop_exit(); +} + +void helper_reset_rf(void) +{ + env->eflags &= ~RF_MASK; +} + +void helper_raise_interrupt(int intno, int next_eip_addend) +{ + raise_interrupt(intno, 1, 0, next_eip_addend); +} + +void helper_raise_exception(int exception_index) +{ + raise_exception(exception_index); +} + +void helper_cli(void) +{ + env->eflags &= ~IF_MASK; +} + +void helper_sti(void) +{ + env->eflags |= IF_MASK; +} + +#ifdef VBOX +void helper_cli_vme(void) +{ + env->eflags &= ~VIF_MASK; +} + +void helper_sti_vme(void) +{ + /* First check, then change eflags according to the AMD manual */ + if (env->eflags & VIP_MASK) { + raise_exception(EXCP0D_GPF); + } + env->eflags |= VIF_MASK; +} +#endif /* VBOX */ + +#if 0 +/* vm86plus instructions */ +void helper_cli_vm(void) +{ + env->eflags &= ~VIF_MASK; +} + +void helper_sti_vm(void) +{ + env->eflags |= VIF_MASK; + if (env->eflags & VIP_MASK) { + raise_exception(EXCP0D_GPF); + } +} +#endif + +void helper_set_inhibit_irq(void) +{ + env->hflags |= HF_INHIBIT_IRQ_MASK; +} + +void helper_reset_inhibit_irq(void) +{ + env->hflags &= ~HF_INHIBIT_IRQ_MASK; +} + +void helper_boundw(target_ulong a0, int v) +{ + int low, high; + low = ldsw(a0); + high = ldsw(a0 + 2); + v = (int16_t)v; + if (v < low || v > high) { + raise_exception(EXCP05_BOUND); + } +} + +void helper_boundl(target_ulong a0, int v) +{ + int low, high; + low = ldl(a0); + high = ldl(a0 + 4); + if (v < low || v > high) { + raise_exception(EXCP05_BOUND); + } +} + +static float 
approx_rsqrt(float a) +{ + return 1.0 / sqrt(a); +} + +static float approx_rcp(float a) +{ + return 1.0 / a; +} + +#if !defined(CONFIG_USER_ONLY) + +#define MMUSUFFIX _mmu + +#define SHIFT 0 +#include "softmmu_template.h" + +#define SHIFT 1 +#include "softmmu_template.h" + +#define SHIFT 2 +#include "softmmu_template.h" + +#define SHIFT 3 +#include "softmmu_template.h" + +#endif + +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) +/* This code assumes real physical address always fit into host CPU reg, + which is wrong in general, but true for our current use cases. */ +RTCCUINTREG REGPARM __ldb_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS8(addr); +} +RTCCUINTREG REGPARM __ldub_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU8(addr); +} +void REGPARM __stb_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU8(addr, val); +} +RTCCUINTREG REGPARM __ldw_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS16(addr); +} +RTCCUINTREG REGPARM __lduw_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU16(addr); +} +void REGPARM __stw_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU16(addr, val); +} +RTCCUINTREG REGPARM __ldl_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadS32(addr); +} +RTCCUINTREG REGPARM __ldul_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU32(addr); +} +void REGPARM __stl_vbox_phys(RTCCUINTREG addr, RTCCUINTREG val) +{ + remR3PhysWriteU32(addr, val); +} +uint64_t REGPARM __ldq_vbox_phys(RTCCUINTREG addr) +{ + return remR3PhysReadU64(addr); +} +void REGPARM __stq_vbox_phys(RTCCUINTREG addr, uint64_t val) +{ + remR3PhysWriteU64(addr, val); +} +#endif /* VBOX */ + +#if !defined(CONFIG_USER_ONLY) +/* try to fill the TLB and return an exception if error. If retaddr is + NULL, it means that the function was called in C code (i.e. not + from generated code or from helper.c) */ +/* XXX: fix it to restore all registers */ +void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr) +{ + TranslationBlock *tb; + int ret; + uintptr_t pc; + CPUX86State *saved_env; + + /* XXX: hack to restore env in all cases, even if not called from + generated code */ + saved_env = env; + env = cpu_single_env; + + ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx, 1); + if (ret) { + if (retaddr) { + /* now we have a real cpu fault */ + pc = (uintptr_t)retaddr; + tb = tb_find_pc(pc); + if (tb) { + /* the PC is inside the translated code. It means that we have + a virtual CPU fault */ + cpu_restore_state(tb, env, pc, NULL); + } + } + raise_exception_err(env->exception_index, env->error_code); + } + env = saved_env; +} +#endif + +#ifdef VBOX + +/** + * Correctly computes the eflags. + * @returns eflags. + * @param env1 CPU environment. + */ +uint32_t raw_compute_eflags(CPUX86State *env1) +{ + CPUX86State *savedenv = env; + uint32_t efl; + env = env1; + efl = compute_eflags(); + env = savedenv; + return efl; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + */ +uint8_t read_byte(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint8_t u8; + env = env1; + u8 = ldub_kernel(addr); + env = savedenv; + return u8; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. 
+ * @param pvAddr GC Virtual address. + */ +uint16_t read_word(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint16_t u16; + env = env1; + u16 = lduw_kernel(addr); + env = savedenv; + return u16; +} + +/** + * Reads byte from virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + */ +uint32_t read_dword(CPUX86State *env1, target_ulong addr) +{ + CPUX86State *savedenv = env; + uint32_t u32; + env = env1; + u32 = ldl_kernel(addr); + env = savedenv; + return u32; +} + +/** + * Writes byte to virtual address in guest memory area. + * XXX: is it working for any addresses? swapped out pages? + * @returns read data byte. + * @param env1 CPU environment. + * @param pvAddr GC Virtual address. + * @param val byte value + */ +void write_byte(CPUX86State *env1, target_ulong addr, uint8_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stb(addr, val); + env = savedenv; +} + +void write_word(CPUX86State *env1, target_ulong addr, uint16_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stw(addr, val); + env = savedenv; +} + +void write_dword(CPUX86State *env1, target_ulong addr, uint32_t val) +{ + CPUX86State *savedenv = env; + env = env1; + stl(addr, val); + env = savedenv; +} + +/** + * Correctly loads selector into segment register with updating internal + * qemu data/caches. + * @param env1 CPU environment. + * @param seg_reg Segment register. + * @param selector Selector to load. + */ +void sync_seg(CPUX86State *env1, int seg_reg, int selector) +{ + CPUX86State *savedenv = env; +#ifdef FORCE_SEGMENT_SYNC + jmp_buf old_buf; +#endif + + env = env1; + + if ( env->eflags & X86_EFL_VM + || !(env->cr[0] & X86_CR0_PE)) + { + load_seg_vm(seg_reg, selector); + + env = savedenv; + + /* Successful sync. */ + Assert(env1->segs[seg_reg].newselector == 0); + } + else + { + /* For some reasons, it works even w/o save/restore of the jump buffer, so as code is + time critical - let's not do that */ +#ifdef FORCE_SEGMENT_SYNC + memcpy(&old_buf, &env1->jmp_env, sizeof(old_buf)); +#endif + if (setjmp(env1->jmp_env) == 0) + { + if (seg_reg == R_CS) + { + uint32_t e1, e2; + e1 = e2 = 0; + load_segment(&e1, &e2, selector); + cpu_x86_load_seg_cache(env, R_CS, selector, + get_seg_base(e1, e2), + get_seg_limit(e1, e2), + e2); + } + else + helper_load_seg(seg_reg, selector); + /* We used to use tss_load_seg(seg_reg, selector); which, for some reasons ignored + loading 0 selectors, what, in order, lead to subtle problems like #3588 */ + + env = savedenv; + + /* Successful sync. */ + Assert(env1->segs[seg_reg].newselector == 0); + } + else + { + env = savedenv; + + /* Postpone sync until the guest uses the selector. */ + env1->segs[seg_reg].selector = selector; /* hidden values are now incorrect, but will be resynced when this register is accessed. 
*/ + env1->segs[seg_reg].newselector = selector; + Log(("sync_seg: out of sync seg_reg=%d selector=%#x\n", seg_reg, selector)); + env1->exception_index = -1; + env1->error_code = 0; + env1->old_exception = -1; + } +#ifdef FORCE_SEGMENT_SYNC + memcpy(&env1->jmp_env, &old_buf, sizeof(old_buf)); +#endif + } + +} + +DECLINLINE(void) tb_reset_jump(TranslationBlock *tb, int n) +{ + tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); +} + + +int emulate_single_instr(CPUX86State *env1) +{ + TranslationBlock *tb; + TranslationBlock *current; + int flags; + uint8_t *tc_ptr; + target_ulong old_eip; + + /* ensures env is loaded! */ + CPUX86State *savedenv = env; + env = env1; + + RAWEx_ProfileStart(env, STATS_EMULATE_SINGLE_INSTR); + + current = env->current_tb; + env->current_tb = NULL; + flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK)); + + /* + * Translate only one instruction. + */ + ASMAtomicOrU32(&env->state, CPU_EMULATE_SINGLE_INSTR); + tb = tb_gen_code(env, env->eip + env->segs[R_CS].base, + env->segs[R_CS].base, flags, 0); + + ASMAtomicAndU32(&env->state, ~CPU_EMULATE_SINGLE_INSTR); + + + /* tb_link_phys: */ + tb->jmp_first = (TranslationBlock *)((intptr_t)tb | 2); + tb->jmp_next[0] = NULL; + tb->jmp_next[1] = NULL; + Assert(tb->jmp_next[0] == NULL); + Assert(tb->jmp_next[1] == NULL); + if (tb->tb_next_offset[0] != 0xffff) + tb_reset_jump(tb, 0); + if (tb->tb_next_offset[1] != 0xffff) + tb_reset_jump(tb, 1); + + /* + * Execute it using emulation + */ + old_eip = env->eip; + env->current_tb = tb; + + /* + * eip remains the same for repeated instructions; no idea why qemu doesn't do a jump inside the generated code + * perhaps not a very safe hack + */ + while (old_eip == env->eip) + { + tc_ptr = tb->tc_ptr; + +#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) + int fake_ret; + tcg_qemu_tb_exec(tc_ptr, fake_ret); +#else + tcg_qemu_tb_exec(tc_ptr); +#endif + + /* + * Exit once we detect an external interrupt and interrupts are enabled + */ + if ( (env->interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_TIMER)) + || ( (env->eflags & IF_MASK) + && !(env->hflags & HF_INHIBIT_IRQ_MASK) + && (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_HARD) ) + ) + { + break; + } + if (env->interrupt_request & CPU_INTERRUPT_EXTERNAL_FLUSH_TLB) { + tlb_flush(env, true); + } + } + env->current_tb = current; + + tb_phys_invalidate(tb, -1); + tb_free(tb); +/* + Assert(tb->tb_next_offset[0] == 0xffff); + Assert(tb->tb_next_offset[1] == 0xffff); + Assert(tb->tb_next[0] == 0xffff); + Assert(tb->tb_next[1] == 0xffff); + Assert(tb->jmp_next[0] == NULL); + Assert(tb->jmp_next[1] == NULL); + Assert(tb->jmp_first == NULL); */ + + RAWEx_ProfileStop(env, STATS_EMULATE_SINGLE_INSTR); + + /* + * Execute the next instruction when we encounter instruction fusing. + */ + if (env->hflags & HF_INHIBIT_IRQ_MASK) + { + Log(("REM: Emulating next instruction due to instruction fusing (HF_INHIBIT_IRQ_MASK) at %RGv\n", env->eip)); + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + emulate_single_instr(env); + } + + env = savedenv; + return 0; +} + +/** + * Correctly loads a new ldtr selector. + * + * @param env1 CPU environment. + * @param selector Selector to load. 
+ */ +void sync_ldtr(CPUX86State *env1, int selector) +{ + CPUX86State *saved_env = env; + if (setjmp(env1->jmp_env) == 0) + { + env = env1; + helper_lldt(selector); + env = saved_env; + } + else + { + env = saved_env; +#ifdef VBOX_STRICT + cpu_abort(env1, "sync_ldtr: selector=%#x\n", selector); +#endif + } +} + +int get_ss_esp_from_tss_raw(CPUX86State *env1, uint32_t *ss_ptr, + uint32_t *esp_ptr, int dpl) +{ + int type, index, shift; + + CPUX86State *savedenv = env; + env = env1; + + if (!(env->tr.flags & DESC_P_MASK)) + cpu_abort(env, "invalid tss"); + type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; + if ((type & 7) != 3) + cpu_abort(env, "invalid tss type %d", type); + shift = type >> 3; + index = (dpl * 4 + 2) << shift; + if (index + (4 << shift) - 1 > env->tr.limit) + { + env = savedenv; + return 0; + } + //raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc); + + if (shift == 0) { + *esp_ptr = lduw_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 2); + } else { + *esp_ptr = ldl_kernel(env->tr.base + index); + *ss_ptr = lduw_kernel(env->tr.base + index + 4); + } + + env = savedenv; + return 1; +} + +//***************************************************************************** +// Needs to be at the bottom of the file (overriding macros) + +static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr) +{ +#ifdef USE_X86LDOUBLE + CPU86_LDoubleU tmp; + tmp.l.lower = *(uint64_t const *)ptr; + tmp.l.upper = *(uint16_t const *)(ptr + 8); + return tmp.d; +#else +# error "Busted FPU saving/restoring!" + return *(CPU86_LDouble *)ptr; +#endif +} + +static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr) +{ +#ifdef USE_X86LDOUBLE + CPU86_LDoubleU tmp; + tmp.d = f; + *(uint64_t *)(ptr + 0) = tmp.l.lower; + *(uint16_t *)(ptr + 8) = tmp.l.upper; + *(uint16_t *)(ptr + 10) = 0; + *(uint32_t *)(ptr + 12) = 0; + AssertCompile(sizeof(long double) > 8); +#else +# error "Busted FPU saving/restoring!" + *(CPU86_LDouble *)ptr = f; +#endif +} + +#undef stw +#undef stl +#undef stq +#define stw(a,b) *(uint16_t *)(a) = (uint16_t)(b) +#define stl(a,b) *(uint32_t *)(a) = (uint32_t)(b) +#define stq(a,b) *(uint64_t *)(a) = (uint64_t)(b) + +//***************************************************************************** +void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr) +{ + int fpus, fptag, i, nb_xmm_regs; + CPU86_LDouble tmp; + uint8_t *addr; + int data64 = !!(env->hflags & HF_LMA_MASK); + + if (env->cpuid_features & CPUID_FXSR) + { + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag ^ 0xff); + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt_raw(tmp, addr); + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ + stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ + nb_xmm_regs = 8 << data64; + addr = ptr + 0xa0; + for(i = 0; i < nb_xmm_regs; i++) { +#if __GNUC__ < 4 + stq(addr, env->xmm_regs[i].XMM_Q(0)); + stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); +#else /* VBOX + __GNUC__ >= 4: gcc 4.x compiler bug - it runs out of registers for the 64-bit value. 
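+         The workaround below therefore writes each 128-bit XMM register as four 32-bit
+         stl() stores instead of two 64-bit stq() stores, so no 64-bit temporary has to
+         be kept in a register.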
*/ + stl(addr, env->xmm_regs[i].XMM_L(0)); + stl(addr + 4, env->xmm_regs[i].XMM_L(1)); + stl(addr + 8, env->xmm_regs[i].XMM_L(2)); + stl(addr + 12, env->xmm_regs[i].XMM_L(3)); +#endif + addr += 16; + } + } + } + else + { + PX86FPUSTATE fp = (PX86FPUSTATE)ptr; + int fptag; + + fp->FCW = env->fpuc; + fp->FSW = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i=7; i>=0; i--) { + fptag <<= 2; + if (env->fptags[i]) { + fptag |= 3; + } else { + /* the FPU automatically computes it */ + } + } + fp->FTW = fptag; + + for(i = 0;i < 8; i++) { + tmp = ST(i); + helper_fstt_raw(tmp, &fp->regs[i].au8[0]); + } + } +} + +//***************************************************************************** +#undef lduw +#undef ldl +#undef ldq +#define lduw(a) *(uint16_t *)(a) +#define ldl(a) *(uint32_t *)(a) +#define ldq(a) *(uint64_t *)(a) +//***************************************************************************** +void save_raw_fp_state(CPUX86State *env, uint8_t *ptr) +{ + int i, fpus, fptag, nb_xmm_regs; + CPU86_LDouble tmp; + uint8_t *addr; + int data64 = !!(env->hflags & HF_LMA_MASK); /* don't use HF_CS64_MASK here as cs hasn't been synced when this function is called. */ + + if (env->cpuid_features & CPUID_FXSR) + { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + fptag ^= 0xff; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + 0x20; + for(i = 0;i < 8; i++) { + tmp = helper_fldt_raw(addr); + ST(i) = tmp; + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it, endianness */ + env->mxcsr = ldl(ptr + 0x18); + //ldl(ptr + 0x1c); + nb_xmm_regs = 8 << data64; + addr = ptr + 0xa0; + for(i = 0; i < nb_xmm_regs; i++) { +#if HC_ARCH_BITS == 32 + /* this is a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35135 */ + env->xmm_regs[i].XMM_L(0) = ldl(addr); + env->xmm_regs[i].XMM_L(1) = ldl(addr + 4); + env->xmm_regs[i].XMM_L(2) = ldl(addr + 8); + env->xmm_regs[i].XMM_L(3) = ldl(addr + 12); +#else + env->xmm_regs[i].XMM_Q(0) = ldq(addr); + env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); +#endif + addr += 16; + } + } + } + else + { + PX86FPUSTATE fp = (PX86FPUSTATE)ptr; + int fptag, j; + + env->fpuc = fp->FCW; + env->fpstt = (fp->FSW >> 11) & 7; + env->fpus = fp->FSW & ~0x3800; + fptag = fp->FTW; + for(i = 0;i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } + j = env->fpstt; + for(i = 0;i < 8; i++) { + tmp = helper_fldt_raw(&fp->regs[i].au8[0]); + ST(i) = tmp; + } + } +} +//***************************************************************************** +//***************************************************************************** + +#endif /* VBOX */ + +/* Secure Virtual Machine helpers */ + +#if defined(CONFIG_USER_ONLY) + +void helper_vmrun(int aflag, int next_eip_addend) +{ +} +void helper_vmmcall(void) +{ +} +void helper_vmload(int aflag) +{ +} +void helper_vmsave(int aflag) +{ +} +void helper_stgi(void) +{ +} +void helper_clgi(void) +{ +} +void helper_skinit(void) +{ +} +void helper_invlpga(int aflag) +{ +} +void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) +{ +} +void helper_svm_check_intercept_param(uint32_t type, uint64_t param) +{ +} + +void helper_svm_check_io(uint32_t port, uint32_t param, + uint32_t next_eip_addend) +{ +} +#else + +static inline void svm_save_seg(target_phys_addr_t addr, + const SegmentCache *sc) +{ + stw_phys(addr + offsetof(struct vmcb_seg, selector), + sc->selector); + 
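+    /* The remaining stores mirror the rest of struct vmcb_seg: the 64-bit base, the
+       32-bit limit and the 12-bit SVM attribute encoding, which skips bits 16..19
+       (the limit 19:16 field) of the hidden descriptor flags.  Worked illustration:
+       the hidden flags 0x00c09300 of a flat 32-bit data segment pack to attrib 0x0c93,
+       and svm_load_seg() below expands 0x0c93 back to 0x00c09300. */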
stq_phys(addr + offsetof(struct vmcb_seg, base), + sc->base); + stl_phys(addr + offsetof(struct vmcb_seg, limit), + sc->limit); + stw_phys(addr + offsetof(struct vmcb_seg, attrib), + ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); +} + +static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc) +{ + unsigned int flags; + + sc->selector = lduw_phys(addr + offsetof(struct vmcb_seg, selector)); + sc->base = ldq_phys(addr + offsetof(struct vmcb_seg, base)); + sc->limit = ldl_phys(addr + offsetof(struct vmcb_seg, limit)); + flags = lduw_phys(addr + offsetof(struct vmcb_seg, attrib)); + sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); +} + +static inline void svm_load_seg_cache(target_phys_addr_t addr, + CPUState *env, int seg_reg) +{ + SegmentCache sc1, *sc = &sc1; + svm_load_seg(addr, sc); + cpu_x86_load_seg_cache(env, seg_reg, sc->selector, + sc->base, sc->limit, sc->flags); +} + +void helper_vmrun(int aflag, int next_eip_addend) +{ + target_ulong addr; + uint32_t event_inj; + uint32_t int_ctl; + + helper_svm_check_intercept_param(SVM_EXIT_VMRUN, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr); + + env->vm_vmcb = addr; + + /* save the current CPU state in the hsave page */ + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); + stl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base); + stl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags()); + + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), + &env->segs[R_ES]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), + &env->segs[R_CS]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), + &env->segs[R_SS]); + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), + &env->segs[R_DS]); + + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip), + EIP + next_eip_addend); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp), ESP); + stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax), EAX); + + /* load the interception bitmaps so we do not need to access the + vmcb in svm mode */ + env->intercept = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept)); + env->intercept_cr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read)); + env->intercept_cr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write)); + env->intercept_dr_read = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read)); + env->intercept_dr_write = lduw_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write)); + env->intercept_exceptions = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions)); + + /* enable intercepts */ + env->hflags 
|= HF_SVMI_MASK; + + env->tsc_offset = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset)); + + env->gdt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base)); + env->gdt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit)); + + env->idt.base = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base)); + env->idt.limit = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit)); + + /* clear exit_info_2 so we behave like the real hardware */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0); + + cpu_x86_update_cr0(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0))); + cpu_x86_update_cr4(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4))); + cpu_x86_update_cr3(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3))); + env->cr[2] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2)); + int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); + env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); + if (int_ctl & V_INTR_MASKING_MASK) { + env->v_tpr = int_ctl & V_TPR_MASK; + env->hflags2 |= HF2_VINTR_MASK; + if (env->eflags & IF_MASK) + env->hflags2 |= HF2_HIF_MASK; + } + + cpu_load_efer(env, + ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer))); + env->eflags = 0; + load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + CC_OP = CC_OP_EFLAGS; + + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.es), + env, R_ES); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.cs), + env, R_CS); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ss), + env, R_SS); + svm_load_seg_cache(env->vm_vmcb + offsetof(struct vmcb, save.ds), + env, R_DS); + + EIP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip)); + env->eip = EIP; + ESP = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp)); + EAX = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax)); + env->dr[7] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7)); + env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6)); + cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl))); + + /* FIXME: guest state consistency checks */ + + switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) { + case TLB_CONTROL_DO_NOTHING: + break; + case TLB_CONTROL_FLUSH_ALL_ASID: + /* FIXME: this is not 100% correct but should work for now */ + tlb_flush(env, 1); + break; + } + + env->hflags2 |= HF2_GIF_MASK; + + if (int_ctl & V_IRQ_MASK) { + env->interrupt_request |= CPU_INTERRUPT_VIRQ; + } + + /* maybe we need to inject an event */ + event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); + if (event_inj & SVM_EVTINJ_VALID) { + uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK; + uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR; + uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)); + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err); + /* FIXME: need to implement valid_err */ + switch (event_inj & SVM_EVTINJ_TYPE_MASK) { + case SVM_EVTINJ_TYPE_INTR: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = -1; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR"); + /* XXX: is it always correct ? 
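+                        (unlike the NMI/EXEPT/SOFT cases below, which leave through
+                        cpu_loop_exit(), the INTR case is delivered right here via
+                        do_interrupt() and execution falls out of the switch)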
*/ + do_interrupt(vector, 0, 0, 0, 1); + break; + case SVM_EVTINJ_TYPE_NMI: + env->exception_index = EXCP02_NMI; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = EIP; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI"); + cpu_loop_exit(); + break; + case SVM_EVTINJ_TYPE_EXEPT: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 0; + env->exception_next_eip = -1; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT"); + cpu_loop_exit(); + break; + case SVM_EVTINJ_TYPE_SOFT: + env->exception_index = vector; + env->error_code = event_inj_err; + env->exception_is_int = 1; + env->exception_next_eip = EIP; + qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT"); + cpu_loop_exit(); + break; + } + qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code); + } +} + +void helper_vmmcall(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_VMMCALL, 0); + raise_exception(EXCP06_ILLOP); +} + +void helper_vmload(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_VMLOAD, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", + addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)), + env->segs[R_FS].base); + + svm_load_seg_cache(addr + offsetof(struct vmcb, save.fs), + env, R_FS); + svm_load_seg_cache(addr + offsetof(struct vmcb, save.gs), + env, R_GS); + svm_load_seg(addr + offsetof(struct vmcb, save.tr), + &env->tr); + svm_load_seg(addr + offsetof(struct vmcb, save.ldtr), + &env->ldt); + +#ifdef TARGET_X86_64 + env->kernelgsbase = ldq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base)); + env->lstar = ldq_phys(addr + offsetof(struct vmcb, save.lstar)); + env->cstar = ldq_phys(addr + offsetof(struct vmcb, save.cstar)); + env->fmask = ldq_phys(addr + offsetof(struct vmcb, save.sfmask)); +#endif + env->star = ldq_phys(addr + offsetof(struct vmcb, save.star)); + env->sysenter_cs = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_cs)); + env->sysenter_esp = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_esp)); + env->sysenter_eip = ldq_phys(addr + offsetof(struct vmcb, save.sysenter_eip)); +} + +void helper_vmsave(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_VMSAVE, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! 
" TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", + addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)), + env->segs[R_FS].base); + + svm_save_seg(addr + offsetof(struct vmcb, save.fs), + &env->segs[R_FS]); + svm_save_seg(addr + offsetof(struct vmcb, save.gs), + &env->segs[R_GS]); + svm_save_seg(addr + offsetof(struct vmcb, save.tr), + &env->tr); + svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), + &env->ldt); + +#ifdef TARGET_X86_64 + stq_phys(addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase); + stq_phys(addr + offsetof(struct vmcb, save.lstar), env->lstar); + stq_phys(addr + offsetof(struct vmcb, save.cstar), env->cstar); + stq_phys(addr + offsetof(struct vmcb, save.sfmask), env->fmask); +#endif + stq_phys(addr + offsetof(struct vmcb, save.star), env->star); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp); + stq_phys(addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip); +} + +void helper_stgi(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_STGI, 0); + env->hflags2 |= HF2_GIF_MASK; +} + +void helper_clgi(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_CLGI, 0); + env->hflags2 &= ~HF2_GIF_MASK; +} + +void helper_skinit(void) +{ + helper_svm_check_intercept_param(SVM_EXIT_SKINIT, 0); + /* XXX: not implemented */ + raise_exception(EXCP06_ILLOP); +} + +void helper_invlpga(int aflag) +{ + target_ulong addr; + helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0); + + if (aflag == 2) + addr = EAX; + else + addr = (uint32_t)EAX; + + /* XXX: could use the ASID to see if it is needed to do the + flush */ + tlb_flush_page(env, addr); +} + +void helper_svm_check_intercept_param(uint32_t type, uint64_t param) +{ + if (likely(!(env->hflags & HF_SVMI_MASK))) + return; +#ifndef VBOX + switch(type) { + case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8: + if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR0 + 8: + if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7: + if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7: + if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31: + if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) { + helper_vmexit(type, param); + } + break; + case SVM_EXIT_MSR: + if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) { + /* FIXME: this should be read in at vmrun (faster this way?) */ + uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa)); + uint32_t t0, t1; + switch((uint32_t)ECX) { + case 0 ... 0x1fff: + t0 = (ECX * 2) % 8; + t1 = ECX / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + ECX - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 
0xc0011fff: + t0 = (16384 + ECX - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + helper_vmexit(type, param); + t0 = 0; + t1 = 0; + break; + } + if (ldub_phys(addr + t1) & ((1 << param) << t0)) + helper_vmexit(type, param); + } + break; + default: + if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) { + helper_vmexit(type, param); + } + break; + } +#else /* VBOX */ + AssertMsgFailed(("We shouldn't be here, HM supported differently!")); +#endif /* VBOX */ +} + +void helper_svm_check_io(uint32_t port, uint32_t param, + uint32_t next_eip_addend) +{ + if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) { + /* FIXME: this should be read in at vmrun (faster this way?) */ + uint64_t addr = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa)); + uint16_t mask = (1 << ((param >> 4) & 7)) - 1; + if(lduw_phys(addr + port / 8) & (mask << (port & 7))) { + /* next EIP */ + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + env->eip + next_eip_addend); + helper_vmexit(SVM_EXIT_IOIO, param | (port << 16)); + } + } +} + +/* Note: currently only 32 bits of exit_code are used */ +void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) +{ + uint32_t int_ctl; + + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n", + exit_code, exit_info_1, + ldq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)), + EIP); + + if(env->hflags & HF_INHIBIT_IRQ_MASK) { + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK); + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + } else { + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0); + } + + /* Save the VM state in the vmcb */ + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), + &env->segs[R_ES]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), + &env->segs[R_CS]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), + &env->segs[R_SS]); + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), + &env->segs[R_DS]); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]); + + int_ctl = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); + int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK); + int_ctl |= env->v_tpr & V_TPR_MASK; + if (env->interrupt_request & CPU_INTERRUPT_VIRQ) + int_ctl |= V_IRQ_MASK; + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl); + + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags), compute_eflags()); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rsp), ESP); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rax), EAX); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]); + stb_phys(env->vm_vmcb + offsetof(struct 
vmcb, save.cpl), env->hflags & HF_CPL_MASK); + + /* Reload the host state from vm_hsave */ + env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); + env->hflags &= ~HF_SVMI_MASK; + env->intercept = 0; + env->intercept_exceptions = 0; + env->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + env->tsc_offset = 0; + + env->gdt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.base)); + env->gdt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit)); + + env->idt.base = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.base)); + env->idt.limit = ldl_phys(env->vm_hsave + offsetof(struct vmcb, save.idtr.limit)); + + cpu_x86_update_cr0(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK); + cpu_x86_update_cr4(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr4))); + cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3))); + /* we need to set the efer after the crs so the hidden flags get + set properly */ + cpu_load_efer(env, + ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer))); + env->eflags = 0; + load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)), + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + CC_OP = CC_OP_EFLAGS; + + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.es), + env, R_ES); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.cs), + env, R_CS); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ss), + env, R_SS); + svm_load_seg_cache(env->vm_hsave + offsetof(struct vmcb, save.ds), + env, R_DS); + + EIP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip)); + ESP = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rsp)); + EAX = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rax)); + + env->dr[6] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr6)); + env->dr[7] = ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.dr7)); + + /* other setups */ + cpu_x86_set_cpl(env, 0); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code); + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1); + + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), + ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj))); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), + ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err))); + stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0); + + env->hflags2 &= ~HF2_GIF_MASK; + /* FIXME: Resets the current ASID register to zero (host ASID). */ + + /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ + + /* Clears the TSC_OFFSET inside the processor. */ + + /* If the host is in PAE mode, the processor reloads the host's PDPEs + from the page table indicated the host's CR3. If the PDPEs contain + illegal state, the processor causes a shutdown. */ + + /* Forces CR0.PE = 1, RFLAGS.VM = 0. */ + env->cr[0] |= CR0_PE_MASK; + env->eflags &= ~VM_MASK; + + /* Disables all breakpoints in the host DR7 register. */ + + /* Checks the reloaded host state for consistency. */ + + /* If the host's rIP reloaded by #VMEXIT is outside the limit of the + host's code segment or non-canonical (in the case of long mode), a + #GP fault is delivered inside the host.) 
*/ + + /* remove any pending exception */ + env->exception_index = -1; + env->error_code = 0; + env->old_exception = -1; + + cpu_loop_exit(); +} + +#endif + +/* MMX/SSE */ +/* XXX: optimize by storing fptt and fptags in the static cpu state */ +void helper_enter_mmx(void) +{ + env->fpstt = 0; + *(uint32_t *)(env->fptags) = 0; + *(uint32_t *)(env->fptags + 4) = 0; +} + +void helper_emms(void) +{ + /* set to empty state */ + *(uint32_t *)(env->fptags) = 0x01010101; + *(uint32_t *)(env->fptags + 4) = 0x01010101; +} + +/* XXX: suppress */ +void helper_movq(void *d, void *s) +{ + *(uint64_t *)d = *(uint64_t *)s; +} + +#define SHIFT 0 +#include "ops_sse.h" + +#define SHIFT 1 +#include "ops_sse.h" + +#define SHIFT 0 +#include "helper_template.h" +#undef SHIFT + +#define SHIFT 1 +#include "helper_template.h" +#undef SHIFT + +#define SHIFT 2 +#include "helper_template.h" +#undef SHIFT + +#ifdef TARGET_X86_64 + +#define SHIFT 3 +#include "helper_template.h" +#undef SHIFT + +#endif + +/* bit operations */ +target_ulong helper_bsf(target_ulong t0) +{ + int count; + target_ulong res; + + res = t0; + count = 0; + while ((res & 1) == 0) { + count++; + res >>= 1; + } + return count; +} + +target_ulong helper_lzcnt(target_ulong t0, int wordsize) +{ + int count; + target_ulong res, mask; + + if (wordsize > 0 && t0 == 0) { + return wordsize; + } + res = t0; + count = TARGET_LONG_BITS - 1; + mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); + while ((res & mask) == 0) { + count--; + res <<= 1; + } + if (wordsize > 0) { + return wordsize - 1 - count; + } + return count; +} + +target_ulong helper_bsr(target_ulong t0) +{ + return helper_lzcnt(t0, 0); +} + +static int compute_all_eflags(void) +{ + return CC_SRC; +} + +static int compute_c_eflags(void) +{ + return CC_SRC & CC_C; +} + +uint32_t helper_cc_compute_all(int op) +{ + switch (op) { + default: /* should never happen */ return 0; + + case CC_OP_EFLAGS: return compute_all_eflags(); + + case CC_OP_MULB: return compute_all_mulb(); + case CC_OP_MULW: return compute_all_mulw(); + case CC_OP_MULL: return compute_all_mull(); + + case CC_OP_ADDB: return compute_all_addb(); + case CC_OP_ADDW: return compute_all_addw(); + case CC_OP_ADDL: return compute_all_addl(); + + case CC_OP_ADCB: return compute_all_adcb(); + case CC_OP_ADCW: return compute_all_adcw(); + case CC_OP_ADCL: return compute_all_adcl(); + + case CC_OP_SUBB: return compute_all_subb(); + case CC_OP_SUBW: return compute_all_subw(); + case CC_OP_SUBL: return compute_all_subl(); + + case CC_OP_SBBB: return compute_all_sbbb(); + case CC_OP_SBBW: return compute_all_sbbw(); + case CC_OP_SBBL: return compute_all_sbbl(); + + case CC_OP_LOGICB: return compute_all_logicb(); + case CC_OP_LOGICW: return compute_all_logicw(); + case CC_OP_LOGICL: return compute_all_logicl(); + + case CC_OP_INCB: return compute_all_incb(); + case CC_OP_INCW: return compute_all_incw(); + case CC_OP_INCL: return compute_all_incl(); + + case CC_OP_DECB: return compute_all_decb(); + case CC_OP_DECW: return compute_all_decw(); + case CC_OP_DECL: return compute_all_decl(); + + case CC_OP_SHLB: return compute_all_shlb(); + case CC_OP_SHLW: return compute_all_shlw(); + case CC_OP_SHLL: return compute_all_shll(); + + case CC_OP_SARB: return compute_all_sarb(); + case CC_OP_SARW: return compute_all_sarw(); + case CC_OP_SARL: return compute_all_sarl(); + +#ifdef TARGET_X86_64 + case CC_OP_MULQ: return compute_all_mulq(); + + case CC_OP_ADDQ: return compute_all_addq(); + + case CC_OP_ADCQ: return compute_all_adcq(); + + case CC_OP_SUBQ: return 
compute_all_subq(); + + case CC_OP_SBBQ: return compute_all_sbbq(); + + case CC_OP_LOGICQ: return compute_all_logicq(); + + case CC_OP_INCQ: return compute_all_incq(); + + case CC_OP_DECQ: return compute_all_decq(); + + case CC_OP_SHLQ: return compute_all_shlq(); + + case CC_OP_SARQ: return compute_all_sarq(); +#endif + } +} + +uint32_t helper_cc_compute_c(int op) +{ + switch (op) { + default: /* should never happen */ return 0; + + case CC_OP_EFLAGS: return compute_c_eflags(); + + case CC_OP_MULB: return compute_c_mull(); + case CC_OP_MULW: return compute_c_mull(); + case CC_OP_MULL: return compute_c_mull(); + + case CC_OP_ADDB: return compute_c_addb(); + case CC_OP_ADDW: return compute_c_addw(); + case CC_OP_ADDL: return compute_c_addl(); + + case CC_OP_ADCB: return compute_c_adcb(); + case CC_OP_ADCW: return compute_c_adcw(); + case CC_OP_ADCL: return compute_c_adcl(); + + case CC_OP_SUBB: return compute_c_subb(); + case CC_OP_SUBW: return compute_c_subw(); + case CC_OP_SUBL: return compute_c_subl(); + + case CC_OP_SBBB: return compute_c_sbbb(); + case CC_OP_SBBW: return compute_c_sbbw(); + case CC_OP_SBBL: return compute_c_sbbl(); + + case CC_OP_LOGICB: return compute_c_logicb(); + case CC_OP_LOGICW: return compute_c_logicw(); + case CC_OP_LOGICL: return compute_c_logicl(); + + case CC_OP_INCB: return compute_c_incl(); + case CC_OP_INCW: return compute_c_incl(); + case CC_OP_INCL: return compute_c_incl(); + + case CC_OP_DECB: return compute_c_incl(); + case CC_OP_DECW: return compute_c_incl(); + case CC_OP_DECL: return compute_c_incl(); + + case CC_OP_SHLB: return compute_c_shlb(); + case CC_OP_SHLW: return compute_c_shlw(); + case CC_OP_SHLL: return compute_c_shll(); + + case CC_OP_SARB: return compute_c_sarl(); + case CC_OP_SARW: return compute_c_sarl(); + case CC_OP_SARL: return compute_c_sarl(); + +#ifdef TARGET_X86_64 + case CC_OP_MULQ: return compute_c_mull(); + + case CC_OP_ADDQ: return compute_c_addq(); + + case CC_OP_ADCQ: return compute_c_adcq(); + + case CC_OP_SUBQ: return compute_c_subq(); + + case CC_OP_SBBQ: return compute_c_sbbq(); + + case CC_OP_LOGICQ: return compute_c_logicq(); + + case CC_OP_INCQ: return compute_c_incl(); + + case CC_OP_DECQ: return compute_c_incl(); + + case CC_OP_SHLQ: return compute_c_shlq(); + + case CC_OP_SARQ: return compute_c_sarl(); +#endif + } +} diff --git a/src/recompiler/target-i386/ops_sse.h b/src/recompiler/target-i386/ops_sse.h new file mode 100644 index 00000000..4c8b89e2 --- /dev/null +++ b/src/recompiler/target-i386/ops_sse.h @@ -0,0 +1,2111 @@ +/* + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support + * + * Copyright (c) 2005 Fabrice Bellard + * Copyright (c) 2008 Intel Corporation <andrew.zaborowski@intel.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if SHIFT == 0 +#define Reg MMXReg +#define XMM_ONLY(...) +#define B(n) MMX_B(n) +#define W(n) MMX_W(n) +#define L(n) MMX_L(n) +#define Q(n) q +#define SUFFIX _mmx +#else +#define Reg XMMReg +#define XMM_ONLY(...) __VA_ARGS__ +#define B(n) XMM_B(n) +#define W(n) XMM_W(n) +#define L(n) XMM_L(n) +#define Q(n) XMM_Q(n) +#define SUFFIX _xmm +#endif + +void glue(helper_psrlw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->W(0) >>= shift; + d->W(1) >>= shift; + d->W(2) >>= shift; + d->W(3) >>= shift; +#if SHIFT == 1 + d->W(4) >>= shift; + d->W(5) >>= shift; + d->W(6) >>= shift; + d->W(7) >>= shift; +#endif + } +} + +void glue(helper_psraw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + shift = 15; + } else { + shift = s->B(0); + } + d->W(0) = (int16_t)d->W(0) >> shift; + d->W(1) = (int16_t)d->W(1) >> shift; + d->W(2) = (int16_t)d->W(2) >> shift; + d->W(3) = (int16_t)d->W(3) >> shift; +#if SHIFT == 1 + d->W(4) = (int16_t)d->W(4) >> shift; + d->W(5) = (int16_t)d->W(5) >> shift; + d->W(6) = (int16_t)d->W(6) >> shift; + d->W(7) = (int16_t)d->W(7) >> shift; +#endif +} + +void glue(helper_psllw, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 15) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->W(0) <<= shift; + d->W(1) <<= shift; + d->W(2) <<= shift; + d->W(3) <<= shift; +#if SHIFT == 1 + d->W(4) <<= shift; + d->W(5) <<= shift; + d->W(6) <<= shift; + d->W(7) <<= shift; +#endif + } +} + +void glue(helper_psrld, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->L(0) >>= shift; + d->L(1) >>= shift; +#if SHIFT == 1 + d->L(2) >>= shift; + d->L(3) >>= shift; +#endif + } +} + +void glue(helper_psrad, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + shift = 31; + } else { + shift = s->B(0); + } + d->L(0) = (int32_t)d->L(0) >> shift; + d->L(1) = (int32_t)d->L(1) >> shift; +#if SHIFT == 1 + d->L(2) = (int32_t)d->L(2) >> shift; + d->L(3) = (int32_t)d->L(3) >> shift; +#endif +} + +void glue(helper_pslld, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 31) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->L(0) <<= shift; + d->L(1) <<= shift; +#if SHIFT == 1 + d->L(2) <<= shift; + d->L(3) <<= shift; +#endif + } +} + +void glue(helper_psrlq, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 63) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->Q(0) >>= shift; +#if SHIFT == 1 + d->Q(1) >>= shift; +#endif + } +} + +void glue(helper_psllq, SUFFIX)(Reg *d, Reg *s) +{ + int shift; + + if (s->Q(0) > 63) { + d->Q(0) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif + } else { + shift = s->B(0); + d->Q(0) <<= shift; +#if SHIFT == 1 + d->Q(1) <<= shift; +#endif + } +} + +#if SHIFT == 1 +void glue(helper_psrldq, SUFFIX)(Reg *d, Reg *s) 
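+/* Byte-granular right shift of the full 128-bit register: the count comes from the low
+   dword of the source and is clamped to 16.  Illustration: with shift == 2 the loops
+   below yield d->B(i) = old d->B(i + 2) for i < 14 and zero d->B(14), d->B(15). */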
+{ + int shift, i; + + shift = s->L(0); + if (shift > 16) + shift = 16; + for(i = 0; i < 16 - shift; i++) + d->B(i) = d->B(i + shift); + for(i = 16 - shift; i < 16; i++) + d->B(i) = 0; +} + +void glue(helper_pslldq, SUFFIX)(Reg *d, Reg *s) +{ + int shift, i; + + shift = s->L(0); + if (shift > 16) + shift = 16; + for(i = 15; i >= shift; i--) + d->B(i) = d->B(i - shift); + for(i = 0; i < shift; i++) + d->B(i) = 0; +} +#endif + +#define SSE_HELPER_B(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->B(0) = F(d->B(0), s->B(0));\ + d->B(1) = F(d->B(1), s->B(1));\ + d->B(2) = F(d->B(2), s->B(2));\ + d->B(3) = F(d->B(3), s->B(3));\ + d->B(4) = F(d->B(4), s->B(4));\ + d->B(5) = F(d->B(5), s->B(5));\ + d->B(6) = F(d->B(6), s->B(6));\ + d->B(7) = F(d->B(7), s->B(7));\ + XMM_ONLY(\ + d->B(8) = F(d->B(8), s->B(8));\ + d->B(9) = F(d->B(9), s->B(9));\ + d->B(10) = F(d->B(10), s->B(10));\ + d->B(11) = F(d->B(11), s->B(11));\ + d->B(12) = F(d->B(12), s->B(12));\ + d->B(13) = F(d->B(13), s->B(13));\ + d->B(14) = F(d->B(14), s->B(14));\ + d->B(15) = F(d->B(15), s->B(15));\ + )\ +} + +#define SSE_HELPER_W(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->W(0) = F(d->W(0), s->W(0));\ + d->W(1) = F(d->W(1), s->W(1));\ + d->W(2) = F(d->W(2), s->W(2));\ + d->W(3) = F(d->W(3), s->W(3));\ + XMM_ONLY(\ + d->W(4) = F(d->W(4), s->W(4));\ + d->W(5) = F(d->W(5), s->W(5));\ + d->W(6) = F(d->W(6), s->W(6));\ + d->W(7) = F(d->W(7), s->W(7));\ + )\ +} + +#define SSE_HELPER_L(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->L(0) = F(d->L(0), s->L(0));\ + d->L(1) = F(d->L(1), s->L(1));\ + XMM_ONLY(\ + d->L(2) = F(d->L(2), s->L(2));\ + d->L(3) = F(d->L(3), s->L(3));\ + )\ +} + +#define SSE_HELPER_Q(name, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->Q(0) = F(d->Q(0), s->Q(0));\ + XMM_ONLY(\ + d->Q(1) = F(d->Q(1), s->Q(1));\ + )\ +} + +#if SHIFT == 0 +static inline int satub(int x) +{ + if (x < 0) + return 0; + else if (x > 255) + return 255; + else + return x; +} + +static inline int satuw(int x) +{ + if (x < 0) + return 0; + else if (x > 65535) + return 65535; + else + return x; +} + +static inline int satsb(int x) +{ + if (x < -128) + return -128; + else if (x > 127) + return 127; + else + return x; +} + +static inline int satsw(int x) +{ + if (x < -32768) + return -32768; + else if (x > 32767) + return 32767; + else + return x; +} + +#define FADD(a, b) ((a) + (b)) +#define FADDUB(a, b) satub((a) + (b)) +#define FADDUW(a, b) satuw((a) + (b)) +#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b)) +#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b)) + +#define FSUB(a, b) ((a) - (b)) +#define FSUBUB(a, b) satub((a) - (b)) +#define FSUBUW(a, b) satuw((a) - (b)) +#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b)) +#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b)) +#define FMINUB(a, b) ((a) < (b)) ? (a) : (b) +#define FMINSW(a, b) ((int16_t)(a) < (int16_t)(b)) ? (a) : (b) +#define FMAXUB(a, b) ((a) > (b)) ? (a) : (b) +#define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b) + +#define FAND(a, b) (a) & (b) +#define FANDN(a, b) ((~(a)) & (b)) +#define FOR(a, b) (a) | (b) +#define FXOR(a, b) (a) ^ (b) + +#define FCMPGTB(a, b) (int8_t)(a) > (int8_t)(b) ? -1 : 0 +#define FCMPGTW(a, b) (int16_t)(a) > (int16_t)(b) ? -1 : 0 +#define FCMPGTL(a, b) (int32_t)(a) > (int32_t)(b) ? -1 : 0 +#define FCMPEQ(a, b) (a) == (b) ? 
-1 : 0 + +#define FMULLW(a, b) (a) * (b) +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 +#define FMULHUW(a, b) (a) * (b) >> 16 +#define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 + +#define FAVG(a, b) ((a) + (b) + 1) >> 1 +#endif + +SSE_HELPER_B(helper_paddb, FADD) +SSE_HELPER_W(helper_paddw, FADD) +SSE_HELPER_L(helper_paddl, FADD) +SSE_HELPER_Q(helper_paddq, FADD) + +SSE_HELPER_B(helper_psubb, FSUB) +SSE_HELPER_W(helper_psubw, FSUB) +SSE_HELPER_L(helper_psubl, FSUB) +SSE_HELPER_Q(helper_psubq, FSUB) + +SSE_HELPER_B(helper_paddusb, FADDUB) +SSE_HELPER_B(helper_paddsb, FADDSB) +SSE_HELPER_B(helper_psubusb, FSUBUB) +SSE_HELPER_B(helper_psubsb, FSUBSB) + +SSE_HELPER_W(helper_paddusw, FADDUW) +SSE_HELPER_W(helper_paddsw, FADDSW) +SSE_HELPER_W(helper_psubusw, FSUBUW) +SSE_HELPER_W(helper_psubsw, FSUBSW) + +SSE_HELPER_B(helper_pminub, FMINUB) +SSE_HELPER_B(helper_pmaxub, FMAXUB) + +SSE_HELPER_W(helper_pminsw, FMINSW) +SSE_HELPER_W(helper_pmaxsw, FMAXSW) + +SSE_HELPER_Q(helper_pand, FAND) +SSE_HELPER_Q(helper_pandn, FANDN) +SSE_HELPER_Q(helper_por, FOR) +SSE_HELPER_Q(helper_pxor, FXOR) + +SSE_HELPER_B(helper_pcmpgtb, FCMPGTB) +SSE_HELPER_W(helper_pcmpgtw, FCMPGTW) +SSE_HELPER_L(helper_pcmpgtl, FCMPGTL) + +SSE_HELPER_B(helper_pcmpeqb, FCMPEQ) +SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) +SSE_HELPER_L(helper_pcmpeql, FCMPEQ) + +SSE_HELPER_W(helper_pmullw, FMULLW) +#if SHIFT == 0 +SSE_HELPER_W(helper_pmulhrw, FMULHRW) +#endif +SSE_HELPER_W(helper_pmulhuw, FMULHUW) +SSE_HELPER_W(helper_pmulhw, FMULHW) + +SSE_HELPER_B(helper_pavgb, FAVG) +SSE_HELPER_W(helper_pavgw, FAVG) + +void glue(helper_pmuludq, SUFFIX) (Reg *d, Reg *s) +{ + d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); +#if SHIFT == 1 + d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); +#endif +} + +void glue(helper_pmaddwd, SUFFIX) (Reg *d, Reg *s) +{ + int i; + + for(i = 0; i < (2 << SHIFT); i++) { + d->L(i) = (int16_t)s->W(2*i) * (int16_t)d->W(2*i) + + (int16_t)s->W(2*i+1) * (int16_t)d->W(2*i+1); + } +} + +#if SHIFT == 0 +static inline int abs1(int a) +{ + if (a < 0) + return -a; + else + return a; +} +#endif +void glue(helper_psadbw, SUFFIX) (Reg *d, Reg *s) +{ + unsigned int val; + + val = 0; + val += abs1(d->B(0) - s->B(0)); + val += abs1(d->B(1) - s->B(1)); + val += abs1(d->B(2) - s->B(2)); + val += abs1(d->B(3) - s->B(3)); + val += abs1(d->B(4) - s->B(4)); + val += abs1(d->B(5) - s->B(5)); + val += abs1(d->B(6) - s->B(6)); + val += abs1(d->B(7) - s->B(7)); + d->Q(0) = val; +#if SHIFT == 1 + val = 0; + val += abs1(d->B(8) - s->B(8)); + val += abs1(d->B(9) - s->B(9)); + val += abs1(d->B(10) - s->B(10)); + val += abs1(d->B(11) - s->B(11)); + val += abs1(d->B(12) - s->B(12)); + val += abs1(d->B(13) - s->B(13)); + val += abs1(d->B(14) - s->B(14)); + val += abs1(d->B(15) - s->B(15)); + d->Q(1) = val; +#endif +} + +void glue(helper_maskmov, SUFFIX) (Reg *d, Reg *s, target_ulong a0) +{ + int i; + for(i = 0; i < (8 << SHIFT); i++) { + if (s->B(i) & 0x80) + stb(a0 + i, d->B(i)); + } +} + +void glue(helper_movl_mm_T0, SUFFIX) (Reg *d, uint32_t val) +{ + d->L(0) = val; + d->L(1) = 0; +#if SHIFT == 1 + d->Q(1) = 0; +#endif +} + +#ifdef TARGET_X86_64 +void glue(helper_movq_mm_T0, SUFFIX) (Reg *d, uint64_t val) +{ + d->Q(0) = val; +#if SHIFT == 1 + d->Q(1) = 0; +#endif +} +#endif + +#if SHIFT == 0 +void glue(helper_pshufw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.W(0) = s->W(order & 3); + r.W(1) = s->W((order >> 2) & 3); + r.W(2) = s->W((order >> 4) & 3); + r.W(3) = s->W((order >> 6) & 3); + *d = r; +} +#else +void 
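+/* All of the shuffle helpers consume the 8-bit 'order' immediate two bits per result
+   element.  Illustration: order = 0x1b (fields 3, 2, 1, 0 from the low end) makes
+   helper_pshufd below produce {s->L(3), s->L(2), s->L(1), s->L(0)}, i.e. it reverses
+   the four dwords; shufps/shufpd mix fields from d and s exactly as coded below. */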
helper_shufps(Reg *d, Reg *s, int order) +{ + Reg r; + r.L(0) = d->L(order & 3); + r.L(1) = d->L((order >> 2) & 3); + r.L(2) = s->L((order >> 4) & 3); + r.L(3) = s->L((order >> 6) & 3); + *d = r; +} + +void helper_shufpd(Reg *d, Reg *s, int order) +{ + Reg r; + r.Q(0) = d->Q(order & 1); + r.Q(1) = s->Q((order >> 1) & 1); + *d = r; +} + +void glue(helper_pshufd, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.L(0) = s->L(order & 3); + r.L(1) = s->L((order >> 2) & 3); + r.L(2) = s->L((order >> 4) & 3); + r.L(3) = s->L((order >> 6) & 3); + *d = r; +} + +void glue(helper_pshuflw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.W(0) = s->W(order & 3); + r.W(1) = s->W((order >> 2) & 3); + r.W(2) = s->W((order >> 4) & 3); + r.W(3) = s->W((order >> 6) & 3); + r.Q(1) = s->Q(1); + *d = r; +} + +void glue(helper_pshufhw, SUFFIX) (Reg *d, Reg *s, int order) +{ + Reg r; + r.Q(0) = s->Q(0); + r.W(4) = s->W(4 + (order & 3)); + r.W(5) = s->W(4 + ((order >> 2) & 3)); + r.W(6) = s->W(4 + ((order >> 4) & 3)); + r.W(7) = s->W(4 + ((order >> 6) & 3)); + *d = r; +} +#endif + +#if SHIFT == 1 +/* FPU ops */ +/* XXX: not accurate */ + +#define SSE_HELPER_S(name, F)\ +void helper_ ## name ## ps (Reg *d, Reg *s)\ +{\ + d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ + d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ + d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ + d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ +}\ +\ +void helper_ ## name ## ss (Reg *d, Reg *s)\ +{\ + d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ +}\ +void helper_ ## name ## pd (Reg *d, Reg *s)\ +{\ + d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ + d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ +}\ +\ +void helper_ ## name ## sd (Reg *d, Reg *s)\ +{\ + d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ +} + +#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status) +#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status) +#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status) +#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status) +#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b) +#define FPU_MAX(size, a, b) (a) > (b) ? 
(a) : (b) +#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status) + +SSE_HELPER_S(add, FPU_ADD) +SSE_HELPER_S(sub, FPU_SUB) +SSE_HELPER_S(mul, FPU_MUL) +SSE_HELPER_S(div, FPU_DIV) +SSE_HELPER_S(min, FPU_MIN) +SSE_HELPER_S(max, FPU_MAX) +SSE_HELPER_S(sqrt, FPU_SQRT) + + +/* float to float conversions */ +void helper_cvtps2pd(Reg *d, Reg *s) +{ + float32 s0, s1; + s0 = s->XMM_S(0); + s1 = s->XMM_S(1); + d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); + d->XMM_D(1) = float32_to_float64(s1, &env->sse_status); +} + +void helper_cvtpd2ps(Reg *d, Reg *s) +{ + d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); + d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); + d->Q(1) = 0; +} + +void helper_cvtss2sd(Reg *d, Reg *s) +{ + d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status); +} + +void helper_cvtsd2ss(Reg *d, Reg *s) +{ + d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); +} + +/* integer to float */ +void helper_cvtdq2ps(Reg *d, Reg *s) +{ + d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); + d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); + d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); + d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status); +} + +void helper_cvtdq2pd(Reg *d, Reg *s) +{ + int32_t l0, l1; + l0 = (int32_t)s->XMM_L(0); + l1 = (int32_t)s->XMM_L(1); + d->XMM_D(0) = int32_to_float64(l0, &env->sse_status); + d->XMM_D(1) = int32_to_float64(l1, &env->sse_status); +} + +void helper_cvtpi2ps(XMMReg *d, MMXReg *s) +{ + d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); + d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); +} + +void helper_cvtpi2pd(XMMReg *d, MMXReg *s) +{ + d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); + d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); +} + +void helper_cvtsi2ss(XMMReg *d, uint32_t val) +{ + d->XMM_S(0) = int32_to_float32(val, &env->sse_status); +} + +void helper_cvtsi2sd(XMMReg *d, uint32_t val) +{ + d->XMM_D(0) = int32_to_float64(val, &env->sse_status); +} + +#ifdef TARGET_X86_64 +void helper_cvtsq2ss(XMMReg *d, uint64_t val) +{ + d->XMM_S(0) = int64_to_float32(val, &env->sse_status); +} + +void helper_cvtsq2sd(XMMReg *d, uint64_t val) +{ + d->XMM_D(0) = int64_to_float64(val, &env->sse_status); +} +#endif + +/* float to integer */ +void helper_cvtps2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); + d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); + d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); + d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status); +} + +void helper_cvtpd2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); + d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); + d->XMM_Q(1) = 0; +} + +void helper_cvtps2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); +} + +void helper_cvtpd2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); +} + +int32_t helper_cvtss2si(XMMReg *s) +{ + return float32_to_int32(s->XMM_S(0), &env->sse_status); +} + +int32_t helper_cvtsd2si(XMMReg *s) +{ + return float64_to_int32(s->XMM_D(0), &env->sse_status); +} + +#ifdef TARGET_X86_64 +int64_t helper_cvtss2sq(XMMReg *s) +{ + return 
float32_to_int64(s->XMM_S(0), &env->sse_status); +} + +int64_t helper_cvtsd2sq(XMMReg *s) +{ + return float64_to_int64(s->XMM_D(0), &env->sse_status); +} +#endif + +/* float to integer truncated */ +void helper_cvttps2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); + d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); + d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); + d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status); +} + +void helper_cvttpd2dq(XMMReg *d, XMMReg *s) +{ + d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); + d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); + d->XMM_Q(1) = 0; +} + +void helper_cvttps2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); +} + +void helper_cvttpd2pi(MMXReg *d, XMMReg *s) +{ + d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); +} + +int32_t helper_cvttss2si(XMMReg *s) +{ + return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); +} + +int32_t helper_cvttsd2si(XMMReg *s) +{ + return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); +} + +#ifdef TARGET_X86_64 +int64_t helper_cvttss2sq(XMMReg *s) +{ + return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); +} + +int64_t helper_cvttsd2sq(XMMReg *s) +{ + return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); +} +#endif + +void helper_rsqrtps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); + d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); + d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); + d->XMM_S(3) = approx_rsqrt(s->XMM_S(3)); +} + +void helper_rsqrtss(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); +} + +void helper_rcpps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); + d->XMM_S(1) = approx_rcp(s->XMM_S(1)); + d->XMM_S(2) = approx_rcp(s->XMM_S(2)); + d->XMM_S(3) = approx_rcp(s->XMM_S(3)); +} + +void helper_rcpss(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = approx_rcp(s->XMM_S(0)); +} + +static inline uint64_t helper_extrq(uint64_t src, int shift, int len) +{ + uint64_t mask; + + if (len == 0) { + mask = ~0LL; + } else { + mask = (1ULL << len) - 1; + } + return (src >> shift) & mask; +} + +void helper_extrq_r(XMMReg *d, XMMReg *s) +{ + d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0)); +} + +void helper_extrq_i(XMMReg *d, int index, int length) +{ + d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length); +} + +static inline uint64_t helper_insertq(uint64_t src, int shift, int len) +{ + uint64_t mask; + + if (len == 0) { + mask = ~0ULL; + } else { + mask = (1ULL << len) - 1; + } + return (src & ~(mask << shift)) | ((src & mask) << shift); +} + +void helper_insertq_r(XMMReg *d, XMMReg *s) +{ + d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8)); +} + +void helper_insertq_i(XMMReg *d, int index, int length) +{ + d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length); +} + +void helper_haddps(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_S(0) = d->XMM_S(0) + d->XMM_S(1); + r.XMM_S(1) = d->XMM_S(2) + d->XMM_S(3); + r.XMM_S(2) = s->XMM_S(0) + s->XMM_S(1); + r.XMM_S(3) = s->XMM_S(2) + s->XMM_S(3); + *d = r; +} + +void helper_haddpd(XMMReg *d, XMMReg *s) 
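+/* Horizontal add of adjacent double pairs.  Illustration: d = {1.0, 2.0} and
+   s = {10.0, 20.0} give d = {3.0, 30.0}. */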
+{ + XMMReg r; + r.XMM_D(0) = d->XMM_D(0) + d->XMM_D(1); + r.XMM_D(1) = s->XMM_D(0) + s->XMM_D(1); + *d = r; +} + +void helper_hsubps(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_S(0) = d->XMM_S(0) - d->XMM_S(1); + r.XMM_S(1) = d->XMM_S(2) - d->XMM_S(3); + r.XMM_S(2) = s->XMM_S(0) - s->XMM_S(1); + r.XMM_S(3) = s->XMM_S(2) - s->XMM_S(3); + *d = r; +} + +void helper_hsubpd(XMMReg *d, XMMReg *s) +{ + XMMReg r; + r.XMM_D(0) = d->XMM_D(0) - d->XMM_D(1); + r.XMM_D(1) = s->XMM_D(0) - s->XMM_D(1); + *d = r; +} + +void helper_addsubps(XMMReg *d, XMMReg *s) +{ + d->XMM_S(0) = d->XMM_S(0) - s->XMM_S(0); + d->XMM_S(1) = d->XMM_S(1) + s->XMM_S(1); + d->XMM_S(2) = d->XMM_S(2) - s->XMM_S(2); + d->XMM_S(3) = d->XMM_S(3) + s->XMM_S(3); +} + +void helper_addsubpd(XMMReg *d, XMMReg *s) +{ + d->XMM_D(0) = d->XMM_D(0) - s->XMM_D(0); + d->XMM_D(1) = d->XMM_D(1) + s->XMM_D(1); +} + +/* XXX: unordered */ +#define SSE_HELPER_CMP(name, F)\ +void helper_ ## name ## ps (Reg *d, Reg *s)\ +{\ + d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ + d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1));\ + d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2));\ + d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3));\ +}\ +\ +void helper_ ## name ## ss (Reg *d, Reg *s)\ +{\ + d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0));\ +}\ +void helper_ ## name ## pd (Reg *d, Reg *s)\ +{\ + d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ + d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1));\ +}\ +\ +void helper_ ## name ## sd (Reg *d, Reg *s)\ +{\ + d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ +} + +#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? - 1 : 0 +#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 
0 : -1 + +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) +SSE_HELPER_CMP(cmplt, FPU_CMPLT) +SSE_HELPER_CMP(cmple, FPU_CMPLE) +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) +SSE_HELPER_CMP(cmpord, FPU_CMPORD) + +static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_ucomiss(Reg *d, Reg *s) +{ + int ret; + float32 s0, s1; + + s0 = d->XMM_S(0); + s1 = s->XMM_S(0); + ret = float32_compare_quiet(s0, s1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_comiss(Reg *d, Reg *s) +{ + int ret; + float32 s0, s1; + + s0 = d->XMM_S(0); + s1 = s->XMM_S(0); + ret = float32_compare(s0, s1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_ucomisd(Reg *d, Reg *s) +{ + int ret; + float64 d0, d1; + + d0 = d->XMM_D(0); + d1 = s->XMM_D(0); + ret = float64_compare_quiet(d0, d1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +void helper_comisd(Reg *d, Reg *s) +{ + int ret; + float64 d0, d1; + + d0 = d->XMM_D(0); + d1 = s->XMM_D(0); + ret = float64_compare(d0, d1, &env->sse_status); + CC_SRC = comis_eflags[ret + 1]; +} + +uint32_t helper_movmskps(Reg *s) +{ + int b0, b1, b2, b3; + b0 = s->XMM_L(0) >> 31; + b1 = s->XMM_L(1) >> 31; + b2 = s->XMM_L(2) >> 31; + b3 = s->XMM_L(3) >> 31; + return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); +} + +uint32_t helper_movmskpd(Reg *s) +{ + int b0, b1; + b0 = s->XMM_L(1) >> 31; + b1 = s->XMM_L(3) >> 31; + return b0 | (b1 << 1); +} + +#endif + +uint32_t glue(helper_pmovmskb, SUFFIX)(Reg *s) +{ + uint32_t val; + val = 0; + val |= (s->B(0) >> 7); + val |= (s->B(1) >> 6) & 0x02; + val |= (s->B(2) >> 5) & 0x04; + val |= (s->B(3) >> 4) & 0x08; + val |= (s->B(4) >> 3) & 0x10; + val |= (s->B(5) >> 2) & 0x20; + val |= (s->B(6) >> 1) & 0x40; + val |= (s->B(7)) & 0x80; +#if SHIFT == 1 + val |= (s->B(8) << 1) & 0x0100; + val |= (s->B(9) << 2) & 0x0200; + val |= (s->B(10) << 3) & 0x0400; + val |= (s->B(11) << 4) & 0x0800; + val |= (s->B(12) << 5) & 0x1000; + val |= (s->B(13) << 6) & 0x2000; + val |= (s->B(14) << 7) & 0x4000; + val |= (s->B(15) << 8) & 0x8000; +#endif + return val; +} + +void glue(helper_packsswb, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.B(0) = satsb((int16_t)d->W(0)); + r.B(1) = satsb((int16_t)d->W(1)); + r.B(2) = satsb((int16_t)d->W(2)); + r.B(3) = satsb((int16_t)d->W(3)); +#if SHIFT == 1 + r.B(4) = satsb((int16_t)d->W(4)); + r.B(5) = satsb((int16_t)d->W(5)); + r.B(6) = satsb((int16_t)d->W(6)); + r.B(7) = satsb((int16_t)d->W(7)); +#endif + r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0)); + r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1)); + r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2)); + r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3)); +#if SHIFT == 1 + r.B(12) = satsb((int16_t)s->W(4)); + r.B(13) = satsb((int16_t)s->W(5)); + r.B(14) = satsb((int16_t)s->W(6)); + r.B(15) = satsb((int16_t)s->W(7)); +#endif + *d = r; +} + +void glue(helper_packuswb, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.B(0) = satub((int16_t)d->W(0)); + r.B(1) = satub((int16_t)d->W(1)); + r.B(2) = satub((int16_t)d->W(2)); + r.B(3) = satub((int16_t)d->W(3)); +#if SHIFT == 1 + r.B(4) = satub((int16_t)d->W(4)); + r.B(5) = satub((int16_t)d->W(5)); + r.B(6) = satub((int16_t)d->W(6)); + r.B(7) = satub((int16_t)d->W(7)); +#endif + r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0)); + r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1)); + r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2)); + r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3)); +#if SHIFT 
== 1 + r.B(12) = satub((int16_t)s->W(4)); + r.B(13) = satub((int16_t)s->W(5)); + r.B(14) = satub((int16_t)s->W(6)); + r.B(15) = satub((int16_t)s->W(7)); +#endif + *d = r; +} + +void glue(helper_packssdw, SUFFIX) (Reg *d, Reg *s) +{ + Reg r; + + r.W(0) = satsw(d->L(0)); + r.W(1) = satsw(d->L(1)); +#if SHIFT == 1 + r.W(2) = satsw(d->L(2)); + r.W(3) = satsw(d->L(3)); +#endif + r.W((2 << SHIFT) + 0) = satsw(s->L(0)); + r.W((2 << SHIFT) + 1) = satsw(s->L(1)); +#if SHIFT == 1 + r.W(6) = satsw(s->L(2)); + r.W(7) = satsw(s->L(3)); +#endif + *d = r; +} + +#define UNPCK_OP(base_name, base) \ + \ +void glue(helper_punpck ## base_name ## bw, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ + r.B(1) = s->B((base << (SHIFT + 2)) + 0); \ + r.B(2) = d->B((base << (SHIFT + 2)) + 1); \ + r.B(3) = s->B((base << (SHIFT + 2)) + 1); \ + r.B(4) = d->B((base << (SHIFT + 2)) + 2); \ + r.B(5) = s->B((base << (SHIFT + 2)) + 2); \ + r.B(6) = d->B((base << (SHIFT + 2)) + 3); \ + r.B(7) = s->B((base << (SHIFT + 2)) + 3); \ +XMM_ONLY( \ + r.B(8) = d->B((base << (SHIFT + 2)) + 4); \ + r.B(9) = s->B((base << (SHIFT + 2)) + 4); \ + r.B(10) = d->B((base << (SHIFT + 2)) + 5); \ + r.B(11) = s->B((base << (SHIFT + 2)) + 5); \ + r.B(12) = d->B((base << (SHIFT + 2)) + 6); \ + r.B(13) = s->B((base << (SHIFT + 2)) + 6); \ + r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ + r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ +) \ + *d = r; \ +} \ + \ +void glue(helper_punpck ## base_name ## wd, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ + r.W(1) = s->W((base << (SHIFT + 1)) + 0); \ + r.W(2) = d->W((base << (SHIFT + 1)) + 1); \ + r.W(3) = s->W((base << (SHIFT + 1)) + 1); \ +XMM_ONLY( \ + r.W(4) = d->W((base << (SHIFT + 1)) + 2); \ + r.W(5) = s->W((base << (SHIFT + 1)) + 2); \ + r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ + r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ +) \ + *d = r; \ +} \ + \ +void glue(helper_punpck ## base_name ## dq, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.L(0) = d->L((base << SHIFT) + 0); \ + r.L(1) = s->L((base << SHIFT) + 0); \ +XMM_ONLY( \ + r.L(2) = d->L((base << SHIFT) + 1); \ + r.L(3) = s->L((base << SHIFT) + 1); \ +) \ + *d = r; \ +} \ + \ +XMM_ONLY( \ +void glue(helper_punpck ## base_name ## qdq, SUFFIX) (Reg *d, Reg *s) \ +{ \ + Reg r; \ + \ + r.Q(0) = d->Q(base); \ + r.Q(1) = s->Q(base); \ + *d = r; \ +} \ +) + +UNPCK_OP(l, 0) +UNPCK_OP(h, 1) + +/* 3DNow! 
float ops */ +#if SHIFT == 0 +void helper_pi2fd(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); +} + +void helper_pi2fw(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); +} + +void helper_pf2id(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); +} + +void helper_pf2iw(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); +} + +void helper_pfacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfadd(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfcmpeq(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void helper_pfcmpge(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void helper_pfcmpgt(MMXReg *d, MMXReg *s) +{ + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
-1 : 0; +} + +void helper_pfmax(MMXReg *d, MMXReg *s) +{ + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void helper_pfmin(MMXReg *d, MMXReg *s) +{ + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void helper_pfmul(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfnacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfpnacc(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void helper_pfrcp(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); + d->MMX_S(1) = d->MMX_S(0); +} + +void helper_pfrsqrt(MMXReg *d, MMXReg *s) +{ + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; + d->MMX_L(0) = d->MMX_L(1); +} + +void helper_pfsub(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void helper_pfsubr(MMXReg *d, MMXReg *s) +{ + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); +} + +void helper_pswapd(MMXReg *d, MMXReg *s) +{ + MMXReg r; + r.MMX_L(0) = s->MMX_L(1); + r.MMX_L(1) = s->MMX_L(0); + *d = r; +} +#endif + +/* SSSE3 op helpers */ +void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) +{ + int i; + Reg r; + + for (i = 0; i < (8 << SHIFT); i++) + r.B(i) = (s->B(i) & 0x80) ? 
0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); + + *d = r; +} + +void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); +} + +void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) +{ + d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); +} + +void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); +} + +void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + + (int8_t)s->B( 1) * (uint8_t)d->B( 1)); + d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + + (int8_t)s->B( 3) * (uint8_t)d->B( 3)); + d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + + (int8_t)s->B( 5) * (uint8_t)d->B( 5)); + d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + + (int8_t)s->B( 7) * (uint8_t)d->B( 7)); +#if SHIFT == 1 + d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + + (int8_t)s->B( 9) * (uint8_t)d->B( 9)); + d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + + (int8_t)s->B(11) * (uint8_t)d->B(11)); + d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + + (int8_t)s->B(13) * (uint8_t)d->B(13)); + d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + + (int8_t)s->B(15) * (uint8_t)d->B(15)); +#endif +} + +void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); + d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); + XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); + XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); + d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); + d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); + XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); + XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); +} + +void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) +{ + d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); + XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); + d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); + XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); +} + +void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); + d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); + XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); + XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); + d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); + d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - 
(int16_t)s->W(3)); + XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); + XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); +} + +#define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x +#define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x +#define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x +SSE_HELPER_B(helper_pabsb, FABSB) +SSE_HELPER_W(helper_pabsw, FABSW) +SSE_HELPER_L(helper_pabsd, FABSL) + +#define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 +SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) + +#define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d +#define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d +#define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d +SSE_HELPER_B(helper_psignb, FSIGNB) +SSE_HELPER_W(helper_psignw, FSIGNW) +SSE_HELPER_L(helper_psignd, FSIGNL) + +void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) +{ + Reg r; + + /* XXX could be checked during translation */ + if (shift >= (16 << SHIFT)) { + r.Q(0) = 0; + XMM_ONLY(r.Q(1) = 0); + } else { + shift <<= 3; +#define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) +#if SHIFT == 0 + r.Q(0) = SHR(s->Q(0), shift - 0) | + SHR(d->Q(0), shift - 64); +#else + r.Q(0) = SHR(s->Q(0), shift - 0) | + SHR(s->Q(1), shift - 64) | + SHR(d->Q(0), shift - 128) | + SHR(d->Q(1), shift - 192); + r.Q(1) = SHR(s->Q(0), shift + 64) | + SHR(s->Q(1), shift - 0) | + SHR(d->Q(0), shift - 64) | + SHR(d->Q(1), shift - 128); +#endif +#undef SHR + } + + *d = r; +} + +#define XMM0 env->xmm_regs[0] + +#if SHIFT == 1 +#define SSE_HELPER_V(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));\ + d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));\ + if (num > 2) {\ + d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));\ + d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));\ + if (num > 4) {\ + d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));\ + d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));\ + d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));\ + d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));\ + if (num > 8) {\ + d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8));\ + d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9));\ + d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10));\ + d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11));\ + d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12));\ + d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13));\ + d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14));\ + d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15));\ + }\ + }\ + }\ +} + +#define SSE_HELPER_I(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s, uint32_t imm)\ +{\ + d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));\ + d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));\ + if (num > 2) {\ + d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));\ + d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));\ + if (num > 4) {\ + d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1));\ + d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1));\ + d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1));\ + d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1));\ + if (num > 8) {\ + d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1));\ + d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1));\ + d->elem(10) = F(d->elem(10), s->elem(10), ((imm >> 10) & 1));\ + d->elem(11) = F(d->elem(11), s->elem(11), ((imm >> 
11) & 1));\ + d->elem(12) = F(d->elem(12), s->elem(12), ((imm >> 12) & 1));\ + d->elem(13) = F(d->elem(13), s->elem(13), ((imm >> 13) & 1));\ + d->elem(14) = F(d->elem(14), s->elem(14), ((imm >> 14) & 1));\ + d->elem(15) = F(d->elem(15), s->elem(15), ((imm >> 15) & 1));\ + }\ + }\ + }\ +} + +/* SSE4.1 op helpers */ +#define FBLENDVB(d, s, m) (m & 0x80) ? s : d +#define FBLENDVPS(d, s, m) (m & 0x80000000) ? s : d +#define FBLENDVPD(d, s, m) (m & 0x8000000000000000LL) ? s : d +SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB) +SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS) +SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD) + +void glue(helper_ptest, SUFFIX) (Reg *d, Reg *s) +{ + uint64_t zf = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1)); + uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1)); + + CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C); +} + +#define SSE_HELPER_F(name, elem, num, F)\ +void glue(name, SUFFIX) (Reg *d, Reg *s)\ +{\ + d->elem(0) = F(0);\ + d->elem(1) = F(1);\ + if (num > 2) {\ + d->elem(2) = F(2);\ + d->elem(3) = F(3);\ + if (num > 4) {\ + d->elem(4) = F(4);\ + d->elem(5) = F(5);\ + d->elem(6) = F(6);\ + d->elem(7) = F(7);\ + }\ + }\ +} + +SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L) +SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B) +SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B) +SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B) +SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W) +SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W) +SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L) + +void glue(helper_pmuldq, SUFFIX) (Reg *d, Reg *s) +{ + d->Q(0) = (int64_t) (int32_t) d->L(0) * (int32_t) s->L(0); + d->Q(1) = (int64_t) (int32_t) d->L(2) * (int32_t) s->L(2); +} + +#define FCMPEQQ(d, s) d == s ? 
-1 : 0 +SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ) + +void glue(helper_packusdw, SUFFIX) (Reg *d, Reg *s) +{ + d->W(0) = satuw((int32_t) d->L(0)); + d->W(1) = satuw((int32_t) d->L(1)); + d->W(2) = satuw((int32_t) d->L(2)); + d->W(3) = satuw((int32_t) d->L(3)); + d->W(4) = satuw((int32_t) s->L(0)); + d->W(5) = satuw((int32_t) s->L(1)); + d->W(6) = satuw((int32_t) s->L(2)); + d->W(7) = satuw((int32_t) s->L(3)); +} + +#define FMINSB(d, s) MIN((int8_t) d, (int8_t) s) +#define FMINSD(d, s) MIN((int32_t) d, (int32_t) s) +#define FMAXSB(d, s) MAX((int8_t) d, (int8_t) s) +#define FMAXSD(d, s) MAX((int32_t) d, (int32_t) s) +SSE_HELPER_B(helper_pminsb, FMINSB) +SSE_HELPER_L(helper_pminsd, FMINSD) +SSE_HELPER_W(helper_pminuw, MIN) +SSE_HELPER_L(helper_pminud, MIN) +SSE_HELPER_B(helper_pmaxsb, FMAXSB) +SSE_HELPER_L(helper_pmaxsd, FMAXSD) +SSE_HELPER_W(helper_pmaxuw, MAX) +SSE_HELPER_L(helper_pmaxud, MAX) + +#define FMULLD(d, s) (int32_t) d * (int32_t) s +SSE_HELPER_L(helper_pmulld, FMULLD) + +void glue(helper_phminposuw, SUFFIX) (Reg *d, Reg *s) +{ + int idx = 0; + + if (s->W(1) < s->W(idx)) + idx = 1; + if (s->W(2) < s->W(idx)) + idx = 2; + if (s->W(3) < s->W(idx)) + idx = 3; + if (s->W(4) < s->W(idx)) + idx = 4; + if (s->W(5) < s->W(idx)) + idx = 5; + if (s->W(6) < s->W(idx)) + idx = 6; + if (s->W(7) < s->W(idx)) + idx = 7; + + d->Q(1) = 0; + d->L(1) = 0; + d->W(1) = idx; + d->W(0) = s->W(idx); +} + +void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode) +{ + signed char prev_rounding_mode; + + prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) + switch (mode & 3) { + case 0: + set_float_rounding_mode(float_round_nearest_even, &env->sse_status); + break; + case 1: + set_float_rounding_mode(float_round_down, &env->sse_status); + break; + case 2: + set_float_rounding_mode(float_round_up, &env->sse_status); + break; + case 3: + set_float_rounding_mode(float_round_to_zero, &env->sse_status); + break; + } + + d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); + d->L(1) = float64_round_to_int(s->L(1), &env->sse_status); + d->L(2) = float64_round_to_int(s->L(2), &env->sse_status); + d->L(3) = float64_round_to_int(s->L(3), &env->sse_status); + +#if 0 /* TODO */ + if (mode & (1 << 3)) + set_float_exception_flags( + get_float_exception_flags(&env->sse_status) & + ~float_flag_inexact, + &env->sse_status); +#endif + env->sse_status.float_rounding_mode = prev_rounding_mode; +} + +void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) +{ + signed char prev_rounding_mode; + + prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) + switch (mode & 3) { + case 0: + set_float_rounding_mode(float_round_nearest_even, &env->sse_status); + break; + case 1: + set_float_rounding_mode(float_round_down, &env->sse_status); + break; + case 2: + set_float_rounding_mode(float_round_up, &env->sse_status); + break; + case 3: + set_float_rounding_mode(float_round_to_zero, &env->sse_status); + break; + } + + d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); + d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status); + +#if 0 /* TODO */ + if (mode & (1 << 3)) + set_float_exception_flags( + get_float_exception_flags(&env->sse_status) & + ~float_flag_inexact, + &env->sse_status); +#endif + env->sse_status.float_rounding_mode = prev_rounding_mode; +} + +void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode) +{ + signed char prev_rounding_mode; + + prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) 
+ switch (mode & 3) { + case 0: + set_float_rounding_mode(float_round_nearest_even, &env->sse_status); + break; + case 1: + set_float_rounding_mode(float_round_down, &env->sse_status); + break; + case 2: + set_float_rounding_mode(float_round_up, &env->sse_status); + break; + case 3: + set_float_rounding_mode(float_round_to_zero, &env->sse_status); + break; + } + + d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); + +#if 0 /* TODO */ + if (mode & (1 << 3)) + set_float_exception_flags( + get_float_exception_flags(&env->sse_status) & + ~float_flag_inexact, + &env->sse_status); +#endif + env->sse_status.float_rounding_mode = prev_rounding_mode; +} + +void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) +{ + signed char prev_rounding_mode; + + prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) + switch (mode & 3) { + case 0: + set_float_rounding_mode(float_round_nearest_even, &env->sse_status); + break; + case 1: + set_float_rounding_mode(float_round_down, &env->sse_status); + break; + case 2: + set_float_rounding_mode(float_round_up, &env->sse_status); + break; + case 3: + set_float_rounding_mode(float_round_to_zero, &env->sse_status); + break; + } + + d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); + +#if 0 /* TODO */ + if (mode & (1 << 3)) + set_float_exception_flags( + get_float_exception_flags(&env->sse_status) & + ~float_flag_inexact, + &env->sse_status); +#endif + env->sse_status.float_rounding_mode = prev_rounding_mode; +} + +#define FBLENDP(d, s, m) m ? s : d +SSE_HELPER_I(helper_blendps, L, 4, FBLENDP) +SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP) +SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP) + +void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask) +{ + float32 iresult = 0 /*float32_zero*/; + + if (mask & (1 << 4)) + iresult = float32_add(iresult, + float32_mul(d->L(0), s->L(0), &env->sse_status), + &env->sse_status); + if (mask & (1 << 5)) + iresult = float32_add(iresult, + float32_mul(d->L(1), s->L(1), &env->sse_status), + &env->sse_status); + if (mask & (1 << 6)) + iresult = float32_add(iresult, + float32_mul(d->L(2), s->L(2), &env->sse_status), + &env->sse_status); + if (mask & (1 << 7)) + iresult = float32_add(iresult, + float32_mul(d->L(3), s->L(3), &env->sse_status), + &env->sse_status); + d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/; + d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/; + d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/; + d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/; +} + +void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask) +{ + float64 iresult = 0 /*float64_zero*/; + + if (mask & (1 << 4)) + iresult = float64_add(iresult, + float64_mul(d->Q(0), s->Q(0), &env->sse_status), + &env->sse_status); + if (mask & (1 << 5)) + iresult = float64_add(iresult, + float64_mul(d->Q(1), s->Q(1), &env->sse_status), + &env->sse_status); + d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/; + d->Q(1) = (mask & (1 << 1)) ? 
iresult : 0 /*float64_zero*/; +} + +void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset) +{ + int s0 = (offset & 3) << 2; + int d0 = (offset & 4) << 0; + int i; + Reg r; + + for (i = 0; i < 8; i++, d0++) { + r.W(i) = 0; + r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0)); + r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1)); + r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2)); + r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); + } + + *d = r; +} + +/* SSE4.2 op helpers */ +/* it's unclear whether signed or unsigned */ +#define FCMPGTQ(d, s) d > s ? -1 : 0 +SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ) + +static inline int pcmp_elen(int reg, uint32_t ctrl) +{ + int val; + + /* Presence of REX.W is indicated by a bit higher than 7 set */ + if (ctrl >> 8) + val = abs1((int64_t) env->regs[reg]); + else + val = abs1((int32_t) env->regs[reg]); + + if (ctrl & 1) { + if (val > 8) + return 8; + } else + if (val > 16) + return 16; + + return val; +} + +static inline int pcmp_ilen(Reg *r, uint8_t ctrl) +{ + int val = 0; + + if (ctrl & 1) { + while (val < 8 && r->W(val)) + val++; + } else + while (val < 16 && r->B(val)) + val++; + + return val; +} + +static inline int pcmp_val(Reg *r, uint8_t ctrl, int i) +{ + switch ((ctrl >> 0) & 3) { + case 0: + return r->B(i); + case 1: + return r->W(i); + case 2: + return (int8_t) r->B(i); + case 3: + default: + return (int16_t) r->W(i); + } +} + +static inline unsigned pcmpxstrx(Reg *d, Reg *s, + int8_t ctrl, int valids, int validd) +{ + unsigned int res = 0; + int v; + int j, i; + int upper = (ctrl & 1) ? 7 : 15; + + valids--; + validd--; + + CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0); + + switch ((ctrl >> 2) & 3) { + case 0: + for (j = valids; j >= 0; j--) { + res <<= 1; + v = pcmp_val(s, ctrl, j); + for (i = validd; i >= 0; i--) + res |= (v == pcmp_val(d, ctrl, i)); + } + break; + case 1: + for (j = valids; j >= 0; j--) { + res <<= 1; + v = pcmp_val(s, ctrl, j); + for (i = ((validd - 1) | 1); i >= 0; i -= 2) + res |= (pcmp_val(d, ctrl, i - 0) <= v && + pcmp_val(d, ctrl, i - 1) >= v); + } + break; + case 2: + res = (2 << (upper - MAX(valids, validd))) - 1; + res <<= MAX(valids, validd) - MIN(valids, validd); + for (i = MIN(valids, validd); i >= 0; i--) { + res <<= 1; + v = pcmp_val(s, ctrl, i); + res |= (v == pcmp_val(d, ctrl, i)); + } + break; + case 3: + for (j = valids - validd; j >= 0; j--) { + res <<= 1; + res |= 1; + for (i = MIN(upper - j, validd); i >= 0; i--) + res &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); + } + break; + } + + switch ((ctrl >> 4) & 3) { + case 1: + res ^= (2 << upper) - 1; + break; + case 3: + res ^= (2 << valids) - 1; + break; + } + + if (res) + CC_SRC |= CC_C; + if (res & 1) + CC_SRC |= CC_O; + + return res; +} + +static inline int rffs1(unsigned int val) +{ + int ret = 1, hi; + + for (hi = sizeof(val) * 4; hi; hi /= 2) + if (val >> hi) { + val >>= hi; + ret += hi; + } + + return ret; +} + +static inline int ffs1(unsigned int val) +{ + int ret = 1, hi; + + for (hi = sizeof(val) * 4; hi; hi /= 2) + if (val << hi) { + val <<= hi; + ret += hi; + } + + return ret; +} + +void glue(helper_pcmpestri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_elen(R_EDX, ctrl), + pcmp_elen(R_EAX, ctrl)); + + if (res) +#ifndef VBOX + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1; +#else + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? 
rffs1(res) : ffs1(res)) - 1; +#endif + else + env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); +} + +void glue(helper_pcmpestrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + int i; + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_elen(R_EDX, ctrl), + pcmp_elen(R_EAX, ctrl)); + + if ((ctrl >> 6) & 1) { +#ifndef VBOX + if (ctrl & 1) + for (i = 0; i <= 8; i--, res >>= 1) + d->W(i) = (res & 1) ? ~0 : 0; + else + for (i = 0; i <= 16; i--, res >>= 1) + d->B(i) = (res & 1) ? ~0 : 0; +#else + if (ctrl & 1) + for (i = 0; i < 8; i++, res >>= 1) { + d->W(i) = (res & 1) ? ~0 : 0; + } + else + for (i = 0; i < 16; i++, res >>= 1) { + d->B(i) = (res & 1) ? ~0 : 0; + } +#endif + } else { + d->Q(1) = 0; + d->Q(0) = res; + } +} + +void glue(helper_pcmpistri, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_ilen(s, ctrl), + pcmp_ilen(d, ctrl)); + + if (res) + env->regs[R_ECX] = ((ctrl & (1 << 6)) ? rffs1 : ffs1)(res) - 1; + else + env->regs[R_ECX] = 16 >> (ctrl & (1 << 0)); +} + +void glue(helper_pcmpistrm, SUFFIX) (Reg *d, Reg *s, uint32_t ctrl) +{ + int i; + unsigned int res = pcmpxstrx(d, s, ctrl, + pcmp_ilen(s, ctrl), + pcmp_ilen(d, ctrl)); + + if ((ctrl >> 6) & 1) { +#ifndef VBOX + if (ctrl & 1) + for (i = 0; i <= 8; i--, res >>= 1) + d->W(i) = (res & 1) ? ~0 : 0; + else + for (i = 0; i <= 16; i--, res >>= 1) + d->B(i) = (res & 1) ? ~0 : 0; +#else + if (ctrl & 1) + for (i = 0; i < 8; i++, res >>= 1) { + d->W(i) = (res & 1) ? ~0 : 0; + } + else + for (i = 0; i < 16; i++, res >>= 1) { + d->B(i) = (res & 1) ? ~0 : 0; + } +#endif + } else { + d->Q(1) = 0; + d->Q(0) = res; + } +} + +#define CRCPOLY 0x1edc6f41 +#define CRCPOLY_BITREV 0x82f63b78 +target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) +{ + target_ulong crc = (msg & ((target_ulong) -1 >> + (TARGET_LONG_BITS - len))) ^ crc1; + + while (len--) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0); + + return crc; +} + +#define POPMASK(i) ((target_ulong) -1 / ((1LL << (1 << i)) + 1)) +#define POPCOUNT(n, i) (n & POPMASK(i)) + ((n >> (1 << i)) & POPMASK(i)) +target_ulong helper_popcnt(target_ulong n, uint32_t type) +{ + CC_SRC = n ? 0 : CC_Z; + + n = POPCOUNT(n, 0); + n = POPCOUNT(n, 1); + n = POPCOUNT(n, 2); + n = POPCOUNT(n, 3); + if (type == 1) + return n & 0xff; + + n = POPCOUNT(n, 4); +#ifndef TARGET_X86_64 + return n; +#else + if (type == 2) + return n & 0xff; + + return POPCOUNT(n, 5); +#endif +} +#endif + +#undef SHIFT +#undef XMM_ONLY +#undef Reg +#undef B +#undef W +#undef L +#undef Q +#undef SUFFIX diff --git a/src/recompiler/target-i386/ops_sse_header.h b/src/recompiler/target-i386/ops_sse_header.h new file mode 100644 index 00000000..efa3e905 --- /dev/null +++ b/src/recompiler/target-i386/ops_sse_header.h @@ -0,0 +1,359 @@ +/* + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support + * + * Copyright (c) 2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#if SHIFT == 0 +#define Reg MMXReg +#define SUFFIX _mmx +#else +#define Reg XMMReg +#define SUFFIX _xmm +#endif + +#define dh_alias_Reg ptr +#define dh_alias_XMMReg ptr +#define dh_alias_MMXReg ptr +#define dh_ctype_Reg Reg * +#define dh_ctype_XMMReg XMMReg * +#define dh_ctype_MMXReg MMXReg * +#define dh_is_signed_Reg dh_is_signed_ptr +#define dh_is_signed_XMMReg dh_is_signed_ptr +#define dh_is_signed_MMXReg dh_is_signed_ptr + +DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg) + +#if SHIFT == 1 +DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg) +#endif + +#define SSE_HELPER_B(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_W(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_L(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +#define SSE_HELPER_Q(name, F)\ + DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) + +SSE_HELPER_B(paddb, FADD) +SSE_HELPER_W(paddw, FADD) +SSE_HELPER_L(paddl, FADD) +SSE_HELPER_Q(paddq, FADD) + +SSE_HELPER_B(psubb, FSUB) +SSE_HELPER_W(psubw, FSUB) +SSE_HELPER_L(psubl, FSUB) +SSE_HELPER_Q(psubq, FSUB) + +SSE_HELPER_B(paddusb, FADDUB) +SSE_HELPER_B(paddsb, FADDSB) +SSE_HELPER_B(psubusb, FSUBUB) +SSE_HELPER_B(psubsb, FSUBSB) + +SSE_HELPER_W(paddusw, FADDUW) +SSE_HELPER_W(paddsw, FADDSW) +SSE_HELPER_W(psubusw, FSUBUW) +SSE_HELPER_W(psubsw, FSUBSW) + +SSE_HELPER_B(pminub, FMINUB) +SSE_HELPER_B(pmaxub, FMAXUB) + +SSE_HELPER_W(pminsw, FMINSW) +SSE_HELPER_W(pmaxsw, FMAXSW) + +SSE_HELPER_Q(pand, FAND) +SSE_HELPER_Q(pandn, FANDN) +SSE_HELPER_Q(por, FOR) +SSE_HELPER_Q(pxor, FXOR) + +SSE_HELPER_B(pcmpgtb, FCMPGTB) +SSE_HELPER_W(pcmpgtw, FCMPGTW) +SSE_HELPER_L(pcmpgtl, FCMPGTL) + +SSE_HELPER_B(pcmpeqb, FCMPEQ) +SSE_HELPER_W(pcmpeqw, FCMPEQ) +SSE_HELPER_L(pcmpeql, FCMPEQ) + +SSE_HELPER_W(pmullw, FMULLW) +#if SHIFT == 0 +SSE_HELPER_W(pmulhrw, FMULHRW) +#endif +SSE_HELPER_W(pmulhuw, FMULHUW) +SSE_HELPER_W(pmulhw, FMULHW) + +SSE_HELPER_B(pavgb, FAVG) +SSE_HELPER_W(pavgw, FAVG) + +DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg) + +DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl) +DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) +#ifdef TARGET_X86_64 +DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) +#endif + +#if SHIFT == 0 +DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) +#else +DEF_HELPER_3(shufps, void, Reg, 
Reg, int) +DEF_HELPER_3(shufpd, void, Reg, Reg, int) +DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) +#endif + +#if SHIFT == 1 +/* FPU ops */ +/* XXX: not accurate */ + +#define SSE_HELPER_S(name, F)\ + DEF_HELPER_2(name ## ps , void, Reg, Reg) \ + DEF_HELPER_2(name ## ss , void, Reg, Reg) \ + DEF_HELPER_2(name ## pd , void, Reg, Reg) \ + DEF_HELPER_2(name ## sd , void, Reg, Reg) + +SSE_HELPER_S(add, FPU_ADD) +SSE_HELPER_S(sub, FPU_SUB) +SSE_HELPER_S(mul, FPU_MUL) +SSE_HELPER_S(div, FPU_DIV) +SSE_HELPER_S(min, FPU_MIN) +SSE_HELPER_S(max, FPU_MAX) +SSE_HELPER_S(sqrt, FPU_SQRT) + + +DEF_HELPER_2(cvtps2pd, void, Reg, Reg) +DEF_HELPER_2(cvtpd2ps, void, Reg, Reg) +DEF_HELPER_2(cvtss2sd, void, Reg, Reg) +DEF_HELPER_2(cvtsd2ss, void, Reg, Reg) +DEF_HELPER_2(cvtdq2ps, void, Reg, Reg) +DEF_HELPER_2(cvtdq2pd, void, Reg, Reg) +DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg) +DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg) +DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32) +DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32) + +#ifdef TARGET_X86_64 +DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64) +DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64) +#endif + +DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg) +DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg) +DEF_HELPER_1(cvtss2si, s32, XMMReg) +DEF_HELPER_1(cvtsd2si, s32, XMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cvtss2sq, s64, XMMReg) +DEF_HELPER_1(cvtsd2sq, s64, XMMReg) +#endif + +DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg) +DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg) +DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg) +DEF_HELPER_1(cvttss2si, s32, XMMReg) +DEF_HELPER_1(cvttsd2si, s32, XMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_1(cvttss2sq, s64, XMMReg) +DEF_HELPER_1(cvttsd2sq, s64, XMMReg) +#endif + +DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg) +DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg) +DEF_HELPER_2(rcpps, void, XMMReg, XMMReg) +DEF_HELPER_2(rcpss, void, XMMReg, XMMReg) +DEF_HELPER_2(extrq_r, void, XMMReg, XMMReg) +DEF_HELPER_3(extrq_i, void, XMMReg, int, int) +DEF_HELPER_2(insertq_r, void, XMMReg, XMMReg) +DEF_HELPER_3(insertq_i, void, XMMReg, int, int) +DEF_HELPER_2(haddps, void, XMMReg, XMMReg) +DEF_HELPER_2(haddpd, void, XMMReg, XMMReg) +DEF_HELPER_2(hsubps, void, XMMReg, XMMReg) +DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg) +DEF_HELPER_2(addsubps, void, XMMReg, XMMReg) +DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg) + +#define SSE_HELPER_CMP(name, F)\ + DEF_HELPER_2( name ## ps , void, Reg, Reg) \ + DEF_HELPER_2( name ## ss , void, Reg, Reg) \ + DEF_HELPER_2( name ## pd , void, Reg, Reg) \ + DEF_HELPER_2( name ## sd , void, Reg, Reg) + +SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) +SSE_HELPER_CMP(cmplt, FPU_CMPLT) +SSE_HELPER_CMP(cmple, FPU_CMPLE) +SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) +SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) +SSE_HELPER_CMP(cmpord, FPU_CMPORD) + +DEF_HELPER_2(ucomiss, void, Reg, Reg) +DEF_HELPER_2(comiss, void, Reg, Reg) +DEF_HELPER_2(ucomisd, void, Reg, Reg) +DEF_HELPER_2(comisd, void, Reg, Reg) +DEF_HELPER_1(movmskps, i32, Reg) +DEF_HELPER_1(movmskpd, i32, Reg) +#endif + +DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg) +DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packssdw, 
SUFFIX), void, Reg, Reg) +#define UNPCK_OP(base_name, base) \ + DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \ + DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \ + DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg) + +UNPCK_OP(l, 0) +UNPCK_OP(h, 1) + +#if SHIFT == 1 +DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg) +#endif + +/* 3DNow! float ops */ +#if SHIFT == 0 +DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg) +DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg) +DEF_HELPER_2(pf2id, void, MMXReg, MMXReg) +DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg) +DEF_HELPER_2(pfacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfadd, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg) +DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmax, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmin, void, MMXReg, MMXReg) +DEF_HELPER_2(pfmul, void, MMXReg, MMXReg) +DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg) +DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg) +DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg) +DEF_HELPER_2(pfsub, void, MMXReg, MMXReg) +DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg) +DEF_HELPER_2(pswapd, void, MMXReg, MMXReg) +#endif + +/* SSSE3 op helpers */ +DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32) + +/* SSE4.1 op helpers */ +#if SHIFT == 1 +DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxsd, 
SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg) +DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32) +#endif + +/* SSE4.2 op helpers */ +#if SHIFT == 1 +DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg) +DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(crc32, tl, i32, tl, i32) +DEF_HELPER_2(popcnt, tl, tl, i32) +#endif + +#undef SHIFT +#undef Reg +#undef SUFFIX + +#undef SSE_HELPER_B +#undef SSE_HELPER_W +#undef SSE_HELPER_L +#undef SSE_HELPER_Q +#undef SSE_HELPER_S +#undef SSE_HELPER_CMP +#undef UNPCK_OP diff --git a/src/recompiler/target-i386/svm.h b/src/recompiler/target-i386/svm.h new file mode 100644 index 00000000..a224aead --- /dev/null +++ b/src/recompiler/target-i386/svm.h @@ -0,0 +1,222 @@ +#ifndef __SVM_H +#define __SVM_H + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define 
SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +/* only included in documentation, maybe wrong */ +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ + +struct __attribute__ ((__packed__)) vmcb_control_area { + uint16_t intercept_cr_read; + uint16_t intercept_cr_write; + uint16_t intercept_dr_read; + uint16_t intercept_dr_write; + uint32_t intercept_exceptions; + uint64_t intercept; + uint8_t reserved_1[44]; + uint64_t iopm_base_pa; + uint64_t msrpm_base_pa; + uint64_t tsc_offset; + uint32_t asid; + uint8_t tlb_ctl; + uint8_t reserved_2[3]; + uint32_t int_ctl; + uint32_t int_vector; + uint32_t int_state; + uint8_t reserved_3[4]; + uint64_t exit_code; + uint64_t exit_info_1; + uint64_t exit_info_2; + uint32_t exit_int_info; + uint32_t exit_int_info_err; + uint64_t nested_ctl; + uint8_t reserved_4[16]; + uint32_t event_inj; + uint32_t event_inj_err; + uint64_t nested_cr3; + uint64_t lbr_ctl; + uint8_t reserved_5[832]; +}; + +struct __attribute__ ((__packed__)) vmcb_seg { + uint16_t selector; + uint16_t attrib; + uint32_t limit; + uint64_t base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + uint8_t reserved_1[43]; + uint8_t cpl; + uint8_t reserved_2[4]; + uint64_t efer; + uint8_t reserved_3[112]; + uint64_t cr4; + uint64_t cr3; + uint64_t cr0; + uint64_t dr7; + uint64_t dr6; + 
uint64_t rflags; + uint64_t rip; + uint8_t reserved_4[88]; + uint64_t rsp; + uint8_t reserved_5[24]; + uint64_t rax; + uint64_t star; + uint64_t lstar; + uint64_t cstar; + uint64_t sfmask; + uint64_t kernel_gs_base; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + uint64_t cr2; + uint8_t reserved_6[32]; + uint64_t g_pat; + uint64_t dbgctl; + uint64_t br_from; + uint64_t br_to; + uint64_t last_excp_from; + uint64_t last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#endif diff --git a/src/recompiler/target-i386/translate.c b/src/recompiler/target-i386/translate.c new file mode 100644 index 00000000..1b82f3f5 --- /dev/null +++ b/src/recompiler/target-i386/translate.c @@ -0,0 +1,8385 @@ +/* + * i386 translation + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifndef VBOX +#include <inttypes.h> +#include <signal.h> +#endif /* !VBOX */ + +#include "cpu.h" +#include "exec-all.h" +#include "disas.h" +#include "tcg-op.h" + +#include "helper.h" +#define GEN_HELPER 1 +#include "helper.h" + +#define PREFIX_REPZ 0x01 +#define PREFIX_REPNZ 0x02 +#define PREFIX_LOCK 0x04 +#define PREFIX_DATA 0x08 +#define PREFIX_ADR 0x10 + +#ifdef TARGET_X86_64 +#define X86_64_ONLY(x) x +#define X86_64_DEF(...) __VA_ARGS__ +#define CODE64(s) ((s)->code64) +#define REX_X(s) ((s)->rex_x) +#define REX_B(s) ((s)->rex_b) +# ifdef VBOX +# define IS_LONG_MODE(s) ((s)->lma) +# endif +/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */ +#if 1 +#define BUGGY_64(x) NULL +#endif +#else +#define X86_64_ONLY(x) NULL +#define X86_64_DEF(...) 
+#define CODE64(s) 0 +#define REX_X(s) 0 +#define REX_B(s) 0 +# ifdef VBOX +# define IS_LONG_MODE(s) 0 +# endif +#endif + +//#define MACRO_TEST 1 + +/* global register indexes */ +static TCGv_ptr cpu_env; +static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp; +static TCGv_i32 cpu_cc_op; +static TCGv cpu_regs[CPU_NB_REGS]; +/* local temps */ +static TCGv cpu_T[2], cpu_T3; +/* local register indexes (only used inside old micro ops) */ +static TCGv cpu_tmp0, cpu_tmp4; +static TCGv_ptr cpu_ptr0, cpu_ptr1; +static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; +static TCGv_i64 cpu_tmp1_i64; +static TCGv cpu_tmp5; + +static uint8_t gen_opc_cc_op[OPC_BUF_SIZE]; + +#include "gen-icount.h" + +#ifdef TARGET_X86_64 +static int x86_64_hregs; +#endif + +#ifdef VBOX + +/* Special/override code readers to hide patched code. */ + +uint8_t ldub_code_raw(target_ulong pc) +{ + uint8_t b; + +# ifdef VBOX_WITH_RAW_MODE + if (!remR3GetOpcode(cpu_single_env, pc, &b)) +# endif + b = ldub_code(pc); + return b; +} +# define ldub_code(a) ldub_code_raw(a) + +uint16_t lduw_code_raw(target_ulong pc) +{ + uint16_t u16; + u16 = (uint16_t)ldub_code_raw(pc); + u16 |= (uint16_t)ldub_code_raw(pc + 1) << 8; + return u16; +} +# define lduw_code(a) lduw_code_raw(a) + + +uint32_t ldl_code_raw(target_ulong pc) +{ + uint32_t u32; + u32 = (uint32_t)ldub_code_raw(pc); + u32 |= (uint32_t)ldub_code_raw(pc + 1) << 8; + u32 |= (uint32_t)ldub_code_raw(pc + 2) << 16; + u32 |= (uint32_t)ldub_code_raw(pc + 3) << 24; + return u32; +} +# define ldl_code(a) ldl_code_raw(a) + +#endif /* VBOX */ + +typedef struct DisasContext { + /* current insn context */ + int override; /* -1 if no override */ + int prefix; + int aflag, dflag; + target_ulong pc; /* pc = eip + cs_base */ + int is_jmp; /* 1 = means jump (stop translation), 2 means CPU + static state change (stop translation) */ + /* current block context */ + target_ulong cs_base; /* base of CS segment */ + int pe; /* protected mode */ + int code32; /* 32 bit code segment */ +#ifdef TARGET_X86_64 + int lma; /* long mode active */ + int code64; /* 64 bit code segment */ + int rex_x, rex_b; +#endif + int ss32; /* 32 bit stack segment */ + int cc_op; /* current CC operation */ + int addseg; /* non zero if either DS/ES/SS have a non zero base */ + int f_st; /* currently unused */ + int vm86; /* vm86 mode */ +#ifdef VBOX + int vme; /* CR4.VME */ + int pvi; /* CR4.PVI */ + int record_call; /* record calls for CSAM or not? 
*/ +#endif + int cpl; + int iopl; + int tf; /* TF cpu flag */ + int singlestep_enabled; /* "hardware" single step enabled */ + int jmp_opt; /* use direct block chaining for direct jumps */ + int mem_index; /* select memory access functions */ + uint64_t flags; /* all execution flags */ + struct TranslationBlock *tb; + int popl_esp_hack; /* for correct popl with esp base handling */ + int rip_offset; /* only used in x86_64, but left for simplicity */ + int cpuid_features; + int cpuid_ext_features; + int cpuid_ext2_features; + int cpuid_ext3_features; +} DisasContext; + +static void gen_eob(DisasContext *s); +static void gen_jmp(DisasContext *s, target_ulong eip); +static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); + +#ifdef VBOX +static void gen_check_external_event(void); +#endif + +/* i386 arith/logic operations */ +enum { + OP_ADDL, + OP_ORL, + OP_ADCL, + OP_SBBL, + OP_ANDL, + OP_SUBL, + OP_XORL, + OP_CMPL, +}; + +/* i386 shift ops */ +enum { + OP_ROL, + OP_ROR, + OP_RCL, + OP_RCR, + OP_SHL, + OP_SHR, + OP_SHL1, /* undocumented */ + OP_SAR = 7, +}; + +enum { + JCC_O, + JCC_B, + JCC_Z, + JCC_BE, + JCC_S, + JCC_P, + JCC_L, + JCC_LE, +}; + +/* operand size */ +enum { + OT_BYTE = 0, + OT_WORD, + OT_LONG, + OT_QUAD, +}; + +enum { + /* I386 int registers */ + OR_EAX, /* MUST be even numbered */ + OR_ECX, + OR_EDX, + OR_EBX, + OR_ESP, + OR_EBP, + OR_ESI, + OR_EDI, + + OR_TMP0 = 16, /* temporary operand register */ + OR_TMP1, + OR_A0, /* temporary register used when doing address evaluation */ +}; + +static inline void gen_op_movl_T0_0(void) +{ + tcg_gen_movi_tl(cpu_T[0], 0); +} + +static inline void gen_op_movl_T0_im(int32_t val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_op_movl_T0_imu(uint32_t val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_op_movl_T1_im(int32_t val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_movl_T1_imu(uint32_t val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_movl_A0_im(uint32_t val) +{ + tcg_gen_movi_tl(cpu_A0, val); +} + +#ifdef TARGET_X86_64 +static inline void gen_op_movq_A0_im(int64_t val) +{ + tcg_gen_movi_tl(cpu_A0, val); +} +#endif + +static inline void gen_movtl_T0_im(target_ulong val) +{ + tcg_gen_movi_tl(cpu_T[0], val); +} + +static inline void gen_movtl_T1_im(target_ulong val) +{ + tcg_gen_movi_tl(cpu_T[1], val); +} + +static inline void gen_op_andl_T0_ffff(void) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); +} + +static inline void gen_op_andl_T0_im(uint32_t val) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], val); +} + +static inline void gen_op_movl_T0_T1(void) +{ + tcg_gen_mov_tl(cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_andl_A0_ffff(void) +{ + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffff); +} + +#ifdef TARGET_X86_64 + +#define NB_OP_SIZES 4 + +#else /* !TARGET_X86_64 */ + +#define NB_OP_SIZES 3 + +#endif /* !TARGET_X86_64 */ + +#if defined(HOST_WORDS_BIGENDIAN) +#define REG_B_OFFSET (sizeof(target_ulong) - 1) +#define REG_H_OFFSET (sizeof(target_ulong) - 2) +#define REG_W_OFFSET (sizeof(target_ulong) - 2) +#define REG_L_OFFSET (sizeof(target_ulong) - 4) +#define REG_LH_OFFSET (sizeof(target_ulong) - 8) +#else +#define REG_B_OFFSET 0 +#define REG_H_OFFSET 1 +#define REG_W_OFFSET 0 +#define REG_L_OFFSET 0 +#define REG_LH_OFFSET 4 +#endif + +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0) +{ + TCGv tmp; + + switch(ot) { + case OT_BYTE: + tmp = tcg_temp_new(); + tcg_gen_ext8u_tl(tmp, t0); + if (reg < 4 X86_64_DEF( || reg >= 8 || 
x86_64_hregs)) { + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + } else { + tcg_gen_shli_tl(tmp, tmp, 8); + tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00); + tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp); + } + tcg_temp_free(tmp); + break; + case OT_WORD: + tmp = tcg_temp_new(); + tcg_gen_ext16u_tl(tmp, t0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + tcg_temp_free(tmp); + break; + default: /* XXX this shouldn't be reached; abort? */ + case OT_LONG: + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a mov. */ + tcg_gen_ext32u_tl(cpu_regs[reg], t0); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_mov_tl(cpu_regs[reg], t0); + break; +#endif + } +} + +static inline void gen_op_mov_reg_T0(int ot, int reg) +{ + gen_op_mov_reg_v(ot, reg, cpu_T[0]); +} + +static inline void gen_op_mov_reg_T1(int ot, int reg) +{ + gen_op_mov_reg_v(ot, reg, cpu_T[1]); +} + +static inline void gen_op_mov_reg_A0(int size, int reg) +{ + TCGv tmp; + + switch(size) { + case 0: + tmp = tcg_temp_new(); + tcg_gen_ext16u_tl(tmp, cpu_A0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + tcg_temp_free(tmp); + break; + default: /* XXX this shouldn't be reached; abort? */ + case 1: + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a mov. */ + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0); + break; +#endif + } +} + +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg) +{ + switch(ot) { + case OT_BYTE: + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) { + goto std_case; + } else { + tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8); + tcg_gen_ext8u_tl(t0, t0); + } + break; + default: + std_case: + tcg_gen_mov_tl(t0, cpu_regs[reg]); + break; + } +} + +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg) +{ + gen_op_mov_v_reg(ot, cpu_T[t_index], reg); +} + +static inline void gen_op_movl_A0_reg(int reg) +{ + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]); +} + +static inline void gen_op_addl_A0_im(int32_t val) +{ + tcg_gen_addi_tl(cpu_A0, cpu_A0, val); +#ifdef TARGET_X86_64 + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff); +#endif +} + +#ifdef TARGET_X86_64 +static inline void gen_op_addq_A0_im(int64_t val) +{ + tcg_gen_addi_tl(cpu_A0, cpu_A0, val); +} +#endif + +static void gen_add_A0_im(DisasContext *s, int val) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) + gen_op_addq_A0_im(val); + else +#endif + gen_op_addl_A0_im(val); +} + +static inline void gen_op_addl_T0_T1(void) +{ + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_jmp_T0(void) +{ + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip)); +} + +static inline void gen_op_add_reg_im(int size, int reg, int32_t val) +{ + switch(size) { + case 0: + tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); + tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0); + break; + case 1: + tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); + /* For x86_64, this sets the higher half of register to zero. + For i386, this is equivalent to a nop. 
*/
+        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+        break;
+#ifdef TARGET_X86_64
+    case 2:
+        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_add_reg_T0(int size, int reg)
+{
+    switch(size) {
+    case 0:
+        tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+        tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
+        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0);
+        break;
+    case 1:
+        tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+        /* For x86_64, this sets the higher half of register to zero.
+           For i386, this is equivalent to a nop. */
+        tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0);
+        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
+        break;
+#ifdef TARGET_X86_64
+    case 2:
+        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
+        break;
+#endif
+    }
+}
+
+static inline void gen_op_set_cc_op(int32_t val)
+{
+    tcg_gen_movi_i32(cpu_cc_op, val);
+}
+
+static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
+{
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
+    if (shift != 0)
+        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+    /* For x86_64, this sets the higher half of register to zero.
+       For i386, this is equivalent to a nop. */
+    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+}
+
+#ifdef VBOX
+DECLINLINE(void) gen_op_seg_check(int reg, bool keepA0)
+{
+    /* It seems segments don't get out of sync - if they in fact do, enable the code below. */
+# ifdef FORCE_SEGMENT_SYNC
+# if 1
+    TCGv t0;
+
+    /* Given the poor quality of the TCG optimizer, it is better to call the helper directly. */
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+    tcg_gen_movi_tl(t0, reg);
+    tcg_gen_helper_0_1(helper_sync_seg, t0);
+    tcg_temp_free(t0);
+# else
+    /* Our segments could be outdated, so check the newselector field to see if an update is really needed. */
+    int skip_label;
+    TCGv t0, a0;
+
+    /* For other segments this check is a waste of time; also, TCG cannot cope with this code
+       for data/stack segments, as it expects cpu_T[0] to stay live. */
+    if (reg != R_GS)
+        return;
+
+    if (keepA0)
+    {
+        /* we need to store the old cpu_A0 */
+        a0 = tcg_temp_local_new(TCG_TYPE_TL);
+        tcg_gen_mov_tl(a0, cpu_A0);
+    }
+
+    skip_label = gen_new_label();
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+
+    tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, segs[reg].newselector) + REG_L_OFFSET);
+    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+    tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, eflags) + REG_L_OFFSET);
+    tcg_gen_andi_tl(t0, t0, VM_MASK);
+    tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, skip_label);
+    tcg_gen_movi_tl(t0, reg);
+
+    tcg_gen_helper_0_1(helper_sync_seg, t0);
+
+    tcg_temp_free(t0);
+
+    gen_set_label(skip_label);
+    if (keepA0)
+    {
+        tcg_gen_mov_tl(cpu_A0, a0);
+        tcg_temp_free(a0);
+    }
+# endif /* 0 */
+# endif /* FORCE_SEGMENT_SYNC */
+}
+#endif /* VBOX */
+
+static inline void gen_op_movl_A0_seg(int reg)
+{
+#ifdef VBOX
+    gen_op_seg_check(reg, false);
+#endif
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+}
+
+static inline void gen_op_addl_A0_seg(int reg)
+{
+#ifdef VBOX
+    gen_op_seg_check(reg, true);
+#endif
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+#ifdef TARGET_X86_64
+    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+#endif
+}
+
+#ifdef TARGET_X86_64
+static inline void gen_op_movq_A0_seg(int reg)
+{
+#ifdef VBOX
+    gen_op_seg_check(reg, false);
+#endif
+    tcg_gen_ld_tl(cpu_A0, cpu_env,
offsetof(CPUState, segs[reg].base)); +} + +static inline void gen_op_addq_A0_seg(int reg) +{ +#ifdef VBOX + gen_op_seg_check(reg, true); +#endif + tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base)); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); +} + +static inline void gen_op_movq_A0_reg(int reg) +{ + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]); +} + +static inline void gen_op_addq_A0_reg_sN(int shift, int reg) +{ + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]); + if (shift != 0) + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); +} +#endif + +static inline void gen_op_lds_T0_A0(int idx) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index); + break; + case 1: + tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index); + break; + default: + case 2: + tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index); + break; + } +} + +static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_ld8u(t0, a0, mem_index); + break; + case 1: + tcg_gen_qemu_ld16u(t0, a0, mem_index); + break; + case 2: + tcg_gen_qemu_ld32u(t0, a0, mem_index); + break; + default: + case 3: + /* Should never happen on 32-bit targets. */ +#ifdef TARGET_X86_64 + tcg_gen_qemu_ld64(t0, a0, mem_index); +#endif + break; + } +} + +/* XXX: always use ldu or lds */ +static inline void gen_op_ld_T0_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[0], cpu_A0); +} + +static inline void gen_op_ldu_T0_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[0], cpu_A0); +} + +static inline void gen_op_ld_T1_A0(int idx) +{ + gen_op_ld_v(idx, cpu_T[1], cpu_A0); +} + +static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) +{ + int mem_index = (idx >> 2) - 1; + switch(idx & 3) { + case 0: + tcg_gen_qemu_st8(t0, a0, mem_index); + break; + case 1: + tcg_gen_qemu_st16(t0, a0, mem_index); + break; + case 2: + tcg_gen_qemu_st32(t0, a0, mem_index); + break; + default: + case 3: + /* Should never happen on 32-bit targets. 
*/
+#ifdef TARGET_X86_64
+        tcg_gen_qemu_st64(t0, a0, mem_index);
+#endif
+        break;
+    }
+}
+
+static inline void gen_op_st_T0_A0(int idx)
+{
+    gen_op_st_v(idx, cpu_T[0], cpu_A0);
+}
+
+static inline void gen_op_st_T1_A0(int idx)
+{
+    gen_op_st_v(idx, cpu_T[1], cpu_A0);
+}
+
+#ifdef VBOX
+
+static void gen_check_external_event(void)
+{
+# if 1
+    /** @todo once TCG code generation improves, we may want to switch to
+        the version in the #else branch below. */
+    gen_helper_check_external_event();
+# else
+    int skip_label;
+    TCGv t0;
+
+    skip_label = gen_new_label();
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+    /* t0 = cpu_tmp0; */
+
+    tcg_gen_ld32u_tl(t0, cpu_env, offsetof(CPUState, interrupt_request));
+    /* Keep in sync with helper_check_external_event() */
+    tcg_gen_andi_tl(t0, t0,
+                    CPU_INTERRUPT_EXTERNAL_EXIT
+                    | CPU_INTERRUPT_EXTERNAL_TIMER
+                    | CPU_INTERRUPT_EXTERNAL_DMA
+                    | CPU_INTERRUPT_EXTERNAL_HARD);
+    /** @todo predict branch as taken */
+    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, skip_label);
+    tcg_temp_free(t0);
+
+    gen_helper_check_external_event();
+
+    gen_set_label(skip_label);
+# endif
+}
+
+#endif /* VBOX */
+
+static inline void gen_jmp_im(target_ulong pc)
+{
+    tcg_gen_movi_tl(cpu_tmp0, pc);
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
+}
+
+#ifdef VBOX
+DECLINLINE(void) gen_update_eip(target_ulong pc)
+{
+    gen_jmp_im(pc);
+# ifdef VBOX_DUMP_STATE
+    gen_helper_dump_state();
+# endif
+}
+#endif /* VBOX */
+
+static inline void gen_string_movl_A0_ESI(DisasContext *s)
+{
+    int override;
+
+    override = s->override;
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        if (override >= 0) {
+            gen_op_movq_A0_seg(override);
+            gen_op_addq_A0_reg_sN(0, R_ESI);
+        } else {
+            gen_op_movq_A0_reg(R_ESI);
+        }
+    } else
+#endif
+    if (s->aflag) {
+        /* 32 bit address */
+        if (s->addseg && override < 0)
+            override = R_DS;
+        if (override >= 0) {
+            gen_op_movl_A0_seg(override);
+            gen_op_addl_A0_reg_sN(0, R_ESI);
+        } else {
+            gen_op_movl_A0_reg(R_ESI);
+        }
+    } else {
+        /* 16 bit address, always override */
+        if (override < 0)
+            override = R_DS;
+        gen_op_movl_A0_reg(R_ESI);
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(override);
+    }
+}
+
+static inline void gen_string_movl_A0_EDI(DisasContext *s)
+{
+#ifdef TARGET_X86_64
+    if (s->aflag == 2) {
+        gen_op_movq_A0_reg(R_EDI);
+    } else
+#endif
+    if (s->aflag) {
+        if (s->addseg) {
+            gen_op_movl_A0_seg(R_ES);
+            gen_op_addl_A0_reg_sN(0, R_EDI);
+        } else {
+            gen_op_movl_A0_reg(R_EDI);
+        }
+    } else {
+        gen_op_movl_A0_reg(R_EDI);
+        gen_op_andl_A0_ffff();
+        gen_op_addl_A0_seg(R_ES);
+    }
+}
+
+static inline void gen_op_movl_T0_Dshift(int ot)
+{
+    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
+    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
+}
+
+static void gen_extu(int ot, TCGv reg)
+{
+    switch(ot) {
+    case OT_BYTE:
+        tcg_gen_ext8u_tl(reg, reg);
+        break;
+    case OT_WORD:
+        tcg_gen_ext16u_tl(reg, reg);
+        break;
+    case OT_LONG:
+        tcg_gen_ext32u_tl(reg, reg);
+        break;
+    default:
+        break;
+    }
+}
+
+static void gen_exts(int ot, TCGv reg)
+{
+    switch(ot) {
+    case OT_BYTE:
+        tcg_gen_ext8s_tl(reg, reg);
+        break;
+    case OT_WORD:
+        tcg_gen_ext16s_tl(reg, reg);
+        break;
+    case OT_LONG:
+        tcg_gen_ext32s_tl(reg, reg);
+        break;
+    default:
+        break;
+    }
+}
+
+static inline void gen_op_jnz_ecx(int size, int label1)
+{
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+    gen_extu(size + 1, cpu_tmp0);
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
+}
+
+static inline void gen_op_jz_ecx(int size, int label1)
+{
+    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
+    gen_extu(size + 1, cpu_tmp0);
+
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); +} + +static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n) +{ + switch (ot) { + case 0: gen_helper_inb(v, n); break; + case 1: gen_helper_inw(v, n); break; + case 2: gen_helper_inl(v, n); break; + } + +} + +static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n) +{ + switch (ot) { + case 0: gen_helper_outb(v, n); break; + case 1: gen_helper_outw(v, n); break; + case 2: gen_helper_outl(v, n); break; + } + +} + +static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, + uint32_t svm_flags) +{ + int state_saved; + target_ulong next_eip; + + state_saved = 0; + if (s->pe && (s->cpl > s->iopl || s->vm86)) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + state_saved = 1; + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + switch (ot) { + case 0: gen_helper_check_iob(cpu_tmp2_i32); break; + case 1: gen_helper_check_iow(cpu_tmp2_i32); break; + case 2: gen_helper_check_iol(cpu_tmp2_i32); break; + } + } + if(s->flags & HF_SVMI_MASK) { + if (!state_saved) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + } + svm_flags |= (1 << (4 + ot)); + next_eip = s->pc - s->cs_base; + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags), + tcg_const_i32(next_eip - cur_eip)); + } +} + +static inline void gen_movs(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_string_movl_A0_EDI(s); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_update_cc_op(DisasContext *s) +{ + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + s->cc_op = CC_OP_DYNAMIC; + } +} + +static void gen_op_update1_cc(void) +{ + tcg_gen_discard_tl(cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static void gen_op_update2_cc(void) +{ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static inline void gen_op_cmpl_T0_T1_cc(void) +{ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); +} + +static inline void gen_op_testl_T0_T1_cc(void) +{ + tcg_gen_discard_tl(cpu_cc_src); + tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); +} + +static void gen_op_update_neg_cc(void) +{ + tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +/* compute eflags.C to reg */ +static void gen_compute_eflags_c(TCGv reg) +{ + gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op); + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(TCGv reg) +{ + gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op); + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + switch(jcc_op) { + case JCC_O: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_B: + gen_compute_eflags_c(cpu_T[0]); + break; + case JCC_Z: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_BE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + 
case JCC_S: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_P: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_L: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + default: + case JCC_LE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + } +} + +/* return true if setcc_slow is not needed (WARNING: must be kept in + sync with gen_jcc1) */ +static int is_fast_jcc_case(DisasContext *s, int b) +{ + int jcc_op; + jcc_op = (b >> 1) & 7; + switch(s->cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + if (jcc_op == JCC_O || jcc_op == JCC_P) + goto slow_jcc; + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + if (jcc_op != JCC_Z && jcc_op != JCC_S) + goto slow_jcc; + break; + default: + slow_jcc: + return 0; + } + return 1; +} + +/* generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. */ +static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +{ + int inv, jcc_op, size, cond; + TCGv t0; + + inv = b & 1; + jcc_op = (b >> 1) & 7; + + switch(cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + + size = cc_op - CC_OP_SUBB; + switch(jcc_op) { + case JCC_Z: + fast_jcc_z: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff); + t0 = cpu_tmp0; + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff); + t0 = cpu_tmp0; + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff); + t0 = cpu_tmp0; + break; +#endif + default: + t0 = cpu_cc_dst; + break; + } + tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); + break; + case JCC_S: + fast_jcc_s: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); + tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + 0, l1); + break; +#endif + default: + tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, + 0, l1); + break; + } + break; + + case JCC_B: + cond = inv ? TCG_COND_GEU : TCG_COND_LTU; + goto fast_jcc_b; + case JCC_BE: + cond = inv ? 
TCG_COND_GTU : TCG_COND_LEU; + fast_jcc_b: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xff); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + case JCC_L: + cond = inv ? TCG_COND_GE : TCG_COND_LT; + goto fast_jcc_l; + case JCC_LE: + cond = inv ? TCG_COND_GT : TCG_COND_LE; + fast_jcc_l: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext8s_tl(t0, cpu_cc_src); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext16s_tl(t0, cpu_cc_src); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext32s_tl(t0, cpu_cc_src); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + default: + goto slow_jcc; + } + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_ADCB: + case CC_OP_ADCW: + case CC_OP_ADCL: + case CC_OP_ADCQ: + + case CC_OP_SBBB: + case CC_OP_SBBW: + case CC_OP_SBBL: + case CC_OP_SBBQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + + case CC_OP_SARB: + case CC_OP_SARW: + case CC_OP_SARL: + case CC_OP_SARQ: + switch(jcc_op) { + case JCC_Z: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_z; + case JCC_S: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_s; + default: + goto slow_jcc; + } + break; + default: + slow_jcc: + gen_setcc_slow_T0(s, jcc_op); + tcg_gen_brcondi_tl(inv ? 
TCG_COND_EQ : TCG_COND_NE, + cpu_T[0], 0, l1); + break; + } +} + +/* XXX: does not work with gdbstub "ice" single step - not a + serious problem */ +static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) +{ + int l1, l2; + + l1 = gen_new_label(); + l2 = gen_new_label(); + gen_op_jnz_ecx(s->aflag, l1); + gen_set_label(l2); + gen_jmp_tb(s, next_eip, 1); + gen_set_label(l1); + return l2; +} + +static inline void gen_stos(DisasContext *s, int ot) +{ + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + gen_string_movl_A0_EDI(s); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_lods(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T0(ot, R_EAX); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); +} + +static inline void gen_scas(DisasContext *s, int ot) +{ + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + gen_string_movl_A0_EDI(s); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_cmpl_T0_T1_cc(); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_cmps(DisasContext *s, int ot) +{ + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + gen_string_movl_A0_EDI(s); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_cmpl_T0_T1_cc(); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + gen_op_add_reg_T0(s->aflag, R_EDI); +} + +static inline void gen_ins(DisasContext *s, int ot) +{ + if (use_icount) + gen_io_start(); + gen_string_movl_A0_EDI(s); + /* Note: we must do this dummy write first to be restartable in + case of page fault. */ + gen_op_movl_T0_0(); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_mov_TN_reg(OT_WORD, 1, R_EDX); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_EDI); + if (use_icount) + gen_io_end(); +} + +static inline void gen_outs(DisasContext *s, int ot) +{ + if (use_icount) + gen_io_start(); + gen_string_movl_A0_ESI(s); + gen_op_ld_T0_A0(ot + s->mem_index); + + gen_op_mov_TN_reg(OT_WORD, 1, R_EDX); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[1]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + + gen_op_movl_T0_Dshift(ot); + gen_op_add_reg_T0(s->aflag, R_ESI); + if (use_icount) + gen_io_end(); +} + +/* same method as Valgrind : we generate jumps to current or next + instruction */ +#define GEN_REPZ(op) \ +static inline void gen_repz_ ## op(DisasContext *s, int ot, \ + target_ulong cur_eip, target_ulong next_eip) \ +{ \ + int l2;\ + gen_update_cc_op(s); \ + l2 = gen_jz_ecx_string(s, next_eip); \ + gen_ ## op(s, ot); \ + gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + /* a loop would cause two single step exceptions if ECX = 1 \ + before rep string_insn */ \ + if (!s->jmp_opt) \ + gen_op_jz_ecx(s->aflag, l2); \ + gen_jmp(s, cur_eip); \ +} + +#define GEN_REPZ2(op) \ +static inline void gen_repz_ ## op(DisasContext *s, int ot, \ + target_ulong cur_eip, \ + target_ulong next_eip, \ + int nz) \ +{ \ + int l2;\ + gen_update_cc_op(s); \ + l2 = gen_jz_ecx_string(s, next_eip); \ + gen_ ## op(s, ot); \ + gen_op_add_reg_im(s->aflag, R_ECX, -1); \ + gen_op_set_cc_op(CC_OP_SUBB + ot); \ + gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \ + if 
(!s->jmp_opt) \ + gen_op_jz_ecx(s->aflag, l2); \ + gen_jmp(s, cur_eip); \ +} + +GEN_REPZ(movs) +GEN_REPZ(stos) +GEN_REPZ(lods) +GEN_REPZ(ins) +GEN_REPZ(outs) +GEN_REPZ2(scas) +GEN_REPZ2(cmps) + +static void gen_helper_fp_arith_ST0_FT0(int op) +{ + switch (op) { + case 0: gen_helper_fadd_ST0_FT0(); break; + case 1: gen_helper_fmul_ST0_FT0(); break; + case 2: gen_helper_fcom_ST0_FT0(); break; + case 3: gen_helper_fcom_ST0_FT0(); break; + case 4: gen_helper_fsub_ST0_FT0(); break; + case 5: gen_helper_fsubr_ST0_FT0(); break; + case 6: gen_helper_fdiv_ST0_FT0(); break; + case 7: gen_helper_fdivr_ST0_FT0(); break; + } +} + +/* NOTE the exception in "r" op ordering */ +static void gen_helper_fp_arith_STN_ST0(int op, int opreg) +{ + TCGv_i32 tmp = tcg_const_i32(opreg); + switch (op) { + case 0: gen_helper_fadd_STN_ST0(tmp); break; + case 1: gen_helper_fmul_STN_ST0(tmp); break; + case 4: gen_helper_fsubr_STN_ST0(tmp); break; + case 5: gen_helper_fsub_STN_ST0(tmp); break; + case 6: gen_helper_fdivr_STN_ST0(tmp); break; + case 7: gen_helper_fdiv_STN_ST0(tmp); break; + } +} + +/* if d == OR_TMP0, it means memory operand (address in A0) */ +static void gen_op(DisasContext *s1, int op, int ot, int d) +{ + if (d != OR_TMP0) { + gen_op_mov_TN_reg(ot, 0, d); + } else { + gen_op_ld_T0_A0(ot + s1->mem_index); + } + switch(op) { + case OP_ADCL: + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(cpu_tmp4); + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); + tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); + s1->cc_op = CC_OP_DYNAMIC; + break; + case OP_SBBL: + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(cpu_tmp4); + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); + tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); + s1->cc_op = CC_OP_DYNAMIC; + break; + case OP_ADDL: + gen_op_addl_T0_T1(); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update2_cc(); + s1->cc_op = CC_OP_ADDB + ot; + break; + case OP_SUBL: + tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update2_cc(); + s1->cc_op = CC_OP_SUBB + ot; + break; + default: + case OP_ANDL: + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_ORL: + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_XORL: + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_op_update1_cc(); + 
s1->cc_op = CC_OP_LOGICB + ot; + break; + case OP_CMPL: + gen_op_cmpl_T0_T1_cc(); + s1->cc_op = CC_OP_SUBB + ot; + break; + } +} + +/* if d == OR_TMP0, it means memory operand (address in A0) */ +static void gen_inc(DisasContext *s1, int ot, int d, int c) +{ + if (d != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, d); + else + gen_op_ld_T0_A0(ot + s1->mem_index); + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + if (c > 0) { + tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); + s1->cc_op = CC_OP_INCB + ot; + } else { + tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1); + s1->cc_op = CC_OP_DECB + ot; + } + if (d != OR_TMP0) + gen_op_mov_reg_T0(ot, d); + else + gen_op_st_T0_A0(ot + s1->mem_index); + gen_compute_eflags_c(cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + +static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, + int is_right, int is_arith) +{ + target_ulong mask; + int shift_label; + TCGv t0, t1; + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); + + tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1); + + if (is_right) { + if (is_arith) { + gen_exts(ot, cpu_T[0]); + tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + gen_extu(ot, cpu_T[0]); + tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + } else { + tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags if non zero shift */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + /* XXX: inefficient */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + + tcg_gen_mov_tl(t0, cpu_T[0]); + tcg_gen_mov_tl(t1, cpu_T3); + + shift_label = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label); + + tcg_gen_mov_tl(cpu_cc_src, t1); + tcg_gen_mov_tl(cpu_cc_dst, t0); + if (is_right) + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + else + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + + gen_set_label(shift_label); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, + int is_right, int is_arith) +{ + int mask; + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + op2 &= mask; + if (op2 != 0) { + if (is_right) { + if (is_arith) { + gen_exts(ot, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2); + } else { + gen_extu(ot, cpu_T[0]); + tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2); + } + } else { + tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1); + tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2); + } + } + + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags if non zero shift */ + if (op2 != 0) { + tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + if (is_right) + s->cc_op = CC_OP_SARB + ot; + else + s->cc_op = CC_OP_SHLB + ot; + } +} + +static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2) +{ + if (arg2 >= 0) + 
tcg_gen_shli_tl(ret, arg1, arg2); + else + tcg_gen_shri_tl(ret, arg1, -arg2); +} + +static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, + int is_right) +{ + target_ulong mask; + int label1, label2, data_bits; + TCGv t0, t1, t2, a0; + + /* XXX: inefficient, but we must use local temps */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + tcg_gen_mov_tl(t1, cpu_T[1]); + + tcg_gen_andi_tl(t1, t1, mask); + + /* Must test zero case to avoid using undefined behaviour in TCG + shifts. */ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1); + + if (ot <= OT_WORD) + tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); + else + tcg_gen_mov_tl(cpu_tmp0, t1); + + gen_extu(ot, t0); + tcg_gen_mov_tl(t2, t0); + + data_bits = 8 << ot; + /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX: + fix TCG definition) */ + if (is_right) { + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0); + tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); + tcg_gen_shl_tl(t0, t0, cpu_tmp0); + } else { + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0); + tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); + tcg_gen_shr_tl(t0, t0, cpu_tmp0); + } + tcg_gen_or_tl(t0, t0, cpu_tmp4); + + gen_set_label(label1); + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + /* update eflags */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + label2 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); + + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); + tcg_gen_xor_tl(cpu_tmp0, t2, t0); + tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + if (is_right) { + tcg_gen_shri_tl(t0, t0, data_bits - 1); + } + tcg_gen_andi_tl(t0, t0, CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); + + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + + gen_set_label(label2); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); +} + +static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, + int is_right) +{ + int mask; + int data_bits; + TCGv t0, t1, a0; + + /* XXX: inefficient, but we must use local temps */ + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + gen_extu(ot, t0); + tcg_gen_mov_tl(t1, t0); + + op2 &= mask; + data_bits = 8 << ot; + if (op2 != 0) { + int shift = op2 & ((1 << (3 + ot)) - 1); + if (is_right) { + tcg_gen_shri_tl(cpu_tmp4, t0, shift); + tcg_gen_shli_tl(t0, t0, data_bits - shift); + } + else { + tcg_gen_shli_tl(cpu_tmp4, t0, shift); + tcg_gen_shri_tl(t0, t0, data_bits - shift); + } + tcg_gen_or_tl(t0, t0, cpu_tmp4); + } + + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + if (op2 != 0) { + /* update eflags */ + if 
(s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); + tcg_gen_xor_tl(cpu_tmp0, t1, t0); + tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + if (is_right) { + tcg_gen_shri_tl(t0, t0, data_bits - 1); + } + tcg_gen_andi_tl(t0, t0, CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); + + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + s->cc_op = CC_OP_EFLAGS; + } + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(a0); +} + +/* XXX: add faster immediate = 1 case */ +static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, + int is_right) +{ + int label1; + + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + /* load */ + if (op1 == OR_TMP0) + gen_op_ld_T0_A0(ot + s->mem_index); + else + gen_op_mov_TN_reg(ot, 0, op1); + + if (is_right) { + switch (ot) { + case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#ifdef TARGET_X86_64 + case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#endif + } + } else { + switch (ot) { + case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break; + case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#ifdef TARGET_X86_64 + case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break; +#endif + } + } + /* store */ + if (op1 == OR_TMP0) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, op1); + + /* update eflags */ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); + + tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp); + tcg_gen_discard_tl(cpu_cc_dst); + tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); + + gen_set_label(label1); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ +} + +/* XXX: add faster immediate case */ +static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, + int is_right) +{ + int label1, label2, data_bits; + target_ulong mask; + TCGv t0, t1, t2, a0; + + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + + if (ot == OT_QUAD) + mask = 0x3f; + else + mask = 0x1f; + + /* load */ + if (op1 == OR_TMP0) { + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_v_reg(ot, t0, op1); + } + + tcg_gen_andi_tl(cpu_T3, cpu_T3, mask); + + tcg_gen_mov_tl(t1, cpu_T[1]); + tcg_gen_mov_tl(t2, cpu_T3); + + /* Must test zero case to avoid using undefined behaviour in TCG + shifts. 
*/ + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + + tcg_gen_addi_tl(cpu_tmp5, t2, -1); + if (ot == OT_WORD) { + /* Note: we implement the Intel behaviour for shift count > 16 */ + if (is_right) { + tcg_gen_andi_tl(t0, t0, 0xffff); + tcg_gen_shli_tl(cpu_tmp0, t1, 16); + tcg_gen_or_tl(t0, t0, cpu_tmp0); + tcg_gen_ext32u_tl(t0, t0); + + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + + /* only needed if count > 16, but a test would complicate */ + tcg_gen_subfi_tl(cpu_tmp5, 32, t2); + tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5); + + tcg_gen_shr_tl(t0, t0, t2); + + tcg_gen_or_tl(t0, t0, cpu_tmp0); + } else { + /* XXX: not optimal */ + tcg_gen_andi_tl(t0, t0, 0xffff); + tcg_gen_shli_tl(t1, t1, 16); + tcg_gen_or_tl(t1, t1, t0); + tcg_gen_ext32u_tl(t1, t1); + + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); + tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5); + tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0); + tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5); + + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, 32, t2); + tcg_gen_shr_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + } + } else { + data_bits = 8 << ot; + if (is_right) { + if (ot == OT_LONG) + tcg_gen_ext32u_tl(t0, t0); + + tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + + tcg_gen_shr_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); + tcg_gen_shl_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + + } else { + if (ot == OT_LONG) + tcg_gen_ext32u_tl(t1, t1); + + tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); + + tcg_gen_shl_tl(t0, t0, t2); + tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); + tcg_gen_shr_tl(t1, t1, cpu_tmp5); + tcg_gen_or_tl(t0, t0, t1); + } + } + tcg_gen_mov_tl(t1, cpu_tmp4); + + gen_set_label(label1); + /* store */ + if (op1 == OR_TMP0) { + gen_op_st_v(ot + s->mem_index, t0, a0); + } else { + gen_op_mov_reg_v(ot, op1, t0); + } + + /* update eflags */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + + label2 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); + + tcg_gen_mov_tl(cpu_cc_src, t1); + tcg_gen_mov_tl(cpu_cc_dst, t0); + if (is_right) { + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + } else { + tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + } + gen_set_label(label2); + s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); +} + +static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) +{ + if (s != OR_TMP1) + gen_op_mov_TN_reg(ot, 1, s); + switch(op) { + case OP_ROL: + gen_rot_rm_T1(s1, ot, d, 0); + break; + case OP_ROR: + gen_rot_rm_T1(s1, ot, d, 1); + break; + case OP_SHL: + case OP_SHL1: + gen_shift_rm_T1(s1, ot, d, 0, 0); + break; + case OP_SHR: + gen_shift_rm_T1(s1, ot, d, 1, 0); + break; + case OP_SAR: + gen_shift_rm_T1(s1, ot, d, 1, 1); + break; + case OP_RCL: + gen_rotc_rm_T1(s1, ot, d, 0); + break; + case OP_RCR: + gen_rotc_rm_T1(s1, ot, d, 1); + break; + } +} + +static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c) +{ + switch(op) { + case OP_ROL: + gen_rot_rm_im(s1, ot, d, c, 0); + break; + case OP_ROR: + gen_rot_rm_im(s1, ot, d, c, 1); + break; + case OP_SHL: + case OP_SHL1: + gen_shift_rm_im(s1, ot, d, c, 0, 0); + break; + case OP_SHR: + gen_shift_rm_im(s1, ot, d, c, 1, 0); + break; + case OP_SAR: + gen_shift_rm_im(s1, ot, d, c, 1, 1); + break; + default: + /* currently not optimized */ + gen_op_movl_T1_im(c); + gen_shift(s1, op, ot, d, OR_TMP1); + break; + } +} + +static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int 
*offset_ptr) +{ + target_long disp; + int havesib; + int base; + int index; + int scale; + int opreg; + int mod, rm, code, override, must_add_seg; + + override = s->override; + must_add_seg = s->addseg; + if (override >= 0) + must_add_seg = 1; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + + if (s->aflag) { + + havesib = 0; + base = rm; + index = 0; + scale = 0; + + if (base == 4) { + havesib = 1; + code = ldub_code(s->pc++); + scale = (code >> 6) & 3; + index = ((code >> 3) & 7) | REX_X(s); + base = (code & 7); + } + base |= REX_B(s); + + switch (mod) { + case 0: + if ((base & 7) == 5) { + base = -1; + disp = (int32_t)ldl_code(s->pc); + s->pc += 4; + if (CODE64(s) && !havesib) { + disp += s->pc + s->rip_offset; + } + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub_code(s->pc++); + break; + default: + case 2: +#ifdef VBOX + disp = (int32_t)ldl_code(s->pc); +#else + disp = ldl_code(s->pc); +#endif + s->pc += 4; + break; + } + + if (base >= 0) { + /* for correct popl handling with esp */ + if (base == 4 && s->popl_esp_hack) + disp += s->popl_esp_hack; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(base); + if (disp != 0) { + gen_op_addq_A0_im(disp); + } + } else +#endif + { + gen_op_movl_A0_reg(base); + if (disp != 0) + gen_op_addl_A0_im(disp); + } + } else { +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_im(disp); + } else +#endif + { + gen_op_movl_A0_im(disp); + } + } + /* index == 4 means no index */ + if (havesib && (index != 4)) { +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_addq_A0_reg_sN(scale, index); + } else +#endif + { + gen_op_addl_A0_reg_sN(scale, index); + } + } + if (must_add_seg) { + if (override < 0) { + if (base == R_EBP || base == R_ESP) + override = R_SS; + else + override = R_DS; + } +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_addq_A0_seg(override); + } else +#endif + { + gen_op_addl_A0_seg(override); + } + } + } else { + switch (mod) { + case 0: + if (rm == 6) { + disp = lduw_code(s->pc); + s->pc += 2; + gen_op_movl_A0_im(disp); + rm = 0; /* avoid SS override */ + goto no_rm; + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub_code(s->pc++); + break; + default: + case 2: + disp = lduw_code(s->pc); + s->pc += 2; + break; + } + switch(rm) { + case 0: + gen_op_movl_A0_reg(R_EBX); + gen_op_addl_A0_reg_sN(0, R_ESI); + break; + case 1: + gen_op_movl_A0_reg(R_EBX); + gen_op_addl_A0_reg_sN(0, R_EDI); + break; + case 2: + gen_op_movl_A0_reg(R_EBP); + gen_op_addl_A0_reg_sN(0, R_ESI); + break; + case 3: + gen_op_movl_A0_reg(R_EBP); + gen_op_addl_A0_reg_sN(0, R_EDI); + break; + case 4: + gen_op_movl_A0_reg(R_ESI); + break; + case 5: + gen_op_movl_A0_reg(R_EDI); + break; + case 6: + gen_op_movl_A0_reg(R_EBP); + break; + default: + case 7: + gen_op_movl_A0_reg(R_EBX); + break; + } + if (disp != 0) + gen_op_addl_A0_im(disp); + gen_op_andl_A0_ffff(); + no_rm: + if (must_add_seg) { + if (override < 0) { + if (rm == 2 || rm == 3 || rm == 6) + override = R_SS; + else + override = R_DS; + } + gen_op_addl_A0_seg(override); + } + } + + opreg = OR_A0; + disp = 0; + *reg_ptr = opreg; + *offset_ptr = disp; +} + +static void gen_nop_modrm(DisasContext *s, int modrm) +{ + int mod, rm, base, code; + + mod = (modrm >> 6) & 3; + if (mod == 3) + return; + rm = modrm & 7; + + if (s->aflag) { + + base = rm; + + if (base == 4) { + code = ldub_code(s->pc++); + base = (code & 7); + } + + switch (mod) { + case 0: + if (base == 5) { + s->pc += 4; + } + break; + case 1: + s->pc++; + break; + default: + case 2: + s->pc += 4; 
+ break; + } + } else { + switch (mod) { + case 0: + if (rm == 6) { + s->pc += 2; + } + break; + case 1: + s->pc++; + break; + default: + case 2: + s->pc += 2; + break; + } + } +} + +/* used for LEA and MOV AX, mem */ +static void gen_add_A0_ds_seg(DisasContext *s) +{ + int override, must_add_seg; + must_add_seg = s->addseg; + override = R_DS; + if (s->override >= 0) { + override = s->override; + must_add_seg = 1; + } + if (must_add_seg) { +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_addq_A0_seg(override); + } else +#endif + { + gen_op_addl_A0_seg(override); + } + } +} + +/* generate modrm memory load or store of 'reg'. TMP0 is used if reg == + OR_TMP0 */ +static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store) +{ + int mod, rm, opreg, disp; + + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod == 3) { + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_mov_reg_T0(ot, rm); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + if (reg != OR_TMP0) + gen_op_mov_reg_T0(ot, reg); + } + } else { + gen_lea_modrm(s, modrm, &opreg, &disp); + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_ld_T0_A0(ot + s->mem_index); + if (reg != OR_TMP0) + gen_op_mov_reg_T0(ot, reg); + } + } +} + +static inline uint32_t insn_get(DisasContext *s, int ot) +{ + uint32_t ret; + + switch(ot) { + case OT_BYTE: + ret = ldub_code(s->pc); + s->pc++; + break; + case OT_WORD: + ret = lduw_code(s->pc); + s->pc += 2; + break; + default: + case OT_LONG: + ret = ldl_code(s->pc); + s->pc += 4; + break; + } + return ret; +} + +static inline int insn_const_size(unsigned int ot) +{ + if (ot <= OT_LONG) + return 1 << ot; + else + return 4; +} + +static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +{ + TranslationBlock *tb; + target_ulong pc; + + pc = s->cs_base + eip; + tb = s->tb; + /* NOTE: we handle the case where the TB spans two pages here */ + if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) || + (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) { +#ifdef VBOX + gen_check_external_event(); +#endif /* VBOX */ + /* jump to same page: we can use a direct jump */ + tcg_gen_goto_tb(tb_num); + gen_jmp_im(eip); + tcg_gen_exit_tb((intptr_t)tb + tb_num); + } else { + /* jump to another page: currently not optimized */ + gen_jmp_im(eip); + gen_eob(s); + } +} + +static inline void gen_jcc(DisasContext *s, int b, + target_ulong val, target_ulong next_eip) +{ + int l1, l2, cc_op; + + cc_op = s->cc_op; + gen_update_cc_op(s); + if (s->jmp_opt) { + l1 = gen_new_label(); + gen_jcc1(s, cc_op, b, l1); + + gen_goto_tb(s, 0, next_eip); + + gen_set_label(l1); + gen_goto_tb(s, 1, val); + s->is_jmp = DISAS_TB_JUMP; + } else { + + l1 = gen_new_label(); + l2 = gen_new_label(); + gen_jcc1(s, cc_op, b, l1); + + gen_jmp_im(next_eip); + tcg_gen_br(l2); + + gen_set_label(l1); + gen_jmp_im(val); + gen_set_label(l2); + gen_eob(s); + } +} + +static void gen_setcc(DisasContext *s, int b) +{ + int inv, jcc_op, l1; + TCGv t0; + + if (is_fast_jcc_case(s, b)) { + /* nominal case: we use a jump */ + /* XXX: make it faster by adding new instructions in TCG */ + t0 = tcg_temp_local_new(); + tcg_gen_movi_tl(t0, 0); + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_movi_tl(t0, 1); + gen_set_label(l1); + tcg_gen_mov_tl(cpu_T[0], t0); + tcg_temp_free(t0); + } else { + /* slow case: it is more efficient not to generate a jump, + although it is questionnable 
whether this optimization is + worth to */ + inv = b & 1; + jcc_op = (b >> 1) & 7; + gen_setcc_slow_T0(s, jcc_op); + if (inv) { + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); + } + } +} + +static inline void gen_op_movl_T0_seg(int seg_reg) +{ + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].selector)); +} + +static inline void gen_op_movl_seg_T0_vm(int seg_reg) +{ + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff); + tcg_gen_st32_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].selector)); + tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4); + tcg_gen_st_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[seg_reg].base)); +} + +/* move T0 to seg_reg and compute if the CPU state may change. Never + call this function with seg_reg == R_CS */ +static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip) +{ + if (s->pe && !s->vm86) { + /* XXX: optimize by finding processor state dynamically */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32); + /* abort translation because the addseg value may change or + because ss32 may change. For R_SS, translation must always + stop as a special handling must be done to disable hardware + interrupts for the next instruction */ + if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) + s->is_jmp = DISAS_TB_JUMP; + } else { + gen_op_movl_seg_T0_vm(seg_reg); + if (seg_reg == R_SS) + s->is_jmp = DISAS_TB_JUMP; + } +} + +static inline int svm_is_rep(int prefixes) +{ + return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0); +} + +static inline void +gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, + uint32_t type, uint64_t param) +{ + /* no SVM activated; fast case */ + if (likely(!(s->flags & HF_SVMI_MASK))) + return; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_svm_check_intercept_param(tcg_const_i32(type), + tcg_const_i64(param)); +} + +static inline void +gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type) +{ + gen_svm_check_intercept_param(s, pc_start, type, 0); +} + +static inline void gen_stack_update(DisasContext *s, int addend) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_add_reg_im(2, R_ESP, addend); + } else +#endif + if (s->ss32) { + gen_op_add_reg_im(1, R_ESP, addend); + } else { + gen_op_add_reg_im(0, R_ESP, addend); + } +} + +/* generate a push. It depends on ss32, addseg and dflag */ +static void gen_push_T0(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + if (s->dflag) { + gen_op_addq_A0_im(-8); + gen_op_st_T0_A0(OT_QUAD + s->mem_index); + } else { + gen_op_addq_A0_im(-2); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + } + gen_op_mov_reg_A0(2, R_ESP); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (!s->dflag) + gen_op_addl_A0_im(-2); + else + gen_op_addl_A0_im(-4); + if (s->ss32) { + if (s->addseg) { + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + gen_op_addl_A0_seg(R_SS); + } + } else { + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + gen_op_addl_A0_seg(R_SS); + } + gen_op_st_T0_A0(s->dflag + 1 + s->mem_index); + if (s->ss32 && !s->addseg) + gen_op_mov_reg_A0(1, R_ESP); + else + gen_op_mov_reg_T1(s->ss32 + 1, R_ESP); + } +} + +/* generate a push. 
It depends on ss32, addseg and dflag */ +/* slower version for T1, only used for call Ev */ +static void gen_push_T1(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + if (s->dflag) { + gen_op_addq_A0_im(-8); + gen_op_st_T1_A0(OT_QUAD + s->mem_index); + } else { + gen_op_addq_A0_im(-2); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + } + gen_op_mov_reg_A0(2, R_ESP); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (!s->dflag) + gen_op_addl_A0_im(-2); + else + gen_op_addl_A0_im(-4); + if (s->ss32) { + if (s->addseg) { + gen_op_addl_A0_seg(R_SS); + } + } else { + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(R_SS); + } + gen_op_st_T1_A0(s->dflag + 1 + s->mem_index); + + if (s->ss32 && !s->addseg) + gen_op_mov_reg_A0(1, R_ESP); + else + gen_stack_update(s, -(2 << s->dflag)); + } +} + +/* two step pop is necessary for precise exceptions */ +static void gen_pop_T0(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_op_movq_A0_reg(R_ESP); + gen_op_ld_T0_A0((s->dflag ? OT_QUAD : OT_WORD) + s->mem_index); + } else +#endif + { + gen_op_movl_A0_reg(R_ESP); + if (s->ss32) { + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + } else { + gen_op_andl_A0_ffff(); + gen_op_addl_A0_seg(R_SS); + } + gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index); + } +} + +static void gen_pop_update(DisasContext *s) +{ +#ifdef TARGET_X86_64 + if (CODE64(s) && s->dflag) { + gen_stack_update(s, 8); + } else +#endif + { + gen_stack_update(s, 2 << s->dflag); + } +} + +static void gen_stack_A0(DisasContext *s) +{ + gen_op_movl_A0_reg(R_ESP); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); +} + +/* NOTE: wrap around in 16 bit not fully handled */ +static void gen_pusha(DisasContext *s) +{ + int i; + gen_op_movl_A0_reg(R_ESP); + gen_op_addl_A0_im(-(16 << s->dflag)); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + for(i = 0;i < 8; i++) { + gen_op_mov_TN_reg(OT_LONG, 0, 7 - i); + gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index); + gen_op_addl_A0_im(2 << s->dflag); + } + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); +} + +/* NOTE: wrap around in 16 bit not fully handled */ +static void gen_popa(DisasContext *s) +{ + int i; + gen_op_movl_A0_reg(R_ESP); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 << s->dflag); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + for(i = 0;i < 8; i++) { + /* ESP is not reloaded */ + if (i != 3) { + gen_op_ld_T0_A0(OT_WORD + s->dflag + s->mem_index); + gen_op_mov_reg_T0(OT_WORD + s->dflag, 7 - i); + } + gen_op_addl_A0_im(2 << s->dflag); + } + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); +} + +static void gen_enter(DisasContext *s, int esp_addend, int level) +{ + int ot, opsize; + + level &= 0x1f; +#ifdef TARGET_X86_64 + if (CODE64(s)) { + ot = s->dflag ? 
OT_QUAD : OT_WORD; + opsize = 1 << ot; + + gen_op_movl_A0_reg(R_ESP); + gen_op_addq_A0_im(-opsize); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + + /* push bp */ + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_st_T0_A0(ot + s->mem_index); + if (level) { + /* XXX: must save state */ + gen_helper_enter64_level(tcg_const_i32(level), + tcg_const_i32((ot == OT_QUAD)), + cpu_T[1]); + } + gen_op_mov_reg_T1(ot, R_EBP); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); + gen_op_mov_reg_T1(OT_QUAD, R_ESP); + } else +#endif + { + ot = s->dflag + OT_WORD; + opsize = 2 << s->dflag; + + gen_op_movl_A0_reg(R_ESP); + gen_op_addl_A0_im(-opsize); + if (!s->ss32) + gen_op_andl_A0_ffff(); + tcg_gen_mov_tl(cpu_T[1], cpu_A0); + if (s->addseg) + gen_op_addl_A0_seg(R_SS); + /* push bp */ + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_st_T0_A0(ot + s->mem_index); + if (level) { + /* XXX: must save state */ + gen_helper_enter_level(tcg_const_i32(level), + tcg_const_i32(s->dflag), + cpu_T[1]); + } + gen_op_mov_reg_T1(ot, R_EBP); + tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level)); + gen_op_mov_reg_T1(OT_WORD + s->ss32, R_ESP); + } +} + +static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_raise_exception(tcg_const_i32(trapno)); + s->is_jmp = DISAS_TB_JUMP; +} + +/* an interrupt is different from an exception because of the + privilege checks */ +static void gen_interrupt(DisasContext *s, int intno, + target_ulong cur_eip, target_ulong next_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_raise_interrupt(tcg_const_i32(intno), + tcg_const_i32(next_eip - cur_eip)); + s->is_jmp = DISAS_TB_JUMP; +} + +static void gen_debug(DisasContext *s, target_ulong cur_eip) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(cur_eip); + gen_helper_debug(); + s->is_jmp = DISAS_TB_JUMP; +} + +/* generate a generic end of block. Trace exception is also generated + if needed */ +static void gen_eob(DisasContext *s) +{ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (s->tb->flags & HF_INHIBIT_IRQ_MASK) { + gen_helper_reset_inhibit_irq(); + } + if (s->tb->flags & HF_RF_MASK) { + gen_helper_reset_rf(); + } + if ( s->singlestep_enabled +#ifdef VBOX + && ( !(cpu_single_env->state & CPU_EMULATE_SINGLE_STEP) + || !(s->prefix & (PREFIX_REPNZ | PREFIX_REPZ) )) +#endif + ) { + gen_helper_debug(); + } else if (s->tf) { + gen_helper_single_step(); + } else { + tcg_gen_exit_tb(0); + } + s->is_jmp = DISAS_TB_JUMP; +} + +/* generate a jump to eip. 
No segment change must happen before as a + direct call to the next block may occur */ +static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) +{ + if (s->jmp_opt) { + gen_update_cc_op(s); + gen_goto_tb(s, tb_num, eip); + s->is_jmp = DISAS_TB_JUMP; + } else { + gen_jmp_im(eip); + gen_eob(s); + } +} + +static void gen_jmp(DisasContext *s, target_ulong eip) +{ + gen_jmp_tb(s, eip, 0); +} + +static inline void gen_ldq_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset); +} + +static inline void gen_stq_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index); +} + +static inline void gen_ldo_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_tmp0, mem_index); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); +} + +static inline void gen_sto_env_A0(int idx, int offset) +{ + int mem_index = (idx >> 2) - 1; + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, mem_index); + tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_tmp0, mem_index); +} + +static inline void gen_op_movo(int d_offset, int s_offset) +{ + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8); +} + +static inline void gen_op_movq(int d_offset, int s_offset) +{ + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); +} + +static inline void gen_op_movl(int d_offset, int s_offset) +{ + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset); +} + +static inline void gen_op_movq_env_0(int d_offset) +{ + tcg_gen_movi_i64(cpu_tmp1_i64, 0); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset); +} + +#define SSE_SPECIAL ((void *)1) +#define SSE_DUMMY ((void *)2) + +#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } +#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \ + gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, } + +static void *sse_op_table1[256][4] = { + /* 3DNow! extensions */ + [0x0e] = { SSE_DUMMY }, /* femms */ + [0x0f] = { SSE_DUMMY }, /* pf... 
*/ + /* pure SSE operations */ + [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ + [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ + [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */ + [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ + [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm }, + [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm }, + [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */ + [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */ + + [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ + [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ + [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */ + [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */ + [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ + [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ + [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd }, + [0x2f] = { gen_helper_comiss, gen_helper_comisd }, + [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */ + [0x51] = SSE_FOP(sqrt), + [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL }, + [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL }, + [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */ + [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */ + [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */ + [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */ + [0x58] = SSE_FOP(add), + [0x59] = SSE_FOP(mul), + [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps, + gen_helper_cvtss2sd, gen_helper_cvtsd2ss }, + [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq }, + [0x5c] = SSE_FOP(sub), + [0x5d] = SSE_FOP(min), + [0x5e] = SSE_FOP(div), + [0x5f] = SSE_FOP(max), + + [0xc2] = SSE_FOP(cmpeq), + [0xc6] = { gen_helper_shufps, gen_helper_shufpd }, + + [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */ + [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */ + + /* MMX ops and their SSE extensions */ + [0x60] = MMX_OP2(punpcklbw), + [0x61] = MMX_OP2(punpcklwd), + [0x62] = MMX_OP2(punpckldq), + [0x63] = MMX_OP2(packsswb), + [0x64] = MMX_OP2(pcmpgtb), + [0x65] = MMX_OP2(pcmpgtw), + [0x66] = MMX_OP2(pcmpgtl), + [0x67] = MMX_OP2(packuswb), + [0x68] = MMX_OP2(punpckhbw), + [0x69] = MMX_OP2(punpckhwd), + [0x6a] = MMX_OP2(punpckhdq), + [0x6b] = MMX_OP2(packssdw), + [0x6c] = { NULL, gen_helper_punpcklqdq_xmm }, + [0x6d] = { NULL, gen_helper_punpckhqdq_xmm }, + [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */ + [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */ + [0x70] = { gen_helper_pshufw_mmx, + gen_helper_pshufd_xmm, + gen_helper_pshufhw_xmm, + gen_helper_pshuflw_xmm }, + [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */ + [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */ + [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */ + [0x74] = MMX_OP2(pcmpeqb), + [0x75] = MMX_OP2(pcmpeqw), + [0x76] = MMX_OP2(pcmpeql), + [0x77] = { SSE_DUMMY }, /* emms */ + [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, 
insertq_i */ + [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r }, + [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps }, + [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps }, + [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ + [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ + [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ + [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ + [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps }, + [0xd1] = MMX_OP2(psrlw), + [0xd2] = MMX_OP2(psrld), + [0xd3] = MMX_OP2(psrlq), + [0xd4] = MMX_OP2(paddq), + [0xd5] = MMX_OP2(pmullw), + [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, + [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ + [0xd8] = MMX_OP2(psubusb), + [0xd9] = MMX_OP2(psubusw), + [0xda] = MMX_OP2(pminub), + [0xdb] = MMX_OP2(pand), + [0xdc] = MMX_OP2(paddusb), + [0xdd] = MMX_OP2(paddusw), + [0xde] = MMX_OP2(pmaxub), + [0xdf] = MMX_OP2(pandn), + [0xe0] = MMX_OP2(pavgb), + [0xe1] = MMX_OP2(psraw), + [0xe2] = MMX_OP2(psrad), + [0xe3] = MMX_OP2(pavgw), + [0xe4] = MMX_OP2(pmulhuw), + [0xe5] = MMX_OP2(pmulhw), + [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, + [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ + [0xe8] = MMX_OP2(psubsb), + [0xe9] = MMX_OP2(psubsw), + [0xea] = MMX_OP2(pminsw), + [0xeb] = MMX_OP2(por), + [0xec] = MMX_OP2(paddsb), + [0xed] = MMX_OP2(paddsw), + [0xee] = MMX_OP2(pmaxsw), + [0xef] = MMX_OP2(pxor), + [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ + [0xf1] = MMX_OP2(psllw), + [0xf2] = MMX_OP2(pslld), + [0xf3] = MMX_OP2(psllq), + [0xf4] = MMX_OP2(pmuludq), + [0xf5] = MMX_OP2(pmaddwd), + [0xf6] = MMX_OP2(psadbw), + [0xf7] = MMX_OP2(maskmov), + [0xf8] = MMX_OP2(psubb), + [0xf9] = MMX_OP2(psubw), + [0xfa] = MMX_OP2(psubl), + [0xfb] = MMX_OP2(psubq), + [0xfc] = MMX_OP2(paddb), + [0xfd] = MMX_OP2(paddw), + [0xfe] = MMX_OP2(paddl), +}; + +static void *sse_op_table2[3 * 8][2] = { + [0 + 2] = MMX_OP2(psrlw), + [0 + 4] = MMX_OP2(psraw), + [0 + 6] = MMX_OP2(psllw), + [8 + 2] = MMX_OP2(psrld), + [8 + 4] = MMX_OP2(psrad), + [8 + 6] = MMX_OP2(pslld), + [16 + 2] = MMX_OP2(psrlq), + [16 + 3] = { NULL, gen_helper_psrldq_xmm }, + [16 + 6] = MMX_OP2(psllq), + [16 + 7] = { NULL, gen_helper_pslldq_xmm }, +}; + +static void *sse_op_table3[4 * 3] = { + gen_helper_cvtsi2ss, + gen_helper_cvtsi2sd, + X86_64_ONLY(gen_helper_cvtsq2ss), + X86_64_ONLY(gen_helper_cvtsq2sd), + + gen_helper_cvttss2si, + gen_helper_cvttsd2si, + X86_64_ONLY(gen_helper_cvttss2sq), + X86_64_ONLY(gen_helper_cvttsd2sq), + + gen_helper_cvtss2si, + gen_helper_cvtsd2si, + X86_64_ONLY(gen_helper_cvtss2sq), + X86_64_ONLY(gen_helper_cvtsd2sq), +}; + +static void *sse_op_table4[8][4] = { + SSE_FOP(cmpeq), + SSE_FOP(cmplt), + SSE_FOP(cmple), + SSE_FOP(cmpunord), + SSE_FOP(cmpneq), + SSE_FOP(cmpnlt), + SSE_FOP(cmpnle), + SSE_FOP(cmpord), +}; + +static void *sse_op_table5[256] = { + [0x0c] = gen_helper_pi2fw, + [0x0d] = gen_helper_pi2fd, + [0x1c] = gen_helper_pf2iw, + [0x1d] = gen_helper_pf2id, + [0x8a] = gen_helper_pfnacc, + [0x8e] = gen_helper_pfpnacc, + [0x90] = gen_helper_pfcmpge, + [0x94] = gen_helper_pfmin, + [0x96] = gen_helper_pfrcp, + [0x97] = gen_helper_pfrsqrt, + [0x9a] = gen_helper_pfsub, + [0x9e] = gen_helper_pfadd, + [0xa0] = gen_helper_pfcmpgt, + [0xa4] = gen_helper_pfmax, + [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */ + [0xa7] = gen_helper_movq, /* pfrsqit1 */ + 
[0xaa] = gen_helper_pfsubr, + [0xae] = gen_helper_pfacc, + [0xb0] = gen_helper_pfcmpeq, + [0xb4] = gen_helper_pfmul, + [0xb6] = gen_helper_movq, /* pfrcpit2 */ + [0xb7] = gen_helper_pmulhrw_mmx, + [0xbb] = gen_helper_pswapd, + [0xbf] = gen_helper_pavgb_mmx /* pavgusb */ +}; + +struct sse_op_helper_s { + void *op[2]; uint32_t ext_mask; +}; +#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 } +#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 } +#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 } +#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 } +static struct sse_op_helper_s sse_op_table6[256] = { + [0x00] = SSSE3_OP(pshufb), + [0x01] = SSSE3_OP(phaddw), + [0x02] = SSSE3_OP(phaddd), + [0x03] = SSSE3_OP(phaddsw), + [0x04] = SSSE3_OP(pmaddubsw), + [0x05] = SSSE3_OP(phsubw), + [0x06] = SSSE3_OP(phsubd), + [0x07] = SSSE3_OP(phsubsw), + [0x08] = SSSE3_OP(psignb), + [0x09] = SSSE3_OP(psignw), + [0x0a] = SSSE3_OP(psignd), + [0x0b] = SSSE3_OP(pmulhrsw), + [0x10] = SSE41_OP(pblendvb), + [0x14] = SSE41_OP(blendvps), + [0x15] = SSE41_OP(blendvpd), + [0x17] = SSE41_OP(ptest), + [0x1c] = SSSE3_OP(pabsb), + [0x1d] = SSSE3_OP(pabsw), + [0x1e] = SSSE3_OP(pabsd), + [0x20] = SSE41_OP(pmovsxbw), + [0x21] = SSE41_OP(pmovsxbd), + [0x22] = SSE41_OP(pmovsxbq), + [0x23] = SSE41_OP(pmovsxwd), + [0x24] = SSE41_OP(pmovsxwq), + [0x25] = SSE41_OP(pmovsxdq), + [0x28] = SSE41_OP(pmuldq), + [0x29] = SSE41_OP(pcmpeqq), + [0x2a] = SSE41_SPECIAL, /* movntqda */ + [0x2b] = SSE41_OP(packusdw), + [0x30] = SSE41_OP(pmovzxbw), + [0x31] = SSE41_OP(pmovzxbd), + [0x32] = SSE41_OP(pmovzxbq), + [0x33] = SSE41_OP(pmovzxwd), + [0x34] = SSE41_OP(pmovzxwq), + [0x35] = SSE41_OP(pmovzxdq), + [0x37] = SSE42_OP(pcmpgtq), + [0x38] = SSE41_OP(pminsb), + [0x39] = SSE41_OP(pminsd), + [0x3a] = SSE41_OP(pminuw), + [0x3b] = SSE41_OP(pminud), + [0x3c] = SSE41_OP(pmaxsb), + [0x3d] = SSE41_OP(pmaxsd), + [0x3e] = SSE41_OP(pmaxuw), + [0x3f] = SSE41_OP(pmaxud), + [0x40] = SSE41_OP(pmulld), + [0x41] = SSE41_OP(phminposuw), +}; + +static struct sse_op_helper_s sse_op_table7[256] = { + [0x08] = SSE41_OP(roundps), + [0x09] = SSE41_OP(roundpd), + [0x0a] = SSE41_OP(roundss), + [0x0b] = SSE41_OP(roundsd), + [0x0c] = SSE41_OP(blendps), + [0x0d] = SSE41_OP(blendpd), + [0x0e] = SSE41_OP(pblendw), + [0x0f] = SSSE3_OP(palignr), + [0x14] = SSE41_SPECIAL, /* pextrb */ + [0x15] = SSE41_SPECIAL, /* pextrw */ + [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */ + [0x17] = SSE41_SPECIAL, /* extractps */ + [0x20] = SSE41_SPECIAL, /* pinsrb */ + [0x21] = SSE41_SPECIAL, /* insertps */ + [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */ + [0x40] = SSE41_OP(dpps), + [0x41] = SSE41_OP(dppd), + [0x42] = SSE41_OP(mpsadbw), + [0x60] = SSE42_OP(pcmpestrm), + [0x61] = SSE42_OP(pcmpestri), + [0x62] = SSE42_OP(pcmpistrm), + [0x63] = SSE42_OP(pcmpistri), +}; + +static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) +{ + int b1, op1_offset, op2_offset, is_xmm, val, ot; + int modrm, mod, rm, reg, reg_addr, offset_addr; + void *sse_op2; + + b &= 0xff; + if (s->prefix & PREFIX_DATA) + b1 = 1; + else if (s->prefix & PREFIX_REPZ) + b1 = 2; + else if (s->prefix & PREFIX_REPNZ) + b1 = 3; + else + b1 = 0; + sse_op2 = sse_op_table1[b][b1]; + if (!sse_op2) + goto illegal_op; + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { + is_xmm = 1; + } else { + if (b1 == 0) { + /* MMX case */ + is_xmm = 0; + } else { + is_xmm = 1; + } + } + /* simple MMX/SSE operation */ + if (s->flags & HF_TS_MASK) { + gen_exception(s, 
EXCP07_PREX, pc_start - s->cs_base); + return; + } + if (s->flags & HF_EM_MASK) { + illegal_op: + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + return; + } + if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) + if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) + goto illegal_op; + if (b == 0x0e) { + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) + goto illegal_op; + /* femms */ + gen_helper_emms(); + return; + } + if (b == 0x77) { + /* emms */ + gen_helper_emms(); + return; + } + /* prepare MMX state (XXX: optimize by storing fptt and fptags in + the static cpu state) */ + if (!is_xmm) { + gen_helper_enter_mmx(); + } + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7); + if (is_xmm) + reg |= rex_r; + mod = (modrm >> 6) & 3; + if (sse_op2 == SSE_SPECIAL) { + b |= (b1 << 8); + switch(b) { + case 0x0e7: /* movntq */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx)); + break; + case 0x1e7: /* movntdq */ + case 0x02b: /* movntps */ + case 0x12b: /* movntps */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + break; + case 0x3f0: /* lddqu */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + break; + case 0x22b: /* movntss */ + case 0x32b: /* movntsd */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (b1 & 1) { + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State, + xmm_regs[reg])); + } else { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(0))); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + } + break; + case 0x6e: /* movd mm, ea */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0); + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx)); + } else +#endif + { + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx)); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32); + } + break; + case 0x16e: /* movd xmm, ea */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[reg])); + gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]); + } else +#endif + { + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[reg])); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32); + } + break; + case 0x6f: /* movq mm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx)); + } else { + rm = (modrm & 7); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State,fpregs[rm].mmx)); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx)); + } + break; + case 0x010: /* movups */ + case 0x110: /* movupd */ + case 0x028: /* movaps */ + case 0x128: /* movapd */ + case 0x16f: /* movdqa xmm, ea */ + case 0x26f: /* movdqu xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + 
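+                /* register source: gen_op_movo copies the full 128-bit XMM register as two 64-bit halves */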
gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]), + offsetof(CPUX86State,xmm_regs[rm])); + } + break; + case 0x210: /* movss xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); + } + break; + case 0x310: /* movsd xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + break; + case 0x012: /* movlps */ + case 0x112: /* movlpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + /* movhlps */ + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + } + break; + case 0x212: /* movsldup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(2))); + } + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + break; + case 0x312: /* movddup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + break; + case 0x016: /* movhps */ + case 0x116: /* movhpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + } else { + /* movlhps */ + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + break; + case 0x216: /* movshdup */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), + offsetof(CPUX86State,xmm_regs[rm].XMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), + 
offsetof(CPUX86State,xmm_regs[rm].XMM_L(3))); + } + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + break; + case 0x178: + case 0x378: + { + int bit_index, field_length; + + if (b1 == 1 && reg != 0) + goto illegal_op; + field_length = ldub_code(s->pc++) & 0x3F; + bit_index = ldub_code(s->pc++) & 0x3F; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[reg])); + if (b1 == 1) + gen_helper_extrq_i(cpu_ptr0, tcg_const_i32(bit_index), + tcg_const_i32(field_length)); + else + gen_helper_insertq_i(cpu_ptr0, tcg_const_i32(bit_index), + tcg_const_i32(field_length)); + } + break; + case 0x7e: /* movd ea, mm */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + tcg_gen_ld_i64(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx)); + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); + } else +#endif + { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0))); + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); + } + break; + case 0x17e: /* movd ea, xmm */ +#ifdef TARGET_X86_64 + if (s->dflag == 2) { + tcg_gen_ld_i64(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1); + } else +#endif + { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1); + } + break; + case 0x27e: /* movq xmm, ea */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + } + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + break; + case 0x7f: /* movq ea, mm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx)); + } else { + rm = (modrm & 7); + gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx), + offsetof(CPUX86State,fpregs[reg].mmx)); + } + break; + case 0x011: /* movups */ + case 0x111: /* movupd */ + case 0x029: /* movaps */ + case 0x129: /* movapd */ + case 0x17f: /* movdqa ea, xmm */ + case 0x27f: /* movdqu ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg])); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]), + offsetof(CPUX86State,xmm_regs[reg])); + } + break; + case 0x211: /* movss ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + } + break; + case 0x311: /* movsd ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } + break; + case 0x013: /* movlps */ + case 0x113: /* movlpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + 
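+                /* memory destination: store only the low 64 bits of the source XMM register */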
gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + goto illegal_op; + } + break; + case 0x017: /* movhps */ + case 0x117: /* movhpd */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + } else { + goto illegal_op; + } + break; + case 0x71: /* shift mm, im */ + case 0x72: + case 0x73: + case 0x171: /* shift xmm, im */ + case 0x172: + case 0x173: + if (b1 >= 2) { + goto illegal_op; + } + val = ldub_code(s->pc++); + if (is_xmm) { + gen_op_movl_T0_im(val); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1))); + op1_offset = offsetof(CPUX86State,xmm_t0); + } else { + gen_op_movl_T0_im(val); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0))); + gen_op_movl_T0_0(); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1))); + op1_offset = offsetof(CPUX86State,mmx_t0); + } + sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; + if (!sse_op2) + goto illegal_op; + if (is_xmm) { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } else { + rm = (modrm & 7); + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + break; + case 0x050: /* movmskps */ + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x150: /* movmskpd */ + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, + offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x02a: /* cvtpi2ps */ + case 0x12a: /* cvtpi2pd */ + gen_helper_enter_mmx(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_ldq_env_A0(s->mem_index, op2_offset); + } else { + rm = (modrm & 7); + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + switch(b >> 8) { + case 0x0: + gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1); + break; + default: + case 0x1: + gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1); + break; + } + break; + case 0x22a: /* cvtsi2ss */ + case 0x32a: /* cvtsi2sd */ + ot = (s->dflag == 2) ? 
OT_QUAD : OT_LONG; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)]; + if (ot == OT_LONG) { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32); + } else { + ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]); + } + break; + case 0x02c: /* cvttps2pi */ + case 0x12c: /* cvttpd2pi */ + case 0x02d: /* cvtps2pi */ + case 0x12d: /* cvtpd2pi */ + gen_helper_enter_mmx(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_ldo_env_A0(s->mem_index, op2_offset); + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + switch(b) { + case 0x02c: + gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x12c: + gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x02d: + gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1); + break; + case 0x12d: + gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1); + break; + } + break; + case 0x22c: /* cvttss2si */ + case 0x32c: /* cvttsd2si */ + case 0x22d: /* cvtss2si */ + case 0x32d: /* cvtsd2si */ + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if ((b >> 8) & 1) { + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0))); + } else { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + } + op2_offset = offsetof(CPUX86State,xmm_t0); + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 + + (b & 1) * 4]; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset); + if (ot == OT_LONG) { + ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + } else { + ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0); + } + gen_op_mov_reg_T0(ot, reg); + break; + case 0xc4: /* pinsrw */ + case 0x1c4: + s->rip_offset = 1; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + val = ldub_code(s->pc++); + if (b1) { + val &= 7; + tcg_gen_st16_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[reg].XMM_W(val))); + } else { + val &= 3; + tcg_gen_st16_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val))); + } + break; + case 0xc5: /* pextrw */ + case 0x1c5: + if (mod != 3) + goto illegal_op; + ot = (s->dflag == 2) ? 
OT_QUAD : OT_LONG; + val = ldub_code(s->pc++); + if (b1) { + val &= 7; + rm = (modrm & 7) | REX_B(s); + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,xmm_regs[rm].XMM_W(val))); + } else { + val &= 3; + rm = (modrm & 7); + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val))); + } + reg = ((modrm >> 3) & 7) | rex_r; + gen_op_mov_reg_T0(ot, reg); + break; + case 0x1d6: /* movq ea, xmm */ + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + } + break; + case 0x2d6: /* movq2dq */ + gen_helper_enter_mmx(); + rm = (modrm & 7); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + offsetof(CPUX86State,fpregs[rm].mmx)); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + break; + case 0x3d6: /* movdq2q */ + gen_helper_enter_mmx(); + rm = (modrm & 7) | REX_B(s); + gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx), + offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + break; + case 0xd7: /* pmovmskb */ + case 0x1d7: + if (mod != 3) + goto illegal_op; + if (b1) { + rm = (modrm & 7) | REX_B(s); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm])); + gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0); + } else { + rm = (modrm & 7); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx)); + gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + reg = ((modrm >> 3) & 7) | rex_r; + gen_op_mov_reg_T0(OT_LONG, reg); + break; + case 0x138: + if (s->prefix & PREFIX_REPNZ) + goto crc32; + case 0x038: + b = modrm; + modrm = ldub_code(s->pc++); + rm = modrm & 7; + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (b1 >= 2) { + goto illegal_op; + } + + sse_op2 = sse_op_table6[b].op[b1]; + if (!sse_op2) + goto illegal_op; + if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) + goto illegal_op; + + if (b1) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); + } else { + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + switch (b) { + case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */ + case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */ + case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */ + gen_ldq_env_A0(s->mem_index, op2_offset + + offsetof(XMMReg, XMM_Q(0))); + break; + case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */ + case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */ + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset + + offsetof(XMMReg, XMM_L(0))); + break; + case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */ + tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset + + offsetof(XMMReg, XMM_W(0))); + break; + case 0x2a: /* movntqda */ + gen_ldo_env_A0(s->mem_index, op1_offset); + return; + default: + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } else { + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_lea_modrm(s, 
modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, op2_offset); + } + } + if (sse_op2 == SSE_SPECIAL) + goto illegal_op; + + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + + if (b == 0x17) + s->cc_op = CC_OP_EFLAGS; + break; + case 0x338: /* crc32 */ + crc32: + b = modrm; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + if (b != 0xf0 && b != 0xf1) + goto illegal_op; + if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) + goto illegal_op; + + if (b == 0xf0) + ot = OT_BYTE; + else if (b == 0xf1 && s->dflag != 2) + if (s->prefix & PREFIX_DATA) + ot = OT_WORD; + else + ot = OT_LONG; + else + ot = OT_QUAD; + + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, + cpu_T[0], tcg_const_i32(8 << ot)); + + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + gen_op_mov_reg_T0(ot, reg); + break; + case 0x03a: + case 0x13a: + b = modrm; + modrm = ldub_code(s->pc++); + rm = modrm & 7; + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (b1 >= 2) { + goto illegal_op; + } + + sse_op2 = sse_op_table7[b].op[b1]; + if (!sse_op2) + goto illegal_op; + if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) + goto illegal_op; + + if (sse_op2 == SSE_SPECIAL) { + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + reg = ((modrm >> 3) & 7) | rex_r; + val = ldub_code(s->pc++); + switch (b) { + case 0x14: /* pextrb */ + tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_B(val & 15))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st8(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x15: /* pextrw */ + tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_W(val & 7))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st16(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x16: + if (ot == OT_LONG) { /* pextrd */ + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + if (mod == 3) + gen_op_mov_reg_v(ot, rm, cpu_T[0]); + else + tcg_gen_qemu_st32(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + } else { /* pextrq */ +#ifdef TARGET_X86_64 + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_Q(val & 1))); + if (mod == 3) + gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64); + else + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); +#else + goto illegal_op; +#endif + } + break; + case 0x17: /* extractps */ + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + if (mod == 3) + gen_op_mov_reg_T0(ot, rm); + else + tcg_gen_qemu_st32(cpu_T[0], cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 0x20: /* pinsrb */ + if (mod == 3) + gen_op_mov_TN_reg(OT_LONG, 0, rm); + else + tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_B(val & 15))); + break; + case 0x21: /* insertps */ + if (mod == 3) { + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State,xmm_regs[rm] + .XMM_L((val >> 6) & 3))); + } else { + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + 
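+                        /* narrow the value loaded from memory to 32 bits; it is inserted into the selected dword below */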
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + } + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State,xmm_regs[reg] + .XMM_L((val >> 4) & 3))); + if ((val >> 0) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(0))); + if ((val >> 1) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(1))); + if ((val >> 2) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(2))); + if ((val >> 3) & 1) + tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), + cpu_env, offsetof(CPUX86State, + xmm_regs[reg].XMM_L(3))); + break; + case 0x22: + if (ot == OT_LONG) { /* pinsrd */ + if (mod == 3) + gen_op_mov_v_reg(ot, cpu_tmp0, rm); + else + tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_L(val & 3))); + } else { /* pinsrq */ +#ifdef TARGET_X86_64 + if (mod == 3) + gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm); + else + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, + offsetof(CPUX86State, + xmm_regs[reg].XMM_Q(val & 1))); +#else + goto illegal_op; +#endif + } + break; + } + return; + } + + if (b1) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); + } else { + op2_offset = offsetof(CPUX86State,xmm_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod == 3) { + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } else { + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_ldq_env_A0(s->mem_index, op2_offset); + } + } + val = ldub_code(s->pc++); + + if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ + s->cc_op = CC_OP_EFLAGS; + + if (s->dflag == 2) + /* The helper must use entire 64-bit gp registers */ + val |= 1 << 8; + } + + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); + break; + default: + goto illegal_op; + } + } else { + /* generic MMX or SSE operation */ + switch(b) { + case 0x70: /* pshufx insn */ + case 0xc6: /* pshufx insn */ + case 0xc2: /* compare insns */ + s->rip_offset = 1; + break; + default: + break; + } + if (is_xmm) { + op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,xmm_t0); + if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) || + b == 0xc2)) { + /* specific case for SSE single instructions */ + if (b1 == 2) { + /* 32 bit access */ + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + } else { + /* 64 bit access */ + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0))); + } + } else { + gen_ldo_env_A0(s->mem_index, op2_offset); + } + } else { + rm = (modrm & 7) | REX_B(s); + op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + } + } else { + op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + op2_offset = offsetof(CPUX86State,mmx_t0); + gen_ldq_env_A0(s->mem_index, 
op2_offset); + } else { + rm = (modrm & 7); + op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); + } + } + switch(b) { + case 0x0f: /* 3DNow! data insns */ + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) + goto illegal_op; + val = ldub_code(s->pc++); + sse_op2 = sse_op_table5[val]; + if (!sse_op2) + goto illegal_op; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + break; + case 0x70: /* pshufx insn */ + case 0xc6: /* pshufx insn */ + val = ldub_code(s->pc++); + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); + break; + case 0xc2: + /* compare insns */ + val = ldub_code(s->pc++); + if (val >= 8) + goto illegal_op; + sse_op2 = sse_op_table4[val][b1]; + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + break; + case 0xf7: + /* maskmov : we must prepare A0 */ + if (mod != 3) + goto illegal_op; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EDI); + } else +#endif + { + gen_op_movl_A0_reg(R_EDI); + if (s->aflag == 0) + gen_op_andl_A0_ffff(); + } + gen_add_A0_ds_seg(s); + + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0); + break; + default: + tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); + ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1); + break; + } + if (b == 0x2e || b == 0x2f) { + s->cc_op = CC_OP_EFLAGS; + } + } +} + +#ifdef VBOX +/* Checks if it's an invalid lock sequence. Only a few instructions + can be used together with the lock prefix and of those only the + form that write a memory operand. So, this is kind of annoying + work to do... + The AMD manual lists the following instructions. 
+ ADC + ADD + AND + BTC + BTR + BTS + CMPXCHG + CMPXCHG8B + CMPXCHG16B + DEC + INC + NEG + NOT + OR + SBB + SUB + XADD + XCHG + XOR */ +static bool is_invalid_lock_sequence(DisasContext *s, target_ulong pc_start, int b) +{ + target_ulong pc = s->pc; + int modrm, mod, op; + + /* X={8,16,32,64} Y={16,32,64} */ + switch (b) + { + /* /2: ADC reg/memX, immX */ + /* /0: ADD reg/memX, immX */ + /* /4: AND reg/memX, immX */ + /* /1: OR reg/memX, immX */ + /* /3: SBB reg/memX, immX */ + /* /5: SUB reg/memX, immX */ + /* /6: XOR reg/memX, immX */ + case 0x80: + case 0x81: + case 0x83: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op == 7) /* /7: CMP */ + break; + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0x10: /* /r: ADC reg/mem8, reg8 */ + case 0x11: /* /r: ADC reg/memX, regY */ + case 0x00: /* /r: ADD reg/mem8, reg8 */ + case 0x01: /* /r: ADD reg/memX, regY */ + case 0x20: /* /r: AND reg/mem8, reg8 */ + case 0x21: /* /r: AND reg/memY, regY */ + case 0x08: /* /r: OR reg/mem8, reg8 */ + case 0x09: /* /r: OR reg/memY, regY */ + case 0x18: /* /r: SBB reg/mem8, reg8 */ + case 0x19: /* /r: SBB reg/memY, regY */ + case 0x28: /* /r: SUB reg/mem8, reg8 */ + case 0x29: /* /r: SUB reg/memY, regY */ + case 0x86: /* /r: XCHG reg/mem8, reg8 or XCHG reg8, reg/mem8 */ + case 0x87: /* /r: XCHG reg/memY, regY or XCHG regY, reg/memY */ + case 0x30: /* /r: XOR reg/mem8, reg8 */ + case 0x31: /* /r: XOR reg/memY, regY */ + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /1: DEC reg/memX */ + /* /0: INC reg/memX */ + case 0xfe: + case 0xff: + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /3: NEG reg/memX */ + /* /2: NOT reg/memX */ + case 0xf6: + case 0xf7: + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0x0f: + b = ldub_code(pc++); + switch (b) + { + /* /7: BTC reg/memY, imm8 */ + /* /6: BTR reg/memY, imm8 */ + /* /5: BTS reg/memY, imm8 */ + case 0xba: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op < 5) + break; + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + case 0xbb: /* /r: BTC reg/memY, regY */ + case 0xb3: /* /r: BTR reg/memY, regY */ + case 0xab: /* /r: BTS reg/memY, regY */ + case 0xb0: /* /r: CMPXCHG reg/mem8, reg8 */ + case 0xb1: /* /r: CMPXCHG reg/memY, regY */ + case 0xc0: /* /r: XADD reg/mem8, reg8 */ + case 0xc1: /* /r: XADD reg/memY, regY */ + modrm = ldub_code(pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) /* register destination */ + break; + return false; + + /* /1: CMPXCHG8B mem64 or CMPXCHG16B mem128 */ + case 0xc7: + modrm = ldub_code(pc++); + op = (modrm >> 3) & 7; + if (op != 1) + break; + return false; + } + break; + } + + /* illegal sequence. The s->pc is past the lock prefix and that + is sufficient for the TB, I think. */ + Log(("illegal lock sequence %RGv (b=%#x)\n", pc_start, b)); + return true; +} +#endif /* VBOX */ + +/* convert one instruction. s->is_jmp is set if the translation must + be stopped. 
Return the next pc value */ +static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) +{ + int b, prefixes, aflag, dflag; + int shift, ot; + int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val; + target_ulong next_eip, tval; + int rex_w, rex_r; + + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) + tcg_gen_debug_insn_start(pc_start); + s->pc = pc_start; + prefixes = 0; + aflag = s->code32; + dflag = s->code32; + s->override = -1; + rex_w = -1; + rex_r = 0; +#ifdef TARGET_X86_64 + s->rex_x = 0; + s->rex_b = 0; + x86_64_hregs = 0; +#endif + s->rip_offset = 0; /* for relative ip address */ +#ifdef VBOX + /* nike: seems only slow down things */ +# if 0 + /* Always update EIP. Otherwise one must be very careful with generated code that can raise exceptions. */ + + gen_update_eip(pc_start - s->cs_base); +# endif +#endif /* VBOX */ + + next_byte: + b = ldub_code(s->pc); + s->pc++; + /* check prefixes */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; + case 0x40 ... 0x4f: + /* REX prefix */ + rex_w = (b >> 3) & 1; + rex_r = (b & 0x4) << 1; + s->rex_x = (b & 0x2) << 2; + REX_B(s) = (b & 0x1) << 3; + x86_64_hregs = 1; /* select uniform byte register addressing */ + goto next_byte; + } + if (rex_w == 1) { + /* 0x66 is ignored if rex.w is set */ + dflag = 2; + } else { + if (prefixes & PREFIX_DATA) + dflag ^= 1; + } + if (!(prefixes & PREFIX_ADR)) + aflag = 2; + } else +#endif + { + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; + } + if (prefixes & PREFIX_DATA) + dflag ^= 1; + if (prefixes & PREFIX_ADR) + aflag ^= 1; + } + + s->prefix = prefixes; + s->aflag = aflag; + s->dflag = dflag; + + /* lock generation */ +#ifndef VBOX + if (prefixes & PREFIX_LOCK) + gen_helper_lock(); +#else /* VBOX */ + if (prefixes & PREFIX_LOCK) { + if (is_invalid_lock_sequence(s, pc_start, b)) { + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + return s->pc; + } + gen_helper_lock(); + } +#endif /* VBOX */ + + /* now check op code */ + reswitch: + switch(b) { + case 0x0f: + /**************************/ + /* extended op code */ + b = ldub_code(s->pc++) | 0x100; + goto reswitch; + + /**************************/ + /* arith & logic */ + case 0x00 ... 0x05: + case 0x08 ... 0x0d: + case 0x10 ... 0x15: + case 0x18 ... 0x1d: + case 0x20 ... 0x25: + case 0x28 ... 0x2d: + case 0x30 ... 0x35: + case 0x38 ... 
0x3d: + { + int op, f, val; + op = (b >> 3) & 7; + f = (b >> 1) & 3; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + switch(f) { + case 0: /* OP Ev, Gv */ + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else if (op == OP_XORL && rm == reg) { + xor_zero: + /* xor reg, reg optimisation */ + gen_op_movl_T0_0(); + s->cc_op = CC_OP_LOGICB + ot; + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + break; + } else { + opreg = rm; + } + gen_op_mov_TN_reg(ot, 1, reg); + gen_op(s, op, ot, opreg); + break; + case 1: /* OP Gv, Ev */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + reg = ((modrm >> 3) & 7) | rex_r; + rm = (modrm & 7) | REX_B(s); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T1_A0(ot + s->mem_index); + } else if (op == OP_XORL && rm == reg) { + goto xor_zero; + } else { + gen_op_mov_TN_reg(ot, 1, rm); + } + gen_op(s, op, ot, reg); + break; + case 2: /* OP A, Iv */ + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + gen_op(s, op, ot, OR_EAX); + break; + } + } + break; + + case 0x82: + if (CODE64(s)) + goto illegal_op; + case 0x80: /* GRP1 */ + case 0x81: + case 0x83: + { + int val; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + + if (mod != 3) { + if (b == 0x83) + s->rip_offset = 1; + else + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + opreg = OR_TMP0; + } else { + opreg = rm; + } + + switch(b) { + default: + case 0x80: + case 0x81: + case 0x82: + val = insn_get(s, ot); + break; + case 0x83: + val = (int8_t)insn_get(s, OT_BYTE); + break; + } + gen_op_movl_T1_im(val); + gen_op(s, op, ot, opreg); + } + break; + + /**************************/ + /* inc, dec, and other misc arith */ + case 0x40 ... 0x47: /* inc Gv */ + ot = dflag ? OT_LONG : OT_WORD; + gen_inc(s, ot, OR_EAX + (b & 7), 1); + break; + case 0x48 ... 0x4f: /* dec Gv */ + ot = dflag ? 
OT_LONG : OT_WORD; + gen_inc(s, ot, OR_EAX + (b & 7), -1); + break; + case 0xf6: /* GRP3 */ + case 0xf7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + if (mod != 3) { + if (op == 0) + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + + switch(op) { + case 0: /* test */ + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + case 2: /* not */ + tcg_gen_not_tl(cpu_T[0], cpu_T[0]); + if (mod != 3) { + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_reg_T0(ot, rm); + } + break; + case 3: /* neg */ + tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); + if (mod != 3) { + gen_op_st_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_reg_T0(ot, rm); + } + gen_op_update_neg_cc(); + s->cc_op = CC_OP_SUBB + ot; + break; + case 4: /* mul */ + switch(ot) { + case OT_BYTE: + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); + s->cc_op = CC_OP_MULB; + break; + case OT_WORD: + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + s->cc_op = CC_OP_MULW; + break; + default: + case OT_LONG: +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_extu_i32_i64(t0, cpu_T[0]); + tcg_gen_extu_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + } +#endif + s->cc_op = CC_OP_MULL; + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_helper_mulq_EAX_T0(cpu_T[0]); + s->cc_op = CC_OP_MULQ; + break; +#endif + } + break; + case 5: /* imul */ + switch(ot) { + case OT_BYTE: + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + s->cc_op = CC_OP_MULB; + break; + case OT_WORD: + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + 
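+                    /* sign-extend both 16-bit operands so the host multiply yields the full signed product */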
tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + s->cc_op = CC_OP_MULW; + break; + default: + case OT_LONG: +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + } +#endif + s->cc_op = CC_OP_MULL; + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_helper_imulq_EAX_T0(cpu_T[0]); + s->cc_op = CC_OP_MULQ; + break; +#endif + } + break; + case 6: /* div */ + switch(ot) { + case OT_BYTE: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divb_AL(cpu_T[0]); + break; + case OT_WORD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divw_AX(cpu_T[0]); + break; + default: + case OT_LONG: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divl_EAX(cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_divq_EAX(cpu_T[0]); + break; +#endif + } + break; + case 7: /* idiv */ + switch(ot) { + case OT_BYTE: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivb_AL(cpu_T[0]); + break; + case OT_WORD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivw_AX(cpu_T[0]); + break; + default: + case OT_LONG: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivl_EAX(cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + gen_jmp_im(pc_start - s->cs_base); + gen_helper_idivq_EAX(cpu_T[0]); + break; +#endif + } + break; + default: + goto illegal_op; + } + break; + + case 0xfe: /* GRP4 */ + case 0xff: /* GRP5 */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + op = (modrm >> 3) & 7; + if (op >= 2 && b == 0xfe) { + goto illegal_op; + } + if (CODE64(s)) { + if (op == 2 || op == 4) { + /* operand size for jumps is 64 bit */ + ot = OT_QUAD; + } else if (op == 3 || op == 5) { + ot = dflag ? OT_LONG + (rex_w == 1) : OT_WORD; + } else if (op == 6) { + /* default push size is 64 bit */ + ot = dflag ? 
OT_QUAD : OT_WORD; + } + } + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (op >= 2 && op != 3 && op != 5) + gen_op_ld_T0_A0(ot + s->mem_index); + } else { + gen_op_mov_TN_reg(ot, 0, rm); + } + + switch(op) { + case 0: /* inc Ev */ + if (mod != 3) + opreg = OR_TMP0; + else + opreg = rm; + gen_inc(s, ot, opreg, 1); + break; + case 1: /* dec Ev */ + if (mod != 3) + opreg = OR_TMP0; + else + opreg = rm; + gen_inc(s, ot, opreg, -1); + break; + case 2: /* call Ev */ + /* XXX: optimize if memory (no 'and' is necessary) */ +#ifdef VBOX_WITH_CALL_RECORD + if (s->record_call) + gen_op_record_call(); +#endif + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + next_eip = s->pc - s->cs_base; + gen_movtl_T1_im(next_eip); + gen_push_T1(s); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 3: /* lcall Ev */ + gen_op_ld_T1_A0(ot + s->mem_index); + gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); + gen_op_ldu_T0_A0(OT_WORD + s->mem_index); + do_lcall: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(dflag), + tcg_const_i32(s->pc - pc_start)); + } else { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(dflag), + tcg_const_i32(s->pc - s->cs_base)); + } + gen_eob(s); + break; + case 4: /* jmp Ev */ + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 5: /* ljmp Ev */ + gen_op_ld_T1_A0(ot + s->mem_index); + gen_add_A0_im(s, 1 << (ot - OT_WORD + 1)); + gen_op_ldu_T0_A0(OT_WORD + s->mem_index); + do_ljmp: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1], + tcg_const_i32(s->pc - pc_start)); + } else { + gen_op_movl_seg_T0_vm(R_CS); + gen_op_movl_T0_T1(); + gen_op_jmp_T0(); + } + gen_eob(s); + break; + case 6: /* push Ev */ + gen_push_T0(s); + break; + default: + goto illegal_op; + } + break; + + case 0x84: /* test Ev, Gv */ + case 0x85: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_op_mov_TN_reg(ot, 1, reg); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + + case 0xa8: /* test eAX, Iv */ + case 0xa9: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + val = insn_get(s, ot); + + gen_op_mov_TN_reg(ot, 0, OR_EAX); + gen_op_movl_T1_im(val); + gen_op_testl_T0_T1_cc(); + s->cc_op = CC_OP_LOGICB + ot; + break; + + case 0x98: /* CWDE/CBW */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_QUAD, R_EAX); + } else +#endif + if (dflag == 1) { + gen_op_mov_TN_reg(OT_WORD, 0, R_EAX); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + } else { + gen_op_mov_TN_reg(OT_BYTE, 0, R_EAX); + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + } + break; + case 0x99: /* CDQ/CWD */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63); + gen_op_mov_reg_T0(OT_QUAD, R_EDX); + } else +#endif + if (dflag == 1) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + 
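+            /* CDQ: propagate the sign bit of EAX into every bit of EDX */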
tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + } else { + gen_op_mov_TN_reg(OT_WORD, 0, R_EAX); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + } + break; + case 0x1af: /* imul Gv, Ev */ + case 0x69: /* imul Gv, Ev, I */ + case 0x6b: + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + if (b == 0x69) + s->rip_offset = insn_const_size(ot); + else if (b == 0x6b) + s->rip_offset = 1; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + if (b == 0x69) { + val = insn_get(s, ot); + gen_op_movl_T1_im(val); + } else if (b == 0x6b) { + val = (int8_t)insn_get(s, OT_BYTE); + gen_op_movl_T1_im(val); + } else { + gen_op_mov_TN_reg(ot, 1, reg); + } + +#ifdef TARGET_X86_64 + if (ot == OT_QUAD) { + gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]); + } else +#endif + if (ot == OT_LONG) { +#ifdef TARGET_X86_64 + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); +#else + { + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[1], t0); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); + } +#endif + } else { + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + } + gen_op_mov_reg_T0(ot, reg); + s->cc_op = CC_OP_MULB + ot; + break; + case 0x1c0: + case 0x1c1: /* xadd Ev, Gv */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_mov_TN_reg(ot, 1, rm); + gen_op_addl_T0_T1(); + gen_op_mov_reg_T1(ot, reg); + gen_op_mov_reg_T0(ot, rm); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_mov_TN_reg(ot, 0, reg); + gen_op_ld_T1_A0(ot + s->mem_index); + gen_op_addl_T0_T1(); + gen_op_st_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T1(ot, reg); + } + gen_op_update2_cc(); + s->cc_op = CC_OP_ADDB + ot; + break; + case 0x1b0: + case 0x1b1: /* cmpxchg Ev, Gv */ + { + int label1, label2; + TCGv t0, t1, t2, a0; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + a0 = tcg_temp_local_new(); + gen_op_mov_v_reg(ot, t1, reg); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, t0, rm); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_mov_tl(a0, cpu_A0); + gen_op_ld_v(ot + s->mem_index, t0, a0); + rm = 0; /* avoid warning */ + } + label1 = gen_new_label(); + tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0); + gen_extu(ot, t2); + 
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + if (mod == 3) { + label2 = gen_new_label(); + gen_op_mov_reg_v(ot, R_EAX, t0); + tcg_gen_br(label2); + gen_set_label(label1); + gen_op_mov_reg_v(ot, rm, t1); + gen_set_label(label2); + } else { + tcg_gen_mov_tl(t1, t0); + gen_op_mov_reg_v(ot, R_EAX, t0); + gen_set_label(label1); + /* always store */ + gen_op_st_v(ot + s->mem_index, t1, a0); + } + tcg_gen_mov_tl(cpu_cc_src, t0); + tcg_gen_mov_tl(cpu_cc_dst, t2); + s->cc_op = CC_OP_SUBB + ot; + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + tcg_temp_free(a0); + } + break; + case 0x1c7: /* cmpxchg8b */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if ((mod == 3) || ((modrm & 0x38) != 0x8)) + goto illegal_op; +#ifdef TARGET_X86_64 + if (dflag == 2) { + if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) + goto illegal_op; + gen_jmp_im(pc_start - s->cs_base); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_helper_cmpxchg16b(cpu_A0); + } else +#endif + { + if (!(s->cpuid_features & CPUID_CX8)) + goto illegal_op; + gen_jmp_im(pc_start - s->cs_base); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_helper_cmpxchg8b(cpu_A0); + } + s->cc_op = CC_OP_EFLAGS; + break; + + /**************************/ + /* push/pop */ + case 0x50 ... 0x57: /* push */ + gen_op_mov_TN_reg(OT_LONG, 0, (b & 7) | REX_B(s)); + gen_push_T0(s); + break; + case 0x58 ... 0x5f: /* pop */ + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + gen_pop_T0(s); + /* NOTE: order is important for pop %sp */ + gen_pop_update(s); + gen_op_mov_reg_T0(ot, (b & 7) | REX_B(s)); + break; + case 0x60: /* pusha */ + if (CODE64(s)) + goto illegal_op; + gen_pusha(s); + break; + case 0x61: /* popa */ + if (CODE64(s)) + goto illegal_op; + gen_popa(s); + break; + case 0x68: /* push Iv */ + case 0x6a: + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + if (b == 0x68) + val = insn_get(s, ot); + else + val = (int8_t)insn_get(s, OT_BYTE); + gen_op_movl_T0_im(val); + gen_push_T0(s); + break; + case 0x8f: /* pop Ev */ + if (CODE64(s)) { + ot = dflag ? OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + gen_pop_T0(s); + if (mod == 3) { + /* NOTE: order is important for pop %sp */ + gen_pop_update(s); + rm = (modrm & 7) | REX_B(s); + gen_op_mov_reg_T0(ot, rm); + } else { + /* NOTE: order is important too for MMU exceptions */ + s->popl_esp_hack = 1 << ot; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + s->popl_esp_hack = 0; + gen_pop_update(s); + } + break; + case 0xc8: /* enter */ + { + int level; + val = lduw_code(s->pc); + s->pc += 2; + level = ldub_code(s->pc++); + gen_enter(s, val, level); + } + break; + case 0xc9: /* leave */ + /* XXX: exception not precise (ESP is updated before potential exception) */ + if (CODE64(s)) { + gen_op_mov_TN_reg(OT_QUAD, 0, R_EBP); + gen_op_mov_reg_T0(OT_QUAD, R_ESP); + } else if (s->ss32) { + gen_op_mov_TN_reg(OT_LONG, 0, R_EBP); + gen_op_mov_reg_T0(OT_LONG, R_ESP); + } else { + gen_op_mov_TN_reg(OT_WORD, 0, R_EBP); + gen_op_mov_reg_T0(OT_WORD, R_ESP); + } + gen_pop_T0(s); + if (CODE64(s)) { + ot = dflag ? 
OT_QUAD : OT_WORD; + } else { + ot = dflag + OT_WORD; + } + gen_op_mov_reg_T0(ot, R_EBP); + gen_pop_update(s); + break; + case 0x06: /* push es */ + case 0x0e: /* push cs */ + case 0x16: /* push ss */ + case 0x1e: /* push ds */ + if (CODE64(s)) + goto illegal_op; + gen_op_movl_T0_seg(b >> 3); + gen_push_T0(s); + break; + case 0x1a0: /* push fs */ + case 0x1a8: /* push gs */ + gen_op_movl_T0_seg((b >> 3) & 7); + gen_push_T0(s); + break; + case 0x07: /* pop es */ + case 0x17: /* pop ss */ + case 0x1f: /* pop ds */ + if (CODE64(s)) + goto illegal_op; + reg = b >> 3; + gen_pop_T0(s); + gen_movl_seg_T0(s, reg, pc_start - s->cs_base); + gen_pop_update(s); + if (reg == R_SS) { + /* if reg == SS, inhibit interrupts/trace. */ + /* If several instructions disable interrupts, only the + _first_ does it */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + s->tf = 0; + } + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x1a1: /* pop fs */ + case 0x1a9: /* pop gs */ + gen_pop_T0(s); + gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base); + gen_pop_update(s); + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + + /**************************/ + /* mov */ + case 0x88: + case 0x89: /* mov Gv, Ev */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + /* generate a generic store */ + gen_ldst_modrm(s, modrm, ot, reg, 1); + break; + case 0xc6: + case 0xc7: /* mov Ev, Iv */ + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod != 3) { + s->rip_offset = insn_const_size(ot); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + } + val = insn_get(s, ot); + gen_op_movl_T0_im(val); + if (mod != 3) + gen_op_st_T0_A0(ot + s->mem_index); + else + gen_op_mov_reg_T0(ot, (modrm & 7) | REX_B(s)); + break; + case 0x8a: + case 0x8b: /* mov Ev, Gv */ +#ifdef VBOX /* dtrace hot fix */ + if (prefixes & PREFIX_LOCK) + goto illegal_op; +#endif + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = OT_WORD + dflag; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_op_mov_reg_T0(ot, reg); + break; + case 0x8e: /* mov seg, Gv */ + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + if (reg >= 6 || reg == R_CS) + goto illegal_op; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_movl_seg_T0(s, reg, pc_start - s->cs_base); + if (reg == R_SS) { + /* if reg == SS, inhibit interrupts/trace */ + /* If several instructions disable interrupts, only the + _first_ does it */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + s->tf = 0; + } + if (s->is_jmp) { + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x8c: /* mov Gv, seg */ + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + if (reg >= 6) + goto illegal_op; + gen_op_movl_T0_seg(reg); + if (mod == 3) + ot = OT_WORD + dflag; + else + ot = OT_WORD; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + + case 0x1b6: /* movzbS Gv, Eb */ + case 0x1b7: /* movzwS Gv, Eb */ + case 0x1be: /* movsbS Gv, Eb */ + case 0x1bf: /* movswS Gv, Eb */ + { + int d_ot; + /* d_ot is the size of destination */ + d_ot = dflag + OT_WORD; + /* ot is the size of source */ + ot = (b & 1) + OT_BYTE; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | 
REX_B(s); + + if (mod == 3) { + gen_op_mov_TN_reg(ot, 0, rm); + switch(ot | (b & 8)) { + case OT_BYTE: + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + break; + case OT_BYTE | 8: + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + break; + case OT_WORD: + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); + break; + default: + case OT_WORD | 8: + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + break; + } + gen_op_mov_reg_T0(d_ot, reg); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (b & 8) { + gen_op_lds_T0_A0(ot + s->mem_index); + } else { + gen_op_ldu_T0_A0(ot + s->mem_index); + } + gen_op_mov_reg_T0(d_ot, reg); + } + } + break; + + case 0x8d: /* lea */ + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + reg = ((modrm >> 3) & 7) | rex_r; + /* we must ensure that no segment is added */ + s->override = -1; + val = s->addseg; + s->addseg = 0; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + s->addseg = val; + gen_op_mov_reg_A0(ot - OT_WORD, reg); + break; + + case 0xa0: /* mov EAX, Ov */ + case 0xa1: + case 0xa2: /* mov Ov, EAX */ + case 0xa3: + { + target_ulong offset_addr; + + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + offset_addr = ldq_code(s->pc); + s->pc += 8; + gen_op_movq_A0_im(offset_addr); + } else +#endif + { + if (s->aflag) { + offset_addr = insn_get(s, OT_LONG); + } else { + offset_addr = insn_get(s, OT_WORD); + } + gen_op_movl_A0_im(offset_addr); + } + gen_add_A0_ds_seg(s); + if ((b & 2) == 0) { + gen_op_ld_T0_A0(ot + s->mem_index); + gen_op_mov_reg_T0(ot, R_EAX); + } else { + gen_op_mov_TN_reg(ot, 0, R_EAX); + gen_op_st_T0_A0(ot + s->mem_index); + } + } + break; + case 0xd7: /* xlat */ +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EBX); + gen_op_mov_TN_reg(OT_QUAD, 0, R_EAX); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]); + } else +#endif + { + gen_op_movl_A0_reg(R_EBX); + gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xff); + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]); + if (s->aflag == 0) + gen_op_andl_A0_ffff(); + else + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff); + } + gen_add_A0_ds_seg(s); + gen_op_ldu_T0_A0(OT_BYTE + s->mem_index); + gen_op_mov_reg_T0(OT_BYTE, R_EAX); + break; + case 0xb0 ... 0xb7: /* mov R, Ib */ + val = insn_get(s, OT_BYTE); + gen_op_movl_T0_im(val); + gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s)); + break; + case 0xb8 ... 0xbf: /* mov R, Iv */ +#ifdef TARGET_X86_64 + if (dflag == 2) { + uint64_t tmp; + /* 64 bit case */ + tmp = ldq_code(s->pc); + s->pc += 8; + reg = (b & 7) | REX_B(s); + gen_movtl_T0_im(tmp); + gen_op_mov_reg_T0(OT_QUAD, reg); + } else +#endif + { + ot = dflag ? OT_LONG : OT_WORD; + val = insn_get(s, ot); + reg = (b & 7) | REX_B(s); + gen_op_movl_T0_im(val); + gen_op_mov_reg_T0(ot, reg); + } + break; + + case 0x91 ... 
0x97: /* xchg R, EAX */
+    do_xchg_reg_eax:
+        ot = dflag + OT_WORD;
+        reg = (b & 7) | REX_B(s);
+        rm = R_EAX;
+        goto do_xchg_reg;
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3) {
+            rm = (modrm & 7) | REX_B(s);
+        do_xchg_reg:
+            gen_op_mov_TN_reg(ot, 0, reg);
+            gen_op_mov_TN_reg(ot, 1, rm);
+            gen_op_mov_reg_T0(ot, rm);
+            gen_op_mov_reg_T1(ot, reg);
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg(ot, 0, reg);
+            /* for xchg, lock is implicit */
+            if (!(prefixes & PREFIX_LOCK))
+                gen_helper_lock();
+            gen_op_ld_T1_A0(ot + s->mem_index);
+            gen_op_st_T0_A0(ot + s->mem_index);
+            if (!(prefixes & PREFIX_LOCK))
+                gen_helper_unlock();
+            gen_op_mov_reg_T1(ot, reg);
+        }
+        break;
+    case 0xc4: /* les Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_ES;
+        goto do_lxx;
+    case 0xc5: /* lds Gv */
+        if (CODE64(s))
+            goto illegal_op;
+        op = R_DS;
+        goto do_lxx;
+    case 0x1b2: /* lss Gv */
+        op = R_SS;
+        goto do_lxx;
+    case 0x1b4: /* lfs Gv */
+        op = R_FS;
+        goto do_lxx;
+    case 0x1b5: /* lgs Gv */
+        op = R_GS;
+    do_lxx:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        if (mod == 3)
+            goto illegal_op;
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_ld_T1_A0(ot + s->mem_index);
+        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
+        /* load the segment first to handle exceptions properly */
+        gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
+        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+        /* then put the data */
+        gen_op_mov_reg_T1(ot, reg);
+        if (s->is_jmp) {
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+        }
+        break;
+
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1:
+        /* shift Ev,Ib */
+        shift = 2;
+    grp2:
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag + OT_WORD;
+
+            modrm = ldub_code(s->pc++);
+            mod = (modrm >> 6) & 3;
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                if (shift == 2) {
+                    s->rip_offset = 1;
+                }
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                opreg = OR_TMP0;
+            } else {
+                opreg = (modrm & 7) | REX_B(s);
+            }
+
+            /* simpler op */
+            if (shift == 0) {
+                gen_shift(s, op, ot, opreg, OR_ECX);
+            } else {
+                if (shift == 2) {
+                    shift = ldub_code(s->pc++);
+                }
+                gen_shifti(s, op, ot, opreg, shift);
+            }
+        }
+        break;
+    case 0xd0:
+    case 0xd1:
+        /* shift Ev,1 */
+        shift = 1;
+        goto grp2;
+    case 0xd2:
+    case 0xd3:
+        /* shift Ev,cl */
+        shift = 0;
+        goto grp2;
+
+    case 0x1a4: /* shld imm */
+        op = 0;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1a5: /* shld cl */
+        op = 0;
+        shift = 0;
+        goto do_shiftd;
+    case 0x1ac: /* shrd imm */
+        op = 1;
+        shift = 1;
+        goto do_shiftd;
+    case 0x1ad: /* shrd cl */
+        op = 1;
+        shift = 0;
+    do_shiftd:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            opreg = OR_TMP0;
+        } else {
+            opreg = rm;
+        }
+        gen_op_mov_TN_reg(ot, 1, reg);
+
+        if (shift) {
+            val = ldub_code(s->pc++);
+            tcg_gen_movi_tl(cpu_T3, val);
+        } else {
+            tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+        }
+        gen_shiftd_rm_T1_T3(s, ot, opreg, op);
+        break;
+
+        /************************/
+        /* floats */
+    case 0xd8 ... 0xdf:
+        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
+            /* XXX: what to do if illegal op ? */
+            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+            break;
+        }
+        modrm = ldub_code(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        if (mod != 3) {
+            /* memory op */
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                {
+                    int op1;
+                    op1 = op & 7;
+
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_flds_FT0(cpu_tmp2_i32);
+                        break;
+                    case 1:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        break;
+                    case 2:
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                                          (s->mem_index >> 2) - 1);
+                        gen_helper_fldl_FT0(cpu_tmp1_i64);
+                        break;
+                    case 3:
+                    default:
+                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        break;
+                    }
+
+                    gen_helper_fp_arith_ST0_FT0(op1);
+                    if (op1 == 3) {
+                        /* fcomp needs pop */
+                        gen_helper_fpop();
+                    }
+                }
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+                switch(op & 7) {
+                case 0:
+                    switch(op >> 4) {
+                    case 0:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_flds_ST0(cpu_tmp2_i32);
+                        break;
+                    case 1:
+                        gen_op_ld_T0_A0(OT_LONG + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        break;
+                    case 2:
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                                          (s->mem_index >> 2) - 1);
+                        gen_helper_fldl_ST0(cpu_tmp1_i64);
+                        break;
+                    case 3:
+                    default:
+                        gen_op_lds_T0_A0(OT_WORD + s->mem_index);
+                        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        break;
+                    }
+                    break;
+                case 1:
+                    /* XXX: the corresponding CPUID bit must be tested !
*/ + switch(op >> 4) { + case 1: + gen_helper_fisttl_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 2: + gen_helper_fisttll_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 3: + default: + gen_helper_fistt_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + } + gen_helper_fpop(); + break; + default: + switch(op >> 4) { + case 0: + gen_helper_fsts_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 1: + gen_helper_fistl_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + break; + case 2: + gen_helper_fstl_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + break; + case 3: + default: + gen_helper_fist_ST0(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + } + if ((op & 7) == 3) + gen_helper_fpop(); + break; + } + break; + case 0x0c: /* fldenv mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fldenv( + cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x0d: /* fldcw mem */ + gen_op_ld_T0_A0(OT_WORD + s->mem_index); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_fldcw(cpu_tmp2_i32); + break; + case 0x0e: /* fnstenv mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x0f: /* fnstcw mem */ + gen_helper_fnstcw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + case 0x1d: /* fldt mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fldt_ST0(cpu_A0); + break; + case 0x1f: /* fstpt mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fstt_ST0(cpu_A0); + gen_helper_fpop(); + break; + case 0x2c: /* frstor mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x2e: /* fnsave mem */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag)); + break; + case 0x2f: /* fnstsw mem */ + gen_helper_fnstsw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + break; + case 0x3c: /* fbld */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fbld_ST0(cpu_A0); + break; + case 0x3e: /* fbstp */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fbst_ST0(cpu_A0); + gen_helper_fpop(); + break; + case 0x3d: /* fildll */ + tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fildll_ST0(cpu_tmp1_i64); + break; + case 0x3f: /* fistpll */ + gen_helper_fistll_ST0(cpu_tmp1_i64); + tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, + (s->mem_index >> 2) - 1); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + } else { + /* register float ops */ + opreg = rm; + + switch(op) { 
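+                /* Descriptive note (not in upstream): op packs the low three
+                   bits of the opcode byte (0xd8..0xdf) into bits 5..3 and
+                   ModRM.reg into bits 2..0, per the 'op = ((b & 7) << 3) | ...'
+                   computation above, so each case below selects one
+                   register-operand x87 instruction group. */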
+ case 0x08: /* fld sti */ + gen_helper_fpush(); + gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7)); + break; + case 0x09: /* fxchg sti */ + case 0x29: /* fxchg4 sti, undocumented op */ + case 0x39: /* fxchg7 sti, undocumented op */ + gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg)); + break; + case 0x0a: /* grp d9/2 */ + switch(rm) { + case 0: /* fnop */ + /* check exceptions (FreeBSD FPU probe) */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fwait(); + break; + default: + goto illegal_op; + } + break; + case 0x0c: /* grp d9/4 */ + switch(rm) { + case 0: /* fchs */ + gen_helper_fchs_ST0(); + break; + case 1: /* fabs */ + gen_helper_fabs_ST0(); + break; + case 4: /* ftst */ + gen_helper_fldz_FT0(); + gen_helper_fcom_ST0_FT0(); + break; + case 5: /* fxam */ + gen_helper_fxam_ST0(); + break; + default: + goto illegal_op; + } + break; + case 0x0d: /* grp d9/5 */ + { + switch(rm) { + case 0: + gen_helper_fpush(); + gen_helper_fld1_ST0(); + break; + case 1: + gen_helper_fpush(); + gen_helper_fldl2t_ST0(); + break; + case 2: + gen_helper_fpush(); + gen_helper_fldl2e_ST0(); + break; + case 3: + gen_helper_fpush(); + gen_helper_fldpi_ST0(); + break; + case 4: + gen_helper_fpush(); + gen_helper_fldlg2_ST0(); + break; + case 5: + gen_helper_fpush(); + gen_helper_fldln2_ST0(); + break; + case 6: + gen_helper_fpush(); + gen_helper_fldz_ST0(); + break; + default: + goto illegal_op; + } + } + break; + case 0x0e: /* grp d9/6 */ + switch(rm) { + case 0: /* f2xm1 */ + gen_helper_f2xm1(); + break; + case 1: /* fyl2x */ + gen_helper_fyl2x(); + break; + case 2: /* fptan */ + gen_helper_fptan(); + break; + case 3: /* fpatan */ + gen_helper_fpatan(); + break; + case 4: /* fxtract */ + gen_helper_fxtract(); + break; + case 5: /* fprem1 */ + gen_helper_fprem1(); + break; + case 6: /* fdecstp */ + gen_helper_fdecstp(); + break; + default: + case 7: /* fincstp */ + gen_helper_fincstp(); + break; + } + break; + case 0x0f: /* grp d9/7 */ + switch(rm) { + case 0: /* fprem */ + gen_helper_fprem(); + break; + case 1: /* fyl2xp1 */ + gen_helper_fyl2xp1(); + break; + case 2: /* fsqrt */ + gen_helper_fsqrt(); + break; + case 3: /* fsincos */ + gen_helper_fsincos(); + break; + case 5: /* fscale */ + gen_helper_fscale(); + break; + case 4: /* frndint */ + gen_helper_frndint(); + break; + case 6: /* fsin */ + gen_helper_fsin(); + break; + default: + case 7: /* fcos */ + gen_helper_fcos(); + break; + } + break; + case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ + case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ + case 0x30: case 0x31: case 0x34 ... 
0x37: /* fxxxp sti, st */ + { + int op1; + + op1 = op & 7; + if (op >= 0x20) { + gen_helper_fp_arith_STN_ST0(op1, opreg); + if (op >= 0x30) + gen_helper_fpop(); + } else { + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fp_arith_ST0_FT0(op1); + } + } + break; + case 0x02: /* fcom */ + case 0x22: /* fcom2, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcom_ST0_FT0(); + break; + case 0x03: /* fcomp */ + case 0x23: /* fcomp3, undocumented op */ + case 0x32: /* fcomp5, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcom_ST0_FT0(); + gen_helper_fpop(); + break; + case 0x15: /* da/5 */ + switch(rm) { + case 1: /* fucompp */ + gen_helper_fmov_FT0_STN(tcg_const_i32(1)); + gen_helper_fucom_ST0_FT0(); + gen_helper_fpop(); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + break; + case 0x1c: + switch(rm) { + case 0: /* feni (287 only, just do nop here) */ + break; + case 1: /* fdisi (287 only, just do nop here) */ + break; + case 2: /* fclex */ + gen_helper_fclex(); + break; + case 3: /* fninit */ + gen_helper_fninit(); + break; + case 4: /* fsetpm (287 only, just do nop here) */ + break; + default: + goto illegal_op; + } + break; + case 0x1d: /* fucomi */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucomi_ST0_FT0(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x1e: /* fcomi */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcomi_ST0_FT0(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x28: /* ffree sti */ + gen_helper_ffree_STN(tcg_const_i32(opreg)); + break; + case 0x2a: /* fst sti */ + gen_helper_fmov_STN_ST0(tcg_const_i32(opreg)); + break; + case 0x2b: /* fstp sti */ + case 0x0b: /* fstp1 sti, undocumented op */ + case 0x3a: /* fstp8 sti, undocumented op */ + case 0x3b: /* fstp9 sti, undocumented op */ + gen_helper_fmov_STN_ST0(tcg_const_i32(opreg)); + gen_helper_fpop(); + break; + case 0x2c: /* fucom st(i) */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucom_ST0_FT0(); + break; + case 0x2d: /* fucomp st(i) */ + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucom_ST0_FT0(); + gen_helper_fpop(); + break; + case 0x33: /* de/3 */ + switch(rm) { + case 1: /* fcompp */ + gen_helper_fmov_FT0_STN(tcg_const_i32(1)); + gen_helper_fcom_ST0_FT0(); + gen_helper_fpop(); + gen_helper_fpop(); + break; + default: + goto illegal_op; + } + break; + case 0x38: /* ffreep sti, undocumented op */ + gen_helper_ffree_STN(tcg_const_i32(opreg)); + gen_helper_fpop(); + break; + case 0x3c: /* df/4 */ + switch(rm) { + case 0: + gen_helper_fnstsw(cpu_tmp2_i32); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + break; + default: + goto illegal_op; + } + break; + case 0x3d: /* fucomip */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fucomi_ST0_FT0(); + gen_helper_fpop(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x3e: /* fcomip */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_fmov_FT0_STN(tcg_const_i32(opreg)); + gen_helper_fcomi_ST0_FT0(); + gen_helper_fpop(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x10 ... 0x13: /* fcmovxx */ + case 0x18 ... 
0x1b: + { + int op1, l1; + static const uint8_t fcmov_cc[8] = { + (JCC_B << 1), + (JCC_Z << 1), + (JCC_BE << 1), + (JCC_P << 1), + }; + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, op1, l1); + gen_helper_fmov_ST0_STN(tcg_const_i32(opreg)); + gen_set_label(l1); + } + break; + default: + goto illegal_op; + } + } + break; + /************************/ + /* string ops */ + + case 0xa4: /* movsS */ + case 0xa5: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_movs(s, ot); + } + break; + + case 0xaa: /* stosS */ + case 0xab: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_stos(s, ot); + } + break; + case 0xac: /* lodsS */ + case 0xad: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_lods(s, ot); + } + break; + case 0xae: /* scasS */ + case 0xaf: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & PREFIX_REPNZ) { + gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + } else if (prefixes & PREFIX_REPZ) { + gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + } else { + gen_scas(s, ot); + s->cc_op = CC_OP_SUBB + ot; + } + break; + + case 0xa6: /* cmpsS */ + case 0xa7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag + OT_WORD; + if (prefixes & PREFIX_REPNZ) { + gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + } else if (prefixes & PREFIX_REPZ) { + gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + } else { + gen_cmps(s, ot); + s->cc_op = CC_OP_SUBB + ot; + } + break; + case 0x6c: /* insS */ + case 0x6d: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_ins(s, ot); + if (use_icount) { + gen_jmp(s, s->pc - s->cs_base); + } + } + break; + case 0x6e: /* outsS */ + case 0x6f: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes) | 4); + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + } else { + gen_outs(s, ot); + if (use_icount) { + gen_jmp(s, s->pc - s->cs_base); + } + } + break; + + /************************/ + /* port I/O */ + + case 0xe4: + case 0xe5: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? 
OT_LONG : OT_WORD; + val = ldub_code(s->pc++); + gen_op_movl_T0_im(val); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32); + gen_op_mov_reg_T1(ot, R_EAX); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xe6: + case 0xe7: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + val = ldub_code(s->pc++); + gen_op_movl_T0_im(val); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes)); + gen_op_mov_TN_reg(ot, 1, R_EAX); + + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xec: + case 0xed: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32); + gen_op_mov_reg_T1(ot, R_EAX); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0xee: + case 0xef: + if ((b & 1) == 0) + ot = OT_BYTE; + else + ot = dflag ? OT_LONG : OT_WORD; + gen_op_mov_TN_reg(OT_WORD, 0, R_EDX); + gen_op_andl_T0_ffff(); + gen_check_io(s, ot, pc_start - s->cs_base, + svm_is_rep(prefixes)); + gen_op_mov_TN_reg(ot, 1, R_EAX); + + if (use_icount) + gen_io_start(); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + + /************************/ + /* control */ + case 0xc2: /* ret im */ + val = ldsw_code(s->pc); + s->pc += 2; + gen_pop_T0(s); + if (CODE64(s) && s->dflag) + s->dflag = 2; + gen_stack_update(s, val + (2 << s->dflag)); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 0xc3: /* ret */ + gen_pop_T0(s); + gen_pop_update(s); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + gen_op_jmp_T0(); + gen_eob(s); + break; + case 0xca: /* lret im */ + val = ldsw_code(s->pc); + s->pc += 2; + do_lret: + if (s->pe && !s->vm86) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_lret_protected(tcg_const_i32(s->dflag), + tcg_const_i32(val)); + } else { + gen_stack_A0(s); + /* pop offset */ + gen_op_ld_T0_A0(1 + s->dflag + s->mem_index); + if (s->dflag == 0) + gen_op_andl_T0_ffff(); + /* NOTE: keeping EIP updated is not a problem in case of + exception */ + gen_op_jmp_T0(); + /* pop selector */ + gen_op_addl_A0_im(2 << s->dflag); + gen_op_ld_T0_A0(1 + s->dflag + s->mem_index); + gen_op_movl_seg_T0_vm(R_CS); + /* add stack offset */ + gen_stack_update(s, val + (4 << s->dflag)); + } + gen_eob(s); + break; + case 0xcb: /* lret */ + val = 0; + goto do_lret; + case 0xcf: /* iret */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET); + if (!s->pe) { + /* real mode */ + gen_helper_iret_real(tcg_const_i32(s->dflag)); + s->cc_op = 
CC_OP_EFLAGS; + } else if (s->vm86) { +#ifdef VBOX + if (s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_helper_iret_real(tcg_const_i32(s->dflag)); + s->cc_op = CC_OP_EFLAGS; + } + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_iret_protected(tcg_const_i32(s->dflag), + tcg_const_i32(s->pc - s->cs_base)); + s->cc_op = CC_OP_EFLAGS; + } + gen_eob(s); + break; + case 0xe8: /* call im */ + { + if (dflag) + tval = (int32_t)insn_get(s, OT_LONG); + else + tval = (int16_t)insn_get(s, OT_WORD); + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + else if(!CODE64(s)) + tval &= 0xffffffff; + gen_movtl_T0_im(next_eip); + gen_push_T0(s); + gen_jmp(s, tval); + } + break; + case 0x9a: /* lcall im */ + { + unsigned int selector, offset; + + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + offset = insn_get(s, ot); + selector = insn_get(s, OT_WORD); + + gen_op_movl_T0_im(selector); + gen_op_movl_T1_imu(offset); + } + goto do_lcall; + case 0xe9: /* jmp im */ + if (dflag) + tval = (int32_t)insn_get(s, OT_LONG); + else + tval = (int16_t)insn_get(s, OT_WORD); + tval += s->pc - s->cs_base; + if (s->dflag == 0) + tval &= 0xffff; + else if(!CODE64(s)) + tval &= 0xffffffff; + gen_jmp(s, tval); + break; + case 0xea: /* ljmp im */ + { + unsigned int selector, offset; + + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + offset = insn_get(s, ot); + selector = insn_get(s, OT_WORD); + + gen_op_movl_T0_im(selector); + gen_op_movl_T1_imu(offset); + } + goto do_ljmp; + case 0xeb: /* jmp Jb */ + tval = (int8_t)insn_get(s, OT_BYTE); + tval += s->pc - s->cs_base; + if (s->dflag == 0) + tval &= 0xffff; + gen_jmp(s, tval); + break; + case 0x70 ... 0x7f: /* jcc Jb */ + tval = (int8_t)insn_get(s, OT_BYTE); + goto do_jcc; + case 0x180 ... 0x18f: /* jcc Jv */ + if (dflag) { + tval = (int32_t)insn_get(s, OT_LONG); + } else { + tval = (int16_t)insn_get(s, OT_WORD); + } + do_jcc: + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + gen_jcc(s, b, tval, next_eip); + break; + + case 0x190 ... 0x19f: /* setcc Gv */ + modrm = ldub_code(s->pc++); + gen_setcc(s, b); + gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1); + break; + case 0x140 ... 0x14f: /* cmov Gv, Ev */ + { + int l1; + TCGv t0; + + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + t0 = tcg_temp_local_new(); + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, t0, rm); + } +#ifdef TARGET_X86_64 + if (ot == OT_LONG) { + /* XXX: specific Intel behaviour ? 
*/ + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_mov_tl(cpu_regs[reg], t0); + gen_set_label(l1); + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); + } else +#endif + { + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_op_mov_reg_v(ot, reg, t0); + gen_set_label(l1); + } + tcg_temp_free(t0); + } + break; + + /************************/ + /* flags */ + case 0x9c: /* pushf */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); +#ifdef VBOX + if (s->vm86 && s->vme && s->iopl != 3) + gen_helper_read_eflags_vme(cpu_T[0]); + else +#endif + gen_helper_read_eflags(cpu_T[0]); + gen_push_T0(s); + } + break; + case 0x9d: /* popf */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && (!s->vme || s->dflag)) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_pop_T0(s); + if (s->cpl == 0) { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK))); + } else { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff)); + } + } else { + if (s->cpl <= s->iopl) { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK))); + } else { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff)); + } + } else { + if (s->dflag) { + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK))); + } else { +#ifdef VBOX + if (s->vm86 && s->vme) + gen_helper_write_eflags_vme(cpu_T[0]); + else +#endif + gen_helper_write_eflags(cpu_T[0], + tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff)); + } + } + } + gen_pop_update(s); + s->cc_op = CC_OP_EFLAGS; + /* abort translation because TF flag may change */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 0x9e: /* sahf */ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) + goto illegal_op; + gen_op_mov_TN_reg(OT_BYTE, 0, R_AH); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x9f: /* lahf */ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_T[0]); + /* Note: gen_compute_eflags() only gives the condition codes */ + tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02); + gen_op_mov_reg_T0(OT_BYTE, R_AH); + break; + case 0xf5: /* cmc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xf8: /* clc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xf9: /* stc */ + if (s->cc_op != CC_OP_DYNAMIC) + 
            gen_op_set_cc_op(s->cc_op);
+        gen_compute_eflags(cpu_cc_src);
+        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xfc: /* cld */
+        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        break;
+    case 0xfd: /* std */
+        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        break;
+
+        /************************/
+        /* bit operations */
+    case 0x1ba: /* bt/bts/btr/btc Gv, im */
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        op = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        if (mod != 3) {
+            s->rip_offset = 1;
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+        /* load shift */
+        val = ldub_code(s->pc++);
+        gen_op_movl_T1_im(val);
+        if (op < 4)
+            goto illegal_op;
+        op -= 4;
+        goto bt_op;
+    case 0x1a3: /* bt Gv, Ev */
+        op = 0;
+        goto do_btx;
+    case 0x1ab: /* bts */
+        op = 1;
+        goto do_btx;
+    case 0x1b3: /* btr */
+        op = 2;
+        goto do_btx;
+    case 0x1bb: /* btc */
+        op = 3;
+    do_btx:
+        ot = dflag + OT_WORD;
+        modrm = ldub_code(s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        gen_op_mov_TN_reg(OT_LONG, 1, reg);
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            /* specific case: we need to add a displacement */
+            gen_exts(ot, cpu_T[1]);
+            tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
+            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
+            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+            gen_op_ld_T0_A0(ot + s->mem_index);
+        } else {
+            gen_op_mov_TN_reg(ot, 0, rm);
+        }
+    bt_op:
+        tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+        switch(op) {
+        case 0:
+            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_cc_dst, 0);
+            break;
+        case 1:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        case 2:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
+            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        default:
+        case 3:
+            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+            tcg_gen_movi_tl(cpu_tmp0, 1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
+            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            break;
+        }
+        s->cc_op = CC_OP_SARB + ot;
+        if (op != 0) {
+            if (mod != 3)
+                gen_op_st_T0_A0(ot + s->mem_index);
+            else
+                gen_op_mov_reg_T0(ot, rm);
+            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
+            tcg_gen_movi_tl(cpu_cc_dst, 0);
+        }
+        break;
+    case 0x1bc: /* bsf */
+    case 0x1bd: /* bsr */
+        {
+            int label1;
+            TCGv t0;
+
+            ot = dflag + OT_WORD;
+            modrm = ldub_code(s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            gen_ldst_modrm(s,modrm, ot, OR_TMP0, 0);
+            gen_extu(ot, cpu_T[0]);
+            t0 = tcg_temp_local_new();
+            tcg_gen_mov_tl(t0, cpu_T[0]);
+            if ((b & 1) && (prefixes & PREFIX_REPZ) &&
+                (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+                switch(ot) {
+                case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
+                                               tcg_const_i32(16)); break;
+                case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
+                                               tcg_const_i32(32)); break;
+                case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
+                                               tcg_const_i32(64)); break;
+                }
+                gen_op_mov_reg_T0(ot, reg);
+            } else {
+                label1 = gen_new_label();
+                tcg_gen_movi_tl(cpu_cc_dst, 0);
+                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
+                if (b & 1) {
+
gen_helper_bsr(cpu_T[0], t0); + } else { + gen_helper_bsf(cpu_T[0], t0); + } + gen_op_mov_reg_T0(ot, reg); + tcg_gen_movi_tl(cpu_cc_dst, 1); + gen_set_label(label1); + tcg_gen_discard_tl(cpu_cc_src); + s->cc_op = CC_OP_LOGICB + ot; + } + tcg_temp_free(t0); + } + break; + /************************/ + /* bcd */ + case 0x27: /* daa */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_daa(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x2f: /* das */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_das(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x37: /* aaa */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_aaa(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0x3f: /* aas */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_helper_aas(); + s->cc_op = CC_OP_EFLAGS; + break; + case 0xd4: /* aam */ + if (CODE64(s)) + goto illegal_op; + val = ldub_code(s->pc++); + if (val == 0) { + gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); + } else { + gen_helper_aam(tcg_const_i32(val)); + s->cc_op = CC_OP_LOGICB; + } + break; + case 0xd5: /* aad */ + if (CODE64(s)) + goto illegal_op; + val = ldub_code(s->pc++); + gen_helper_aad(tcg_const_i32(val)); + s->cc_op = CC_OP_LOGICB; + break; + /************************/ + /* misc */ + case 0x90: /* nop */ + /* XXX: correct lock test for all insn */ + if (prefixes & PREFIX_LOCK) { + goto illegal_op; + } + /* If REX_B is set, then this is xchg eax, r8d, not a nop. */ + if (REX_B(s)) { + goto do_xchg_reg_eax; + } + if (prefixes & PREFIX_REPZ) { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_PAUSE); + } + break; + case 0x9b: /* fwait */ + if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == + (HF_MP_MASK | HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fwait(); + } + break; + case 0xcc: /* int3 */ +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && !s->vme) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else +#endif + gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base); + break; + case 0xcd: /* int N */ + val = ldub_code(s->pc++); +#ifdef VBOX + if (s->vm86 && s->iopl != 3 && !s->vme) { +#else + if (s->vm86 && s->iopl != 3) { +#endif + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base); + } + break; + case 0xce: /* into */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_into(tcg_const_i32(s->pc - pc_start)); + break; +#ifdef WANT_ICEBP + case 0xf1: /* icebp (undocumented, exits to external debugger) */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP); +#if 1 + gen_debug(s, pc_start - s->cs_base); +#else + /* start debug */ + tb_flush(cpu_single_env); + cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM); +#endif + break; +#endif + case 0xfa: /* cli */ + if (!s->vm86) { + if (s->cpl <= s->iopl) { + gen_helper_cli(); + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } else { + if (s->iopl == 3) { + gen_helper_cli(); +#ifdef VBOX + } else if (s->iopl != 3 && s->vme) { + gen_helper_cli_vme(); +#endif + } else { + gen_exception(s, EXCP0D_GPF, pc_start - 
s->cs_base); + } + } + break; + case 0xfb: /* sti */ + if (!s->vm86) { + if (s->cpl <= s->iopl) { + gen_sti: + gen_helper_sti(); + /* interruptions are enabled only the first insn after sti */ + /* If several instructions disable interrupts, only the + _first_ does it */ + if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK)) + gen_helper_set_inhibit_irq(); + /* give a chance to handle pending irqs */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } else { + if (s->iopl == 3) { + goto gen_sti; +#ifdef VBOX + } else if (s->iopl != 3 && s->vme) { + gen_helper_sti_vme(); + /* give a chance to handle pending irqs */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); +#endif + } else { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + } + break; + case 0x62: /* bound */ + if (CODE64(s)) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_op_mov_TN_reg(ot, 0, reg); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + if (ot == OT_WORD) + gen_helper_boundw(cpu_A0, cpu_tmp2_i32); + else + gen_helper_boundl(cpu_A0, cpu_tmp2_i32); + break; + case 0x1c8 ... 0x1cf: /* bswap reg */ + reg = (b & 7) | REX_B(s); +#ifdef TARGET_X86_64 + if (dflag == 2) { + gen_op_mov_TN_reg(OT_QUAD, 0, reg); + tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_QUAD, reg); + } else +#endif + { + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_LONG, reg); + } + break; + case 0xd6: /* salc */ + if (CODE64(s)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags_c(cpu_T[0]); + tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(OT_BYTE, R_EAX); + break; + case 0xe0: /* loopnz */ + case 0xe1: /* loopz */ + case 0xe2: /* loop */ + case 0xe3: /* jecxz */ + { + int l1, l2, l3; + + tval = (int8_t)insn_get(s, OT_BYTE); + next_eip = s->pc - s->cs_base; + tval += next_eip; + if (s->dflag == 0) + tval &= 0xffff; + + l1 = gen_new_label(); + l2 = gen_new_label(); + l3 = gen_new_label(); + b &= 3; + switch(b) { + case 0: /* loopnz */ + case 1: /* loopz */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_op_add_reg_im(s->aflag, R_ECX, -1); + gen_op_jz_ecx(s->aflag, l3); + gen_compute_eflags(cpu_tmp0); + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z); + if (b == 0) { + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1); + } else { + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1); + } + break; + case 2: /* loop */ + gen_op_add_reg_im(s->aflag, R_ECX, -1); + gen_op_jnz_ecx(s->aflag, l1); + break; + default: + case 3: /* jcxz */ + gen_op_jz_ecx(s->aflag, l1); + break; + } + + gen_set_label(l3); + gen_jmp_im(next_eip); + tcg_gen_br(l2); + + gen_set_label(l1); + gen_jmp_im(tval); + gen_set_label(l2); + gen_eob(s); + } + break; + case 0x130: /* wrmsr */ + case 0x132: /* rdmsr */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (b & 2) { + gen_helper_rdmsr(); + } else { + gen_helper_wrmsr(); + } + } + break; + case 0x131: /* rdtsc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (use_icount) + 
gen_io_start(); + gen_helper_rdtsc(); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + case 0x133: /* rdpmc */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_rdpmc(); + break; + case 0x134: /* sysenter */ +#ifndef VBOX + /* For Intel SYSENTER is valid on 64-bit */ + if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) +#else + if ( !(cpu_single_env->cpuid_features & CPUID_SEP) + || ( IS_LONG_MODE(s) + && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL)) +#endif + goto illegal_op; + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysenter(); + gen_eob(s); + } + break; + case 0x135: /* sysexit */ +#ifndef VBOX + /* For Intel SYSEXIT is valid on 64-bit */ + if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) +#else + if ( !(cpu_single_env->cpuid_features & CPUID_SEP) + || ( IS_LONG_MODE(s) + && CPUMGetGuestCpuVendor(cpu_single_env->pVM) != CPUMCPUVENDOR_INTEL)) +#endif + goto illegal_op; + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysexit(tcg_const_i32(dflag)); + gen_eob(s); + } + break; +#ifdef TARGET_X86_64 + case 0x105: /* syscall */ + /* XXX: is it usable in real mode ? */ + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_syscall(tcg_const_i32(s->pc - pc_start)); + gen_eob(s); + break; + case 0x107: /* sysret */ + if (!s->pe) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_sysret(tcg_const_i32(s->dflag)); + /* condition codes are modified only in long mode */ + if (s->lma) + s->cc_op = CC_OP_EFLAGS; + gen_eob(s); + } + break; +#endif + case 0x1a2: /* cpuid */ + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_cpuid(); + break; + case 0xf4: /* hlt */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_hlt(tcg_const_i32(s->pc - pc_start)); + s->is_jmp = DISAS_TB_JUMP; + } + break; + case 0x100: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* sldt */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector)); + ot = OT_WORD; + if (mod == 3) + ot += s->dflag; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + case 2: /* lldt */ + if (!s->pe || s->vm86) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_lldt(cpu_tmp2_i32); + } + break; + case 1: /* str */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector)); + ot = OT_WORD; + if (mod == 3) + ot += s->dflag; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1); + break; + case 3: /* ltr */ + if (!s->pe || 
s->vm86) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_jmp_im(pc_start - s->cs_base); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_helper_ltr(cpu_tmp2_i32); + } + break; + case 4: /* verr */ + case 5: /* verw */ + if (!s->pe || s->vm86) + goto illegal_op; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (op == 4) + gen_helper_verr(cpu_T[0]); + else + gen_helper_verw(cpu_T[0]); + s->cc_op = CC_OP_EFLAGS; + break; + default: + goto illegal_op; + } + break; + case 0x101: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + rm = modrm & 7; + switch(op) { + case 0: /* sgdt */ + if (mod == 3) + goto illegal_op; + gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit)); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base)); + gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + break; + case 1: + if (mod == 3) { + switch (rm) { + case 0: /* monitor */ + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || + s->cpl != 0) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); +#ifdef TARGET_X86_64 + if (s->aflag == 2) { + gen_op_movq_A0_reg(R_EAX); + } else +#endif + { + gen_op_movl_A0_reg(R_EAX); + if (s->aflag == 0) + gen_op_andl_A0_ffff(); + } + gen_add_A0_ds_seg(s); + gen_helper_monitor(cpu_A0); + break; + case 1: /* mwait */ + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || + s->cpl != 0) + goto illegal_op; + gen_update_cc_op(s); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_mwait(tcg_const_i32(s->pc - pc_start)); + gen_eob(s); + break; + default: + goto illegal_op; + } + } else { /* sidt */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit)); + gen_op_st_T0_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base)); + gen_op_st_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + } + break; + case 2: /* lgdt */ + case 3: /* lidt */ + if (mod == 3) { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + switch(rm) { + case 0: /* VMRUN */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmrun(tcg_const_i32(s->aflag), + tcg_const_i32(s->pc - pc_start)); + tcg_gen_exit_tb(0); + s->is_jmp = DISAS_TB_JUMP; + } + break; + case 1: /* VMMCALL */ + if (!(s->flags & HF_SVME_MASK)) + goto illegal_op; + gen_helper_vmmcall(); + break; + case 2: /* VMLOAD */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmload(tcg_const_i32(s->aflag)); + } + break; + case 3: /* VMSAVE */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_vmsave(tcg_const_i32(s->aflag)); + } + break; + case 
4: /* STGI */ + if ((!(s->flags & HF_SVME_MASK) && + !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || + !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_stgi(); + } + break; + case 5: /* CLGI */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_clgi(); + } + break; + case 6: /* SKINIT */ + if ((!(s->flags & HF_SVME_MASK) && + !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || + !s->pe) + goto illegal_op; + gen_helper_skinit(); + break; + case 7: /* INVLPGA */ + if (!(s->flags & HF_SVME_MASK) || !s->pe) + goto illegal_op; + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + break; + } else { + gen_helper_invlpga(tcg_const_i32(s->aflag)); + } + break; + default: + goto illegal_op; + } + } else if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, + op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_T1_A0(OT_WORD + s->mem_index); + gen_add_A0_im(s, 2); + gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index); + if (!s->dflag) + gen_op_andl_T0_im(0xffffff); + if (op == 2) { + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base)); + tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit)); + } else { + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base)); + tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit)); + } + } + break; + case 4: /* smsw */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); +#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4); +#else + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0])); +#endif + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1); + break; + case 6: /* lmsw */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + gen_helper_lmsw(cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + case 7: + if (mod != 3) { /* invlpg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_helper_invlpg(cpu_A0); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + } else { + switch (rm) { + case 0: /* swapgs */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + tcg_gen_ld_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,segs[R_GS].base)); + tcg_gen_ld_tl(cpu_T[1], cpu_env, + offsetof(CPUX86State,kernelgsbase)); + tcg_gen_st_tl(cpu_T[1], cpu_env, + offsetof(CPUX86State,segs[R_GS].base)); + tcg_gen_st_tl(cpu_T[0], cpu_env, + offsetof(CPUX86State,kernelgsbase)); + } + } else +#endif + { + goto illegal_op; + } + break; + case 1: /* rdtscp */ + if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) + goto illegal_op; + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (use_icount) + gen_io_start(); + gen_helper_rdtscp(); + if (use_icount) { + gen_io_end(); + gen_jmp(s, s->pc - s->cs_base); + } + break; + 
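+            /* Any other rm value with mod == 3 is not handled by this
+               translator and takes the default case below, which raises
+               an undefined opcode exception. */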
default: + goto illegal_op; + } + } + break; + default: + goto illegal_op; + } + break; + case 0x108: /* invd */ + case 0x109: /* wbinvd */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD); + /* nothing to do */ + } + break; + case 0x63: /* arpl or movslS (x86_64) */ +#ifdef TARGET_X86_64 + if (CODE64(s)) { + int d_ot; + /* d_ot is the size of destination */ + d_ot = dflag + OT_WORD; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + + if (mod == 3) { + gen_op_mov_TN_reg(OT_LONG, 0, rm); + /* sign extend */ + if (d_ot == OT_QUAD) + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + gen_op_mov_reg_T0(d_ot, reg); + } else { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (d_ot == OT_QUAD) { + gen_op_lds_T0_A0(OT_LONG + s->mem_index); + } else { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + } + gen_op_mov_reg_T0(d_ot, reg); + } + } else +#endif + { + int label1; + TCGv t0, t1, t2, a0; + + if (!s->pe || s->vm86) + goto illegal_op; + t0 = tcg_temp_local_new(); + t1 = tcg_temp_local_new(); + t2 = tcg_temp_local_new(); + ot = OT_WORD; + modrm = ldub_code(s->pc++); + reg = (modrm >> 3) & 7; + mod = (modrm >> 6) & 3; + rm = modrm & 7; + if (mod != 3) { + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); + a0 = tcg_temp_local_new(); + tcg_gen_mov_tl(a0, cpu_A0); + } else { + gen_op_mov_v_reg(ot, t0, rm); + TCGV_UNUSED(a0); + } + gen_op_mov_v_reg(ot, t1, reg); + tcg_gen_andi_tl(cpu_tmp0, t0, 3); + tcg_gen_andi_tl(t1, t1, 3); + tcg_gen_movi_tl(t2, 0); + label1 = gen_new_label(); + tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1); + tcg_gen_andi_tl(t0, t0, ~3); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_movi_tl(t2, CC_Z); + gen_set_label(label1); + if (mod != 3) { + gen_op_st_v(ot + s->mem_index, t0, a0); + tcg_temp_free(a0); + } else { + gen_op_mov_reg_v(ot, rm, t0); + } + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); + s->cc_op = CC_OP_EFLAGS; + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + } + break; + case 0x102: /* lar */ + case 0x103: /* lsl */ + { + int label1; + TCGv t0; + if (!s->pe || s->vm86) + goto illegal_op; + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0); + t0 = tcg_temp_local_new(); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + if (b == 0x102) + gen_helper_lar(t0, cpu_T[0]); + else + gen_helper_lsl(t0, cpu_T[0]); + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z); + label1 = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); + gen_op_mov_reg_v(ot, reg, t0); + gen_set_label(label1); + s->cc_op = CC_OP_EFLAGS; + tcg_temp_free(t0); + } + break; + case 0x118: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* prefetchnta */ + case 1: /* prefetchnt0 */ + case 2: /* prefetchnt0 */ + case 3: /* prefetchnt0 */ + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + /* nothing more to do */ + break; + default: /* nop (multi byte) */ + gen_nop_modrm(s, modrm); + break; + } + break; + case 0x119 ... 
0x11f: /* nop (multi byte) */ + modrm = ldub_code(s->pc++); + gen_nop_modrm(s, modrm); + break; + case 0x120: /* mov reg, crN */ + case 0x122: /* mov crN, reg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + modrm = ldub_code(s->pc++); +#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */ + if ((modrm & 0xc0) != 0xc0) + goto illegal_op; +#endif + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | rex_r; + if (CODE64(s)) + ot = OT_QUAD; + else + ot = OT_LONG; + if ((prefixes & PREFIX_LOCK) && (reg == 0) && + (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) { + reg = 8; + } + switch(reg) { + case 0: + case 2: + case 3: + case 4: + case 8: + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + if (b & 2) { + gen_op_mov_TN_reg(ot, 0, rm); + gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg)); + gen_op_mov_reg_T0(ot, rm); + } + break; + default: + goto illegal_op; + } + } + break; + case 0x121: /* mov reg, drN */ + case 0x123: /* mov drN, reg */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + modrm = ldub_code(s->pc++); +#ifndef VBOX /* mod bits are always understood to be 11 (0xc0) regardless of actual content; see AMD manuals */ + if ((modrm & 0xc0) != 0xc0) + goto illegal_op; +#endif + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | rex_r; + if (CODE64(s)) + ot = OT_QUAD; + else + ot = OT_LONG; + /* XXX: do it dynamically with CR4.DE bit */ + if (reg == 4 || reg == 5 || reg >= 8) + goto illegal_op; + if (b & 2) { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg); + gen_op_mov_TN_reg(ot, 0, rm); + gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]); + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg); + tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg])); + gen_op_mov_reg_T0(ot, rm); + } + } + break; + case 0x106: /* clts */ + if (s->cpl != 0) { + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } else { + gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); + gen_helper_clts(); + /* abort block because static cpu state changed */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob(s); + } + break; + /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ + case 0x1c3: /* MOVNTI reg, mem */ + if (!(s->cpuid_features & CPUID_SSE2)) + goto illegal_op; + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + reg = ((modrm >> 3) & 7) | rex_r; + /* generate a generic store */ + gen_ldst_modrm(s, modrm, ot, reg, 1); + break; + case 0x1ae: + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + op = (modrm >> 3) & 7; + switch(op) { + case 0: /* fxsave */ + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || + (s->prefix & PREFIX_LOCK)) + goto illegal_op; + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2))); + break; + case 1: /* fxrstor */ + if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || + (s->prefix & PREFIX_LOCK)) + goto illegal_op; + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); + gen_jmp_im(pc_start - s->cs_base); + gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2))); + break; + case 2: /* ldmxcsr */ + case 3: /* stmxcsr */ + if (s->flags & HF_TS_MASK) { + gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + break; + } + if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) || + mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + if (op == 2) { + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); + } else { + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); + gen_op_st_T0_A0(OT_LONG + s->mem_index); + } + break; + case 5: /* lfence */ + case 6: /* mfence */ + if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE)) + goto illegal_op; + break; + case 7: /* sfence / clflush */ + if ((modrm & 0xc7) == 0xc0) { + /* sfence */ + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ + if (!(s->cpuid_features & CPUID_SSE)) + goto illegal_op; + } else { + /* clflush */ + if (!(s->cpuid_features & CPUID_CLFLUSH)) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + } + break; + default: + goto illegal_op; + } + break; + case 0x10d: /* 3DNow! prefetch(w) */ + modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; + gen_lea_modrm(s, modrm, ®_addr, &offset_addr); + /* ignore for now */ + break; + case 0x1aa: /* rsm */ + gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM); + if (!(s->flags & HF_SMM_MASK)) + goto illegal_op; + gen_update_cc_op(s); + gen_jmp_im(s->pc - s->cs_base); + gen_helper_rsm(); + gen_eob(s); + break; + case 0x1b8: /* SSE4.2 popcnt */ + if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) != + PREFIX_REPZ) + goto illegal_op; + if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT)) + goto illegal_op; + + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7); + + if (s->prefix & PREFIX_DATA) + ot = OT_WORD; + else if (s->dflag != 2) + ot = OT_LONG; + else + ot = OT_QUAD; + + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot)); + gen_op_mov_reg_T0(ot, reg); + + s->cc_op = CC_OP_EFLAGS; + break; + case 0x10e ... 0x10f: + /* 3DNow! instructions, ignore prefixes */ + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); + case 0x110 ... 0x117: + case 0x128 ... 
0x12f: + case 0x138 ... 0x13a: + case 0x150 ... 0x179: + case 0x17c ... 0x17f: + case 0x1c2: + case 0x1c4 ... 0x1c6: + case 0x1d0 ... 0x1fe: + gen_sse(s, b, pc_start, rex_r); + break; + default: + goto illegal_op; + } + /* lock generation */ + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + return s->pc; + illegal_op: + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + /* XXX: ensure that no lock was generated */ + gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + return s->pc; +} + +void optimize_flags_init(void) +{ +#if TCG_TARGET_REG_BITS == 32 + assert(sizeof(CCTable) == (1 << 3)); +#else + assert(sizeof(CCTable) == (1 << 4)); +#endif + cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); + cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, cc_op), "cc_op"); + cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src), + "cc_src"); + cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst), + "cc_dst"); + cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp), + "cc_tmp"); + +#ifdef TARGET_X86_64 + cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EAX]), "rax"); + cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ECX]), "rcx"); + cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EDX]), "rdx"); + cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EBX]), "rbx"); + cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ESP]), "rsp"); + cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EBP]), "rbp"); + cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_ESI]), "rsi"); + cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[R_EDI]), "rdi"); + cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[8]), "r8"); + cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[9]), "r9"); + cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[10]), "r10"); + cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[11]), "r11"); + cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[12]), "r12"); + cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[13]), "r13"); + cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[14]), "r14"); + cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUState, regs[15]), "r15"); +#else + cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EAX]), "eax"); + cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ECX]), "ecx"); + cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EDX]), "edx"); + cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EBX]), "ebx"); + cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ESP]), "esp"); + cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EBP]), "ebp"); + cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_ESI]), "esi"); + cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[R_EDI]), "edi"); +#endif + + /* register helpers */ +#define GEN_HELPER 2 +#include "helper.h" +} + +/* generate intermediate code in gen_opc_buf and gen_opparam_buf for + basic block 'tb'. 
If search_pc is TRUE, also generate PC + information for each intermediate instruction. */ +static inline void gen_intermediate_code_internal(CPUState *env, + TranslationBlock *tb, + int search_pc) +{ + DisasContext dc1, *dc = &dc1; + target_ulong pc_ptr; + uint16_t *gen_opc_end; + CPUBreakpoint *bp; + int j, lj; + uint64_t flags; + target_ulong pc_start; + target_ulong cs_base; + int num_insns; + int max_insns; +#ifdef VBOX + int const singlestep = env->state & CPU_EMULATE_SINGLE_STEP; +#endif + + /* generate intermediate code */ + pc_start = tb->pc; + cs_base = tb->cs_base; + flags = tb->flags; + + dc->pe = (flags >> HF_PE_SHIFT) & 1; + dc->code32 = (flags >> HF_CS32_SHIFT) & 1; + dc->ss32 = (flags >> HF_SS32_SHIFT) & 1; + dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1; + dc->f_st = 0; + dc->vm86 = (flags >> VM_SHIFT) & 1; +#ifdef VBOX + dc->vme = !!(env->cr[4] & CR4_VME_MASK); + dc->pvi = !!(env->cr[4] & CR4_PVI_MASK); +# ifdef VBOX_WITH_CALL_RECORD + if ( !(env->state & CPU_RAW_RING0) + && (env->cr[0] & CR0_PG_MASK) + && !(env->eflags & X86_EFL_IF) + && dc->code32) + dc->record_call = 1; + else + dc->record_call = 0; +# endif +#endif /* VBOX */ + dc->cpl = (flags >> HF_CPL_SHIFT) & 3; + dc->iopl = (flags >> IOPL_SHIFT) & 3; + dc->tf = (flags >> TF_SHIFT) & 1; + dc->singlestep_enabled = env->singlestep_enabled; + dc->cc_op = CC_OP_DYNAMIC; + dc->cs_base = cs_base; + dc->tb = tb; + dc->popl_esp_hack = 0; + /* select memory access functions */ + dc->mem_index = 0; + if (flags & HF_SOFTMMU_MASK) { + if (dc->cpl == 3) + dc->mem_index = 2 * 4; + else + dc->mem_index = 1 * 4; + } + dc->cpuid_features = env->cpuid_features; + dc->cpuid_ext_features = env->cpuid_ext_features; + dc->cpuid_ext2_features = env->cpuid_ext2_features; + dc->cpuid_ext3_features = env->cpuid_ext3_features; +#ifdef TARGET_X86_64 + dc->lma = (flags >> HF_LMA_SHIFT) & 1; + dc->code64 = (flags >> HF_CS64_SHIFT) & 1; +#endif + dc->flags = flags; + dc->jmp_opt = !(dc->tf || env->singlestep_enabled || + (flags & HF_INHIBIT_IRQ_MASK) +#ifndef CONFIG_SOFTMMU + || (flags & HF_SOFTMMU_MASK) +#endif + ); +#if 0 + /* check addseg logic */ + if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32)) + printf("ERROR addseg\n"); +#endif + + cpu_T[0] = tcg_temp_new(); + cpu_T[1] = tcg_temp_new(); + cpu_A0 = tcg_temp_new(); + cpu_T3 = tcg_temp_new(); + + cpu_tmp0 = tcg_temp_new(); + cpu_tmp1_i64 = tcg_temp_new_i64(); + cpu_tmp2_i32 = tcg_temp_new_i32(); + cpu_tmp3_i32 = tcg_temp_new_i32(); + cpu_tmp4 = tcg_temp_new(); + cpu_tmp5 = tcg_temp_new(); + cpu_ptr0 = tcg_temp_new_ptr(); + cpu_ptr1 = tcg_temp_new_ptr(); + + gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; + + dc->is_jmp = DISAS_NEXT; + pc_ptr = pc_start; + lj = -1; + num_insns = 0; + max_insns = tb->cflags & CF_COUNT_MASK; + if (max_insns == 0) + max_insns = CF_COUNT_MASK; + + gen_icount_start(); + for(;;) { + if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { + QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (bp->pc == pc_ptr && + !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) { + gen_debug(dc, pc_ptr - dc->cs_base); + break; + } + } + } + if (search_pc) { + j = gen_opc_ptr - gen_opc_buf; + if (lj < j) { + lj++; + while (lj < j) + gen_opc_instr_start[lj++] = 0; + } + gen_opc_pc[lj] = pc_ptr; + gen_opc_cc_op[lj] = dc->cc_op; + gen_opc_instr_start[lj] = 1; + gen_opc_icount[lj] = num_insns; + } + if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) + gen_io_start(); + + pc_ptr = disas_insn(dc, pc_ptr); + num_insns++; + /* stop translation if indicated */ + if (dc->is_jmp) + 
break; +#ifdef VBOX +# ifdef DEBUG +/* + if(cpu_check_code_raw(env, pc_ptr, env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK))) == ERROR_SUCCESS) + { + //should never happen as the jump to the patch code terminates the translation block + dprintf(("QEmu is about to execute instructions in our patch block at %08X!!\n", pc_ptr)); + } +*/ +# endif /* DEBUG */ + if (env->state & CPU_EMULATE_SINGLE_INSTR) + { + env->state &= ~CPU_EMULATE_SINGLE_INSTR; + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } +#endif /* VBOX */ + + /* if single step mode, we generate only one instruction and + generate an exception */ + /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear + the flag and abort the translation to give the irqs a + change to be happen */ + if (dc->tf || dc->singlestep_enabled || + (flags & HF_INHIBIT_IRQ_MASK)) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + /* if too long translation, stop generation too */ + if (gen_opc_ptr >= gen_opc_end || + (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || + num_insns >= max_insns) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + if (singlestep) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + } + if (tb->cflags & CF_LAST_IO) + gen_io_end(); + gen_icount_end(tb, num_insns); + *gen_opc_ptr = INDEX_op_end; + /* we don't forget to fill the last values */ + if (search_pc) { + j = gen_opc_ptr - gen_opc_buf; + lj++; + while (lj <= j) + gen_opc_instr_start[lj++] = 0; + } + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + int disas_flags; + qemu_log("----------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); +#ifdef TARGET_X86_64 + if (dc->code64) + disas_flags = 2; + else +#endif + disas_flags = !dc->code32; + log_target_disas(pc_start, pc_ptr - pc_start, disas_flags); + qemu_log("\n"); + } +#endif + + if (!search_pc) { + tb->size = pc_ptr - pc_start; + tb->icount = num_insns; + } +} + +void gen_intermediate_code(CPUState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(env, tb, 0); +} + +void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(env, tb, 1); +} + +void gen_pc_load(CPUState *env, TranslationBlock *tb, + uintptr_t searched_pc, int pc_pos, void *puc) +{ + int cc_op; +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_OP)) { + int i; + qemu_log("RESTORE:\n"); + for(i = 0;i <= pc_pos; i++) { + if (gen_opc_instr_start[i]) { + qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]); + } + } + qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n", + searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base, + (uint32_t)tb->cs_base); + } +#endif + env->eip = gen_opc_pc[pc_pos] - tb->cs_base; + cc_op = gen_opc_cc_op[pc_pos]; + if (cc_op != CC_OP_DYNAMIC) + env->cc_op = cc_op; +} diff --git a/src/recompiler/targphys.h b/src/recompiler/targphys.h new file mode 100644 index 00000000..95648d68 --- /dev/null +++ b/src/recompiler/targphys.h @@ -0,0 +1,21 @@ +/* Define target_phys_addr_t if it exists. */ + +#ifndef TARGPHYS_H +#define TARGPHYS_H + +#ifdef TARGET_PHYS_ADDR_BITS +/* target_phys_addr_t is the type of a physical address (its size can + be different from 'target_ulong'). 
*/ + +#if TARGET_PHYS_ADDR_BITS == 32 +typedef uint32_t target_phys_addr_t; +#define TARGET_PHYS_ADDR_MAX UINT32_MAX +#define TARGET_FMT_plx "%08x" +#elif TARGET_PHYS_ADDR_BITS == 64 +typedef uint64_t target_phys_addr_t; +#define TARGET_PHYS_ADDR_MAX UINT64_MAX +#define TARGET_FMT_plx "%016" PRIx64 +#endif +#endif + +#endif diff --git a/src/recompiler/tcg-runtime.c b/src/recompiler/tcg-runtime.c new file mode 100644 index 00000000..ab2df5f0 --- /dev/null +++ b/src/recompiler/tcg-runtime.c @@ -0,0 +1,89 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef VBOX +#include <stdint.h> +#else +# include <VBox/types.h> +#endif + +#include "tcg/tcg-runtime.h" + +/* 32-bit helpers */ + +int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2) +{ + return arg1 / arg2; +} + +int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2) +{ + return arg1 % arg2; +} + +uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2) +{ + return arg1 / arg2; +} + +uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2) +{ + return arg1 % arg2; +} + +/* 64-bit helpers */ + +int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2) +{ + return arg1 << arg2; +} + +int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2) +{ + return (uint64_t)arg1 >> arg2; +} + +int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2) +{ + return arg1 >> arg2; +} + +int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2) +{ + return arg1 / arg2; +} + +int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2) +{ + return arg1 % arg2; +} + +uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2) +{ + return arg1 / arg2; +} + +uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2) +{ + return arg1 % arg2; +} diff --git a/src/recompiler/tcg/LICENSE b/src/recompiler/tcg/LICENSE new file mode 100644 index 00000000..be817fa1 --- /dev/null +++ b/src/recompiler/tcg/LICENSE @@ -0,0 +1,3 @@ +All the files in this directory and subdirectories are released under +a BSD like license (see header in each file). No other license is +accepted. diff --git a/src/recompiler/tcg/Makefile.kup b/src/recompiler/tcg/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/recompiler/tcg/Makefile.kup diff --git a/src/recompiler/tcg/README b/src/recompiler/tcg/README new file mode 100644 index 00000000..8d6fe059 --- /dev/null +++ b/src/recompiler/tcg/README @@ -0,0 +1,497 @@ +Tiny Code Generator - Fabrice Bellard. 
+
+1) Introduction
+
+TCG (Tiny Code Generator) began as a generic backend for a C
+compiler. It was simplified to be used in QEMU. It also has its roots
+in the QOP code generator written by Paul Brook.
+
+2) Definitions
+
+The TCG "target" is the architecture for which we generate the
+code. It is of course not the same as the "target" of QEMU, which is
+the emulated architecture. As TCG started as a generic C backend used
+for cross compiling, it is assumed that the TCG target is different
+from the host, although this is never the case for QEMU.
+
+A TCG "function" corresponds to a QEMU Translated Block (TB).
+
+A TCG "temporary" is a variable that is only live in a basic
+block. Temporaries are allocated explicitly in each function.
+
+A TCG "local temporary" is a variable that is only live in a function.
+Local temporaries are allocated explicitly in each function.
+
+A TCG "global" is a variable which is live in all the functions
+(equivalent of a C global variable). They are defined before the
+functions are defined. A TCG global can be a memory location (e.g. a
+QEMU CPU register), a fixed host register (e.g. the QEMU CPU state
+pointer) or a memory location which is stored in a register outside
+QEMU TBs (not implemented yet).
+
+A TCG "basic block" corresponds to a list of instructions terminated
+by a branch instruction.
+
+3) Intermediate representation
+
+3.1) Introduction
+
+TCG instructions operate on variables which are temporaries, local
+temporaries or globals. TCG instructions and variables are strongly
+typed. Two types are supported: 32 bit integers and 64 bit
+integers. Pointers are defined as an alias to 32 bit or 64 bit
+integers depending on the TCG target word size.
+
+Each instruction has a fixed number of output variable operands, input
+variable operands and always constant operands.
+
+The notable exception is the call instruction which has a variable
+number of outputs and inputs.
+
+In the textual form, output operands usually come first, followed by
+input operands, followed by constant operands. The output type is
+included in the instruction name. Constants are prefixed with a '$'.
+
+add_i32 t0, t1, t2  (t0 <- t1 + t2)
+
+3.2) Assumptions
+
+* Basic blocks
+
+- Basic blocks end after branches (e.g. brcond_i32 instruction),
+  goto_tb and exit_tb instructions.
+- Basic blocks start after the end of a previous basic block, or at a
+  set_label instruction.
+
+After the end of a basic block, the content of temporaries is
+destroyed, but local temporaries and globals are preserved.
+
+* Floating point types are not supported yet
+
+* Pointers: depending on the TCG target, pointer size is 32 bit or 64
+  bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or
+  TCG_TYPE_I64.
+
+* Helpers:
+
+Using the tcg_gen_helper_x_y functions it is possible to call any
+function taking i32, i64 or pointer types. By default, before calling
+a helper, all globals are stored at their canonical location and it is
+assumed that the function can modify them. This can be overridden by
+the TCG_CALL_CONST function modifier. By default, the helper is
+allowed to modify the CPU state or raise an exception. This can be
+overridden by the TCG_CALL_PURE function modifier, in which case the
+call to the function is removed if the return value is not used.
+
+On some TCG targets (e.g. x86), several calling conventions are
+supported.
+
+* Branches:
+
+Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
+explicit address. Conditional branches can only jump to labels.
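+
+As an illustration, the brcond/set_label pattern described above is
+what a front end emits through its tcg_gen_* wrappers. The following
+sketch uses the target-long helpers that the x86 front end elsewhere
+in this tree (target-i386/translate.c) uses; the names are
+illustrative, and cpu_T[0] stands for whatever value the front end
+wants to test:
+
+    int label1;
+    TCGv t0;
+
+    t0 = tcg_temp_local_new();       /* must survive past the branch */
+    label1 = gen_new_label();
+    tcg_gen_movi_tl(t0, 0);
+    /* the conditional branch terminates the current basic block */
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, label1);
+    tcg_gen_movi_tl(t0, 1);          /* reached only when cpu_T[0] == 0 */
+    gen_set_label(label1);           /* a new basic block starts here */
+    /* ... use t0 ... */
+    tcg_temp_free(t0);
+
+Because t0 is written before the branch and read after set_label, it
+must be a local temporary; a plain temporary would not be preserved
+across the basic block boundary.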
+ +3.3) Code Optimizations + +When generating instructions, you can count on at least the following +optimizations: + +- Single instructions are simplified, e.g. + + and_i32 t0, t0, $0xffffffff + + is suppressed. + +- A liveness analysis is done at the basic block level. The + information is used to suppress moves from a dead variable to + another one. It is also used to remove instructions which compute + dead results. The later is especially useful for condition code + optimization in QEMU. + + In the following example: + + add_i32 t0, t1, t2 + add_i32 t0, t0, $1 + mov_i32 t0, $1 + + only the last instruction is kept. + +3.4) Instruction Reference + +********* Function call + +* call <ret> <params> ptr + +call function 'ptr' (pointer type) + +<ret> optional 32 bit or 64 bit return value +<params> optional 32 bit or 64 bit parameters + +********* Jumps/Labels + +* jmp t0 + +Absolute jump to address t0 (pointer type). + +* set_label $label + +Define label 'label' at the current program point. + +* br $label + +Jump to label. + +* brcond_i32/i64 cond, t0, t1, label + +Conditional jump if t0 cond t1 is true. cond can be: + TCG_COND_EQ + TCG_COND_NE + TCG_COND_LT /* signed */ + TCG_COND_GE /* signed */ + TCG_COND_LE /* signed */ + TCG_COND_GT /* signed */ + TCG_COND_LTU /* unsigned */ + TCG_COND_GEU /* unsigned */ + TCG_COND_LEU /* unsigned */ + TCG_COND_GTU /* unsigned */ + +********* Arithmetic + +* add_i32/i64 t0, t1, t2 + +t0=t1+t2 + +* sub_i32/i64 t0, t1, t2 + +t0=t1-t2 + +* neg_i32/i64 t0, t1 + +t0=-t1 (two's complement) + +* mul_i32/i64 t0, t1, t2 + +t0=t1*t2 + +* div_i32/i64 t0, t1, t2 + +t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. + +* divu_i32/i64 t0, t1, t2 + +t0=t1/t2 (unsigned). Undefined behavior if division by zero. + +* rem_i32/i64 t0, t1, t2 + +t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. + +* remu_i32/i64 t0, t1, t2 + +t0=t1%t2 (unsigned). Undefined behavior if division by zero. + +********* Logical + +* and_i32/i64 t0, t1, t2 + +t0=t1&t2 + +* or_i32/i64 t0, t1, t2 + +t0=t1|t2 + +* xor_i32/i64 t0, t1, t2 + +t0=t1^t2 + +* not_i32/i64 t0, t1 + +t0=~t1 + +* andc_i32/i64 t0, t1, t2 + +t0=t1&~t2 + +* eqv_i32/i64 t0, t1, t2 + +t0=~(t1^t2), or equivalently, t0=t1^~t2 + +* nand_i32/i64 t0, t1, t2 + +t0=~(t1&t2) + +* nor_i32/i64 t0, t1, t2 + +t0=~(t1|t2) + +* orc_i32/i64 t0, t1, t2 + +t0=t1|~t2 + +********* Shifts/Rotates + +* shl_i32/i64 t0, t1, t2 + +t0=t1 << t2. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* shr_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (unsigned). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* sar_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (signed). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotl_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the left. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotr_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the right. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +********* Misc + +* mov_i32/i64 t0, t1 + +t0 = t1 + +Move t1 to t0 (both operands must have the same type). + +* ext8s_i32/i64 t0, t1 +ext8u_i32/i64 t0, t1 +ext16s_i32/i64 t0, t1 +ext16u_i32/i64 t0, t1 +ext32s_i64 t0, t1 +ext32u_i64 t0, t1 + +8, 16 or 32 bit sign/zero extension (both operands must have the same type) + +* bswap16_i32/i64 t0, t1 + +16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order +bytes are set to zero. + +* bswap32_i32/i64 t0, t1 + +32 bit byte swap on a 32/64 bit value. 
With a 64 bit value, it assumes that +the four high order bytes are set to zero. + +* bswap64_i64 t0, t1 + +64 bit byte swap + +* discard_i32/i64 t0 + +Indicate that the value of t0 won't be used later. It is useful to +force dead code elimination. + +********* Conditional moves + +* setcond_i32/i64 cond, dest, t1, t2 + +dest = (t1 cond t2) + +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. + +********* Type conversions + +* ext_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does sign extension + +* extu_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does zero extension + +* trunc_i64_i32 t0, t1 +Truncate t1 (64 bit) to t0 (32 bit) + +* concat_i32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half +from t2 (32 bit). + +* concat32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half +from t2 (64 bit). + +********* Load/Store + +* ld_i32/i64 t0, t1, offset +ld8s_i32/i64 t0, t1, offset +ld8u_i32/i64 t0, t1, offset +ld16s_i32/i64 t0, t1, offset +ld16u_i32/i64 t0, t1, offset +ld32s_i64 t0, t1, offset +ld32u_i64 t0, t1, offset + +t0 = read(t1 + offset) +Load 8, 16, 32 or 64 bits with or without sign extension from host memory. +offset must be a constant. + +* st_i32/i64 t0, t1, offset +st8_i32/i64 t0, t1, offset +st16_i32/i64 t0, t1, offset +st32_i64 t0, t1, offset + +write(t0, t1 + offset) +Write 8, 16, 32 or 64 bits to host memory. + +********* 64-bit target on 32-bit host support + +The following opcodes are internal to TCG. Thus they are to be implemented by +32-bit host code generators, but are not to be emitted by guest translators. +They are emitted as needed by inline functions within "tcg-op.h". + +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label + +Similar to brcond, except that the 64-bit values T0 and T1 +are formed from two 32-bit arguments. + +* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the 64-bit inputs T1 and T2 are +formed from two 32-bit arguments, and the 64-bit output T0 +is returned in two 32-bit outputs. + +* mulu2_i32 t0_low, t0_high, t1, t2 + +Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding +the full 64-bit product T0. The later is returned in two 32-bit outputs. + +* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high + +Similar to setcond, except that the 64-bit values T1 and T2 are +formed from two 32-bit arguments. The result is a 32-bit value. + +********* QEMU specific operations + +* tb_exit t0 + +Exit the current TB and return the value t0 (word type). + +* goto_tb index + +Exit the current TB and jump to the TB index 'index' (constant) if the +current TB was linked to this TB. Otherwise execute the next +instructions. + +* qemu_ld8u t0, t1, flags +qemu_ld8s t0, t1, flags +qemu_ld16u t0, t1, flags +qemu_ld16s t0, t1, flags +qemu_ld32 t0, t1, flags +qemu_ld32u t0, t1, flags +qemu_ld32s t0, t1, flags +qemu_ld64 t0, t1, flags + +Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address +type. 'flags' contains the QEMU memory index (selects user or kernel access) +for example. + +Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and +"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits. + +* qemu_st8 t0, t1, flags +qemu_st16 t0, t1, flags +qemu_st32 t0, t1, flags +qemu_st64 t0, t1, flags + +Store the data t0 at the QEMU CPU Address t1. 
t1 has the QEMU CPU +address type. 'flags' contains the QEMU memory index (selects user or +kernel access) for example. + +Note 1: Some shortcuts are defined when the last operand is known to be +a constant (e.g. addi for add, movi for mov). + +Note 2: When using TCG, the opcodes must never be generated directly +as some of them may not be available as "real" opcodes. Always use the +function tcg_gen_xxx(args). + +4) Backend + +tcg-target.h contains the target specific definitions. tcg-target.c +contains the target specific code. + +4.1) Assumptions + +The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or +64 bit. It is expected that the pointer has the same size as the word. + +On a 32 bit target, all 64 bit operations are converted to 32 bits. A +few specific operations must be implemented to allow it (see add2_i32, +sub2_i32, brcond2_i32). + +Floating point operations are not supported in this version. A +previous incarnation of the code generator had full support of them, +but it is better to concentrate on integer operations first. + +On a 64 bit target, no assumption is made in TCG about the storage of +the 32 bit values in 64 bit registers. + +4.2) Constraints + +GCC like constraints are used to define the constraints of every +instruction. Memory constraints are not supported in this +version. Aliases are specified in the input operands as for GCC. + +The same register may be used for both an input and an output, even when +they are not explicitly aliased. If an op expands to multiple target +instructions then care must be taken to avoid clobbering input values. +GCC style "early clobber" outputs are not currently supported. + +A target can define specific register or constant constraints. If an +operation uses a constant input constraint which does not allow all +constants, it must also accept registers in order to have a fallback. + +The movi_i32 and movi_i64 operations must accept any constants. + +The mov_i32 and mov_i64 operations must accept any registers of the +same type. + +The ld/st instructions must accept signed 32 bit constant offsets. It +can be implemented by reserving a specific register to compute the +address if the offset is too big. + +The ld/st instructions must accept any destination (ld) or source (st) +register. + +4.3) Function call assumptions + +- The only supported types for parameters and return value are: 32 and + 64 bit integers and pointer. +- The stack grows downwards. +- The first N parameters are passed in registers. +- The next parameters are passed on the stack by storing them as words. +- Some registers are clobbered during the call. +- The function can return 0 or 1 value in registers. On a 32 bit + target, functions must be able to return 2 values in registers for + 64 bit return type. + +5) Recommended coding rules for best performance + +- Use globals to represent the parts of the QEMU CPU state which are + often modified, e.g. the integer registers and the condition + codes. TCG will be able to use host registers to store them. + +- Avoid globals stored in fixed registers. They must be used only to + store the pointer to the CPU state and possibly to store a pointer + to a register window. + +- Use temporaries. Use local temporaries only when really needed, + e.g. when you need to use a value after a jump. Local temporaries + introduce a performance hit in the current TCG implementation: their + content is saved to memory at end of each basic block. 
+
+- Free temporaries and local temporaries when they are no longer used
+  (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
+  should free it after it is used. Freeing temporaries does not yield
+  better generated code, but it reduces the memory usage of TCG and
+  speeds up the translation.
+
+- Don't hesitate to use helpers for complicated or seldom used target
+  instructions. There is little performance advantage in using TCG to
+  implement target instructions taking more than about twenty TCG
+  instructions.
+
+- Use the 'discard' instruction if you know that TCG won't be able to
+  prove that a given global is "dead" at a given program point. The
+  x86 target uses it to improve the condition codes optimisation.
diff --git a/src/recompiler/tcg/TODO b/src/recompiler/tcg/TODO
new file mode 100644
index 00000000..2bc2785d
--- /dev/null
+++ b/src/recompiler/tcg/TODO
@@ -0,0 +1,14 @@
+- Add new instructions such as: clz, ctz, popcnt.
+
+- See if it is worth exporting mul2, mulu2, div2, divu2.
+
+- Support of globals saved in fixed registers between TBs.
+
+Ideas:
+
+- Move the slow part of the qemu_ld/st ops after the end of the TB.
+
+- Change exception syntax to get closer to QOP system (exception
+  parameters given with a specific instruction).
+
+- Add float and vector support.
diff --git a/src/recompiler/tcg/i386/Makefile.kup b/src/recompiler/tcg/i386/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/recompiler/tcg/i386/Makefile.kup
diff --git a/src/recompiler/tcg/i386/tcg-target.c b/src/recompiler/tcg/i386/tcg-target.c
new file mode 100644
index 00000000..0943d54e
--- /dev/null
+++ b/src/recompiler/tcg/i386/tcg-target.c
@@ -0,0 +1,2231 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +#if TCG_TARGET_REG_BITS == 64 + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +#else + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", +#endif +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R10, + TCG_REG_R11, +# if !defined(VBOX) || !defined(__MINGW64__) + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RCX, + TCG_REG_RDX, +# endif + TCG_REG_RSI, + TCG_REG_RDI, +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RDX, + TCG_REG_RCX, +# endif + TCG_REG_RAX, +#else + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, + TCG_REG_EBP, + TCG_REG_ECX, + TCG_REG_EDX, + TCG_REG_EAX, +#endif +}; + +static const int tcg_target_call_iarg_regs[] = { +#if TCG_TARGET_REG_BITS == 64 +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_RCX, + TCG_REG_RDX, +# else + TCG_REG_RDI, + TCG_REG_RSI, + TCG_REG_RDX, + TCG_REG_RCX, +# endif + TCG_REG_R8, + TCG_REG_R9, +#else + TCG_REG_EAX, + TCG_REG_EDX, + TCG_REG_ECX +#endif +}; + +static const int tcg_target_call_oarg_regs[2] = { + TCG_REG_EAX, + TCG_REG_EDX +}; + +static uint8_t *tb_ret_addr; + +static void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend) +{ + value += addend; + switch(type) { + case R_386_PC32: + value -= (uintptr_t)code_ptr; + if (value != (int32_t)value) { + tcg_abort(); + } + *(uint32_t *)code_ptr = value; + break; + case R_386_PC8: + value -= (uintptr_t)code_ptr; + if (value != (int8_t)value) { + tcg_abort(); + } + *(uint8_t *)code_ptr = value; + break; + default: + tcg_abort(); + } +} + +#ifdef VBOX +/* emits stack alignment checks for strict builds. */ +DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s) +{ +# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? 
*/ + tcg_out8(s, 0xf7); tcg_out8(s, 0xc4); /* test %esp, 1fh */ + tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1); + tcg_out8(s, 0x74); /* jz imm8 */ + tcg_out8(s, 1); /* $+3 (over int3) */ + tcg_out8(s, 0xcc); /* int3 */ +# else + NOREF(s); +# endif +} +#endif /* VBOX */ + +/* maximum number of register used for input function arguments */ +static inline int tcg_target_get_call_iarg_regs_count(int flags) +{ + if (TCG_TARGET_REG_BITS == 64) { + return 6; + } + + flags &= TCG_CALL_TYPE_MASK; + switch(flags) { + case TCG_CALL_TYPE_STD: + return 0; + case TCG_CALL_TYPE_REGPARM_1: + case TCG_CALL_TYPE_REGPARM_2: + case TCG_CALL_TYPE_REGPARM: + return flags - TCG_CALL_TYPE_REGPARM_1 + 1; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'a': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); + break; + case 'b': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); + break; + case 'c': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); + break; + case 'd': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); + break; + case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); + break; + case 'q': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xf); + } + break; + case 'r': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + break; + + /* qemu_ld/st address constraint */ + case 'L': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); +#if defined(VBOX) && defined(__MINGW64__) + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); +#else + /** @todo figure why RDX isn't mentioned here. 
*/ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI); +#endif + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); + } + break; + + case 'e': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_U32; + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } + return 0; +} + +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x) ((x) & 7) +#else +# define LOWREGMASK(x) (x) +#endif + +#define P_EXT 0x100 /* 0x0f opcode prefix */ +#define P_DATA16 0x200 /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32 0x400 /* 0x67 opcode prefix */ +# define P_REXW 0x800 /* Set REX.W = 1 */ +# define P_REXB_R 0x1000 /* REG field as byte register */ +# define P_REXB_RM 0x2000 /* R/M field as byte register */ +#else +# define P_ADDR32 0 +# define P_REXW 0 +# define P_REXB_R 0 +# define P_REXB_RM 0 +#endif + +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_POP_r32 (0x58) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_TESTL (0x85) +#define OPC_XCHG_ax_r32 (0x90) + +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) + +/* Group 1 opcode extensions for 0x80-0x83. + These are also used as modifiers for OPC_ARITH. */ +#define ARITH_ADD 0 +#define ARITH_OR 1 +#define ARITH_ADC 2 +#define ARITH_SBB 3 +#define ARITH_AND 4 +#define ARITH_SUB 5 +#define ARITH_XOR 6 +#define ARITH_CMP 7 + +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ +#define SHIFT_ROL 0 +#define SHIFT_ROR 1 +#define SHIFT_SHL 4 +#define SHIFT_SHR 5 +#define SHIFT_SAR 7 + +/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_NOT 2 +#define EXT3_NEG 3 +#define EXT3_MUL 4 +#define EXT3_IMUL 5 +#define EXT3_DIV 6 +#define EXT3_IDIV 7 + +/* Group 5 opcode extensions for 0xff. 
To be used with OPC_GRP5. */ +#define EXT5_INC_Ev 0 +#define EXT5_DEC_Ev 1 +#define EXT5_CALLN_Ev 2 +#define EXT5_JMPN_Ev 4 + +/* Condition codes to be added to OPC_JCC_{long,short}. */ +#define JCC_JMP (-1) +#define JCC_JO 0x0 +#define JCC_JNO 0x1 +#define JCC_JB 0x2 +#define JCC_JAE 0x3 +#define JCC_JE 0x4 +#define JCC_JNE 0x5 +#define JCC_JBE 0x6 +#define JCC_JA 0x7 +#define JCC_JS 0x8 +#define JCC_JNS 0x9 +#define JCC_JP 0xa +#define JCC_JNP 0xb +#define JCC_JL 0xc +#define JCC_JGE 0xd +#define JCC_JLE 0xe +#define JCC_JG 0xf + +static const uint8_t tcg_cond_to_jcc[10] = { + [TCG_COND_EQ] = JCC_JE, + [TCG_COND_NE] = JCC_JNE, + [TCG_COND_LT] = JCC_JL, + [TCG_COND_GE] = JCC_JGE, + [TCG_COND_LE] = JCC_JLE, + [TCG_COND_GT] = JCC_JG, + [TCG_COND_LTU] = JCC_JB, + [TCG_COND_GEU] = JCC_JAE, + [TCG_COND_LEU] = JCC_JBE, + [TCG_COND_GTU] = JCC_JA, +}; + +#if defined(VBOX) +/* Calc the size of the tcg_out_opc() result. */ +static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x) +{ + unsigned char len = 1; +# if TCG_TARGET_REG_BITS == 64 + unsigned rex; + rex = 0; + rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + if (rex) len++; + if (opc & P_ADDR32) len++; +# endif + if (opc & P_DATA16) len++; + if (opc & P_EXT) len++; + + return len; +} +#endif + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ + int rex; + + if (opc & P_DATA16) { + /* We should never be asking for both 16 and 64-bit operation. */ + assert((opc & P_REXW) == 0); + tcg_out8(s, 0x66); + } + if (opc & P_ADDR32) { + tcg_out8(s, 0x67); + } + + rex = 0; + rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + + /* P_REXB_{R,RM} indicates that the given register is the low byte. + For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, + as otherwise the encoding indicates %[abcd]h. Note that the values + that are ORed in merely indicate that the REX byte must be present; + those bits get discarded in output. */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + + if (rex) { + tcg_out8(s, (uint8_t)(rex | 0x40)); + } + + if (opc & P_EXT) { + tcg_out8(s, 0x0f); + } + tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) +{ + if (opc & P_DATA16) { + tcg_out8(s, 0x66); + } + if (opc & P_EXT) { + tcg_out8(s, 0x0f); + } + tcg_out8(s, opc); +} +/* Discard the register arguments to tcg_out_opc early, so as not to penalize + the 32-bit compilation paths. This method works with all versions of gcc, + whereas relying on optimization may not be able to exclude them. */ +#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) +#endif + +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +{ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. + We handle either RM and INDEX missing with a negative value. In 64-bit + mode for absolute addresses, ~RM is the size of the immediate operand + that will follow the instruction. 
*/ + +static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, + int index, int shift, + tcg_target_long offset) +{ + int mod, len; + + if (index < 0 && rm < 0) { + if (TCG_TARGET_REG_BITS == 64) { + /* Try for a rip-relative addressing mode. This has replaced + the 32-bit-mode absolute addressing encoding. */ +#ifdef VBOX + tcg_target_long pc = (tcg_target_long)s->code_ptr + + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4; +#else + tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm; +#endif + tcg_target_long disp = offset - pc; + if (disp == (int32_t)disp) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 5); + tcg_out32(s, disp); +#ifdef VBOX + Assert(pc == (tcg_target_long)s->code_ptr); +#endif + return; + } + + /* Try for an absolute address encoding. This requires the + use of the MODRM+SIB encoding and is therefore larger than + rip-relative addressing. */ + if (offset == (int32_t)offset) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (4 << 3) | 5); + tcg_out32(s, offset); + return; + } + + /* ??? The memory isn't directly addressable. */ + tcg_abort(); + } else { + /* Absolute address. */ + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (r << 3) | 5); + tcg_out32(s, offset); + return; + } + } + + /* Find the length of the immediate addend. Note that the encoding + that would be used for (%ebp) indicates absolute addressing. */ + if (rm < 0) { + mod = 0, len = 4, rm = 5; + } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { + mod = 0, len = 0; + } else if (offset == (int8_t)offset) { + mod = 0x40, len = 1; + } else { + mod = 0x80, len = 4; + } + + /* Use a single byte MODRM format if possible. Note that the encoding + that would be used for %esp is the escape to the two byte form. */ + if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { + /* Single byte MODRM format. */ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); + } else { + /* Two byte MODRM+SIB format. */ + + /* Note that the encoding that would place %esp into the index + field indicates no index register. In 64-bit mode, the REX.X + bit counts, so %r12 can be used as the index. */ + if (index < 0) { + index = 4; + } else { + assert(index != TCG_REG_ESP); + } + + tcg_out_opc(s, opc, r, rm, index); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); + } + + if (len == 1) { + tcg_out8(s, offset); + } else if (len == 4) { + tcg_out32(s, offset); + } +} + +/* A simplification of the above with no index or shift. */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, + int rm, tcg_target_long offset) +{ + tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + /* Propagate an opcode prefix, such as P_REXW. */ + int ext = subop & ~0x7; + subop &= 0x7; + + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) +{ + if (arg != ret) { + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? 
P_REXW : 0); + tcg_out_modrm(s, opc, ret, arg); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg) +{ + if (arg == 0) { + tgen_arithr(s, ARITH_XOR, ret, ret); + return; + } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + } else if (arg == (int32_t)arg) { + tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); + tcg_out32(s, arg); + } else { + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + tcg_out32(s, arg >> 31 >> 1); + } +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ + if (val == (int8_t)val) { + tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); + tcg_out8(s, val); + } else if (val == (int32_t)val) { + tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); + tcg_out32(s, val); + } else { + tcg_abort(); + } +} + +static inline void tcg_out_push(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, + int arg1, tcg_target_long arg2) +{ + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, + int arg1, tcg_target_long arg2) +{ + int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, arg, arg1, arg2); +} + +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) +{ + /* Propagate an opcode prefix, such as P_DATA16. */ + int ext = subopc & ~0x7; + subopc &= 0x7; + + if (count == 1) { + tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); + } else { + tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); + tcg_out8(s, count); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ + tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ + /* movzbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ + /* movzwl */ + tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsw[lq] */ + tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ + /* 32-bit mov zero extends. */ + tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ + tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, + tcg_target_long val, int cf) +{ + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64) { + rexw = c & -8; + c &= 7; + } + + /* ??? 
While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int is_inc = (c == ARITH_ADD) != (val < 0); /* VBox: cppcheck: "xor" for bools is "not-equals" */ + if (TCG_TARGET_REG_BITS == 64) { + /* The single-byte increment encodings are re-tasked as the + REX prefixes. Use the MODRM encoding. */ + tcg_out_modrm(s, OPC_GRP5 + rexw, + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + + if (c == ARITH_AND) { + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t)val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); + return; + } + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; + } + } + + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); + tcg_out8(s, val); + return; + } + if (rexw == 0 || val == (int32_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); + tcg_out32(s, val); + return; + } + + tcg_abort(); +} + +static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); + } +} + +#if defined(VBOX) && defined(RT_OS_DARWIN) && ARCH_BITS == 32 +# define VBOX_16_BYTE_STACK_ALIGN +#endif +#ifdef VBOX_16_BYTE_STACK_ALIGN +static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0); + } +} +#endif + +/* Use SMALL != 0 to force a short forward branch. 
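+   When the label is already resolved and the displacement fits in eight
+   bits, the short JMP/Jcc form is emitted; otherwise the 32-bit form is
+   used (and a forced short branch aborts).  Unresolved labels are emitted
+   with an R_386_PC8 or R_386_PC32 relocation to be patched later.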
*/ +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) +{ + int32_t val, val1; + TCGLabel *l = &s->labels[label_index]; + + if (l->has_value) { + val = l->u.value - (tcg_target_long)s->code_ptr; + val1 = val - 2; + if ((int8_t)val1 == val1) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out8(s, val1); + } else { + if (small) { + tcg_abort(); + } + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + tcg_out32(s, val - 5); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + tcg_out32(s, val - 6); + } + } + } else if (small) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1); + s->code_ptr += 1; + } else { + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4); + s->code_ptr += 4; + } +} + +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, + int const_arg2, int rexw) +{ + if (const_arg2) { + if (arg2 == 0) { + /* test r, r */ + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); + } else { + tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); + } + } else { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } +} + +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} +#else +/* XXX: we implement it at the target level to avoid having to + handle cross basic blocks temporaries */ +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args, int small) +{ + int label_next; + label_next = gen_new_label(); + switch(args[4]) { + case TCG_COND_EQ: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], + args[5], small); + break; + case TCG_COND_NE: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + args[5], small); + tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], + args[5], small); + break; + case TCG_COND_LT: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_LE: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GT: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GE: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); + break; 
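+    /* The double-word comparison above is decomposed as: decide on the
+       high words first (a signed comparison where the condition requires
+       it), and only when they are equal fall through to an unsigned
+       comparison of the low words; e.g. LT becomes
+       (ah <s bh) || (ah == bh && al <u bl).  The unsigned conditions
+       below follow the same pattern with an unsigned comparison of the
+       high words. */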
+ case TCG_COND_LTU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_LEU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GTU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GEU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); + break; + default: + tcg_abort(); + } + tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr); +} +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + TCGArg new_args[6]; + int label_true, label_over; + + memcpy(new_args, args+1, 5*sizeof(TCGArg)); + + if (args[0] == args[1] || args[0] == args[2] + || (!const_args[3] && args[0] == args[3]) + || (!const_args[4] && args[0] == args[4])) { + /* When the destination overlaps with one of the argument + registers, don't do anything tricky. */ + label_true = gen_new_label(); + label_over = gen_new_label(); + + new_args[5] = label_true; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } else { + /* When the destination does not overlap one of the arguments, + clear the destination first, jump if cond false, and emit an + increment in the true case. This results in smaller code. */ + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + + label_over = gen_new_label(); + new_args[4] = tcg_invert_cond(new_args[4]); + new_args[5] = label_over; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) +{ +#ifdef VBOX + tcg_target_long disp = dest - (tcg_target_long)s->code_ptr + - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0) + - 4; +#else + tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5; +#endif + + if (disp == (int32_t)disp) { + tcg_out_opc(s, call ? 
OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest); + tcg_out_modrm(s, OPC_GRP5, + call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); + } +} + +static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest) +{ +#ifdef VBOX + tcg_gen_stack_alignment_check(s); +#endif + tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) +{ + tcg_out_branch(s, 0, dest); +} + +#if defined(CONFIG_SOFTMMU) + +#include "../../softmmu_defs.h" + +static void *qemu_ld_helpers[4] = { + __ldb_mmu, + __ldw_mmu, + __ldl_mmu, + __ldq_mmu, +}; + +static void *qemu_st_helpers[4] = { + __stb_mmu, + __stw_mmu, + __stl_mmu, + __stq_mmu, +}; + +/* Perform the TLB load and compare. + + Inputs: + ADDRLO_IDX contains the index into ARGS of the low part of the + address; the high part of the address is at ADDR_LOW_IDX+1. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + Outputs: + LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) + positions of the displacements of forward jumps to the TLB miss case. + + First argument register is loaded with the low part of the address. + In the TLB hit case, it has been adjusted as indicated by the TLB + and so is a host address. In the TLB miss case, it continues to + hold a guest address. + + Second argument register is clobbered. */ + +static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, + int mem_index, int s_bits, + const TCGArg *args, + uint8_t **label_ptr, int which) +{ + const int addrlo = args[addrlo_idx]; + const int r0 = tcg_target_call_iarg_regs[0]; + const int r1 = tcg_target_call_iarg_regs[1]; + TCGType type = TCG_TYPE_I32; + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) { + type = TCG_TYPE_I64; + rexw = P_REXW; + } + + tcg_out_mov(s, type, r1, addrlo); + tcg_out_mov(s, type, r0, addrlo); + + tcg_out_shifti(s, SHIFT_SHR + rexw, r1, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tgen_arithi(s, ARITH_AND + rexw, r0, + TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + tgen_arithi(s, ARITH_AND + rexw, r1, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0, + offsetof(CPUState, tlb_table[mem_index][0]) + + which); + + /* cmp 0(r1), r0 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0); + + tcg_out_mov(s, type, r0, addrlo); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[0] = s->code_ptr; + s->code_ptr++; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(r1), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[1] = s->code_ptr; + s->code_ptr++; + } + + /* TLB Hit. 
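+       The addend field of the matching CPUTLBEntry is added to the guest
+       address in the first argument register, turning it into a host
+       address for the direct access emitted by the caller.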
*/ + + /* add addend(r1), r0 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1, + offsetof(CPUTLBEntry, addend) - which); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ +#ifdef TARGET_WORDS_BIGENDIAN + const int bswap = 1; +#else + const int bswap = 0; +#endif + switch (sizeop) { + case 0: + tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs); + break; + case 0 | 4: + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs); + break; + case 1: + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + if (bswap) { + tcg_out_rolw_8(s, datalo); + } + break; + case 1 | 4: + if (bswap) { + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + tcg_out_rolw_8(s, datalo); + tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs); + } + break; + case 2: + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + if (bswap) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_bswap32(s, datalo); + tcg_out_ext32s(s, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs); + } + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs); + if (bswap) { + tcg_out_bswap64(s, datalo); + } + } else { + if (bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + if (base != datalo) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + } else { + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + } + if (bswap) { + tcg_out_bswap32(s, datalo); + tcg_out_bswap32(s, datahi); + } + } + break; + default: + tcg_abort(); + } +} + +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + +static void * const vbox_ld_helpers[] = { + __ldub_vbox_phys, + __lduw_vbox_phys, + __ldul_vbox_phys, + __ldq_vbox_phys, + __ldb_vbox_phys, + __ldw_vbox_phys, + __ldl_vbox_phys, + __ldq_vbox_phys, +}; + +static void * const vbox_st_helpers[] = { + __stb_vbox_phys, + __stw_vbox_phys, + __stl_vbox_phys, + __stq_vbox_phys +}; + +DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst) +{ + intptr_t disp; +# ifdef VBOX + tcg_gen_stack_alignment_check(s); +# endif + disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5; + tcg_out8(s, 0xe8); /* call disp32 */ + tcg_out32(s, disp); /* disp32 */ +} + +static void tcg_out_vbox_phys_read(TCGContext *s, int index, + int addr_reg, + int data_reg, int data_reg2) +{ + int useReg2 = ((index & 3) == 3); + + /** @todo should we make phys address accessors fastcalls - probably not a big deal */ + /* out parameter (address), note that phys address is always 64-bit */ + AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n")); + +# if 0 + tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */ + tcg_out_push(s, addr_reg); +# else + /* mov addr_reg, %eax */ + tcg_out_mov(s, TCG_REG_EAX, addr_reg); +# endif + + tcg_out_long_call(s, vbox_ld_helpers[index]); + + /* mov %eax, data_reg */ + tcg_out_mov(s, data_reg, TCG_REG_EAX); + + /* returned 64-bit value */ + if (useReg2) + tcg_out_mov(s, data_reg2, TCG_REG_EDX); +} + +static void tcg_out_vbox_phys_write(TCGContext *s, int index, + int addr_reg, + int val_reg, int val_reg2) { + int useReg2 = ((index & 3) == 3); + +# if 0 + /* out parameter (value2) 
*/ + if (useReg2) + tcg_out_push(s, val_reg2); + /* out parameter (value) */ + tcg_out_push(s, val_reg); + /* out parameter (address), note that phys address is always 64-bit */ + AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n")); + tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */ + tcg_out_push(s, addr_reg); +# else + Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX))); + /* mov addr_reg, %eax */ + tcg_out_mov(s, TCG_REG_EAX, addr_reg); + Assert(!useReg2 || (val_reg2 != TCG_REG_EDX)); + /* mov val_reg, %edx */ + tcg_out_mov(s, TCG_REG_EDX, val_reg); + if (useReg2) + tcg_out_mov(s, TCG_REG_ECX, val_reg2); + +# endif + /* call it */ + tcg_out_long_call(s, vbox_st_helpers[index]); + + /* clean stack after us */ +# if 0 + tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4)); +# endif +} + +#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */ + +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and + EAX. It will be useful once fixed registers globals are less + common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, + int opc) +{ + int data_reg, data_reg2 = 0; + int addrlo_idx; +#if defined(CONFIG_SOFTMMU) + int mem_index, s_bits, arg_idx; + uint8_t *label_ptr[3]; +#endif + + data_reg = args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } + +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc & 3; + + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_read)); + + /* TLB Hit. */ + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); + + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; + s->code_ptr++; + + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } + + /* XXX: move that code at the end of the TB */ + /* The first argument is already loaded with addrlo. 
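+       For a 64-bit guest address on a 32-bit host the high word goes into
+       the next argument register, followed by mem_index, and the matching
+       __ld*_mmu helper is called.  The loaded value is returned in EAX
+       (EDX:EAX for 64-bit data) and extended or moved into data_reg below.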
*/ + arg_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++], + args[addrlo_idx + 1]); + } + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], + mem_index); + tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); + + switch(opc) { + case 0 | 4: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 1 | 4: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 0: + tcg_out_ext8u(s, data_reg, TCG_REG_EAX); + break; + case 1: + tcg_out_ext16u(s, data_reg, TCG_REG_EAX); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + break; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); + } else if (data_reg == TCG_REG_EDX) { + /* xchg %edx, %eax */ + tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX); + } + break; + default: + tcg_abort(); + } + + /* label2: */ + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#else +# if defined(VBOX) && defined(__MINGW64__) +# error port me +# endif + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. */ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc); + } +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) +#ifdef TARGET_WORDS_BIGENDIAN + const int bswap = 1; +#else + const int bswap = 0; +#endif + /* ??? Ideally we wouldn't need a scratch register. For user-only, + we could perform the bswap twice to restore the original value + instead of moving to the scratch. But as it is, the L constraint + means that the second argument reg is definitely free here. 
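+       The scratch register only ever holds a byte-swapped copy, so the
+       original datalo/datahi register contents are left intact.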
*/ + int scratch = tcg_target_call_iarg_regs[1]; + + switch (sizeop) { + case 0: + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs); + break; + case 1: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_rolw_8(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs); + break; + case 2: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + datalo = scratch; + } + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + break; + case 3: + if (TCG_TARGET_REG_BITS == 64) { + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); + tcg_out_bswap64(s, scratch); + datalo = scratch; + } + tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs); + } else if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs); + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4); + } else { + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4); + } + break; + default: + tcg_abort(); + } +#else /* VBOX */ +# error "broken" + tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, + int opc) +{ + int data_reg, data_reg2 = 0; + int addrlo_idx; +#if defined(CONFIG_SOFTMMU) + int mem_index, s_bits; + int stack_adjust; + uint8_t *label_ptr[3]; +#endif + + data_reg = args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } + +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc; + + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); + + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; + s->code_ptr++; + + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } + +# if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + + /* XXX: move that code at the end of the TB */ + if (TCG_TARGET_REG_BITS == 64) { +# if defined(VBOX) && defined(__MINGW64__) + tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), + tcg_target_call_iarg_regs[1], data_reg); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); +# else + tcg_out_mov(s, (opc == 3 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), + TCG_REG_RSI, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index); +# endif + stack_adjust = 0; + } else if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg); + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2); +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_subi(s, TCG_REG_ESP, 12); +# endif + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index); + stack_adjust = 0; + } + } else { + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_pushi(s, 0); +# endif + tcg_out_pushi(s, mem_index); + tcg_out_push(s, data_reg2); + tcg_out_push(s, data_reg); + stack_adjust = 12; + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); + switch(opc) { + case 0: + tcg_out_ext8u(s, TCG_REG_ECX, data_reg); + break; + case 1: + tcg_out_ext16u(s, TCG_REG_ECX, data_reg); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg); + break; + } +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_subi(s, TCG_REG_ESP, 12); +# endif + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } + } + + tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); + +# ifdef VBOX_16_BYTE_STACK_ALIGN + if (stack_adjust != 0) { + tcg_out_addi(s, TCG_REG_ESP, RT_ALIGN(stack_adjust, 16)); + } +# else + if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { + /* Pop and discard. This is 2 bytes smaller than the add. */ + tcg_out_pop(s, TCG_REG_ECX); + } else if (stack_adjust != 0) { + tcg_out_addi(s, TCG_REG_ESP, stack_adjust); + } +# endif + +# else /* VBOX && REM_PHYS_ADDR_IN_TLB */ +# error Borked + tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2); +# endif /* VBOX && REM_PHYS_ADDR_IN_TLB */ + + /* label2: */ + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#else +# if defined(VBOX) && defined(__MINGW64__) +# error port me +# endif + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. 
*/ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc); + } +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + int c, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + rexw = P_REXW; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32) +#endif + + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); + tcg_out_jmp(s, (tcg_target_long) tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_out8(s, OPC_JMP_long); /* jmp im */ + s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + tcg_out32(s, 0); + } else { + /* indirect jump method */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, + (tcg_target_long)(s->tb_next + args[0])); + } + s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + break; + case INDEX_op_call: + if (const_args[0]) { + tcg_out_calli(s, args[0]); + } else { + /* call *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]); + } + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_jmp(s, args[0]); + } else { + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); + } + break; + case INDEX_op_br: + tcg_out_jxx(s, JCC_JMP, args[0], 0); + break; + case INDEX_op_movi_i32: + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + break; + OP_32_64(ld8u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); + break; + OP_32_64(ld8s): + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); + break; + OP_32_64(ld16u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); + break; + OP_32_64(ld16s): + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld32u_i64: +#endif + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); + break; + OP_32_64(st16): + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_st32_i64: +#endif + case INDEX_op_st_i32: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(add): + /* For 3-operand addition, use LEA. */ + if (args[0] != args[1]) { + TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + + if (const_args[2]) { + c3 = a2, a2 = -1; + } else if (a0 == a2) { + /* Watch out for dest = src + dest, since we've removed + the matching constraint on the add. 
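+               In that case a plain ADD is emitted instead; otherwise the
+               LEA below forms a0 = a1 + a2 (or a1 + constant) without
+               modifying the source registers or the flags.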
*/ + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + break; + } + + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); + break; + } + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + gen_arith: + if (const_args[2]) { + tgen_arithi(s, c + rexw, args[0], args[2], 0); + } else { + tgen_arithr(s, c + rexw, args[0], args[2]); + } + break; + + OP_32_64(mul): + if (const_args[2]) { + int32_t val; + val = args[2]; + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); + tcg_out8(s, val); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); + tcg_out32(s, val); + } + } else { + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); + } + break; + + OP_32_64(div2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); + break; + OP_32_64(divu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); + break; + + OP_32_64(shl): + c = SHIFT_SHL; + goto gen_shift; + OP_32_64(shr): + c = SHIFT_SHR; + goto gen_shift; + OP_32_64(sar): + c = SHIFT_SAR; + goto gen_shift; + OP_32_64(rotl): + c = SHIFT_ROL; + goto gen_shift; + OP_32_64(rotr): + c = SHIFT_ROR; + goto gen_shift; + gen_shift: + if (const_args[2]) { + tcg_out_shifti(s, c + rexw, args[0], args[2]); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], + args[3], 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond32(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + OP_32_64(bswap16): + tcg_out_rolw_8(s, args[0]); + break; + OP_32_64(bswap32): + tcg_out_bswap32(s, args[0]); + break; + + OP_32_64(neg): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); + break; + OP_32_64(not): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); + break; + + OP_32_64(ext8s): + tcg_out_ext8s(s, args[0], args[1], rexw); + break; + OP_32_64(ext16s): + tcg_out_ext16s(s, args[0], args[1], rexw); + break; + OP_32_64(ext8u): + tcg_out_ext8u(s, args[0], args[1]); + break; + OP_32_64(ext16u): + tcg_out_ext16u(s, args[0], args[1]); + break; + + case INDEX_op_qemu_ld8u: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld8s: + tcg_out_qemu_ld(s, args, 0 | 4); + break; + case INDEX_op_qemu_ld16u: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_ld16s: + tcg_out_qemu_ld(s, args, 1 | 4); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_qemu_ld32u: +#endif + case INDEX_op_qemu_ld32: + tcg_out_qemu_ld(s, args, 2); + break; + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, 3); + break; + + case INDEX_op_qemu_st8: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st16: + tcg_out_qemu_st(s, args, 1); + break; + case INDEX_op_qemu_st32: + tcg_out_qemu_st(s, args, 2); + break; + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, 3); + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; + case INDEX_op_mulu2_i32: + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]); + break; + case INDEX_op_add2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_ADD, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, 
ARITH_ADC, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_ADC, args[1], args[5]); + } + break; + case INDEX_op_sub2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_SUB, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_SBB, args[1], args[5]); + } + break; +#else /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_movi_i64: + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ld32s_i64: + tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, 2 | 4); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], + args[3], 0); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond64(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0]); + break; + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, args[0], args[1]); + break; +#endif + + default: + tcg_abort(); + } + +#undef OP_32_64 +} + +static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_call, { "ri" } }, + { INDEX_op_jmp, { "ri" } }, + { INDEX_op_br, { } }, + { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_movi_i32, { "r" } }, + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "q", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "ri" } }, + { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "ri" } }, + { INDEX_op_xor_i32, { "r", "0", "ri" } }, + + { INDEX_op_shl_i32, { "r", "0", "ci" } }, + { INDEX_op_shr_i32, { "r", "0", "ci" } }, + { INDEX_op_sar_i32, { "r", "0", "ci" } }, + { INDEX_op_rotl_i32, { "r", "0", "ci" } }, + { INDEX_op_rotr_i32, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "0" } }, + { INDEX_op_bswap32_i32, { "r", "0" } }, + + { INDEX_op_neg_i32, { "r", "0" } }, + + { INDEX_op_not_i32, { "r", "0" } }, + + { INDEX_op_ext8s_i32, { "r", "q" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "q" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_setcond_i32, { "q", "r", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else + { INDEX_op_mov_i64, { "r", "r" } }, + { INDEX_op_movi_i64, { "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" 
} }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "0", "re" } }, + { INDEX_op_mul_i64, { "r", "0", "re" } }, + { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "re" } }, + { INDEX_op_and_i64, { "r", "0", "reZ" } }, + { INDEX_op_or_i64, { "r", "0", "re" } }, + { INDEX_op_xor_i64, { "r", "0", "re" } }, + + { INDEX_op_shl_i64, { "r", "0", "ci" } }, + { INDEX_op_shr_i64, { "r", "0", "ci" } }, + { INDEX_op_sar_i64, { "r", "0", "ci" } }, + { INDEX_op_rotl_i64, { "r", "0", "ci" } }, + { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i64, { "r", "re" } }, + { INDEX_op_setcond_i64, { "r", "r", "re" } }, + + { INDEX_op_bswap16_i64, { "r", "0" } }, + { INDEX_op_bswap32_i64, { "r", "0" } }, + { INDEX_op_bswap64_i64, { "r", "0" } }, + { INDEX_op_neg_i64, { "r", "0" } }, + { INDEX_op_not_i64, { "r", "0" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "r", "L" } }, + + { INDEX_op_qemu_st8, { "cb", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L" } }, +#else + { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, + { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, + + { INDEX_op_qemu_st8, { "cb", "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L", "L" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_RSI, + TCG_REG_RDI, +# endif + TCG_REG_R12, + TCG_REG_R13, + /* TCG_REG_R14, */ /* Currently used for the global env. */ + TCG_REG_R15, +#else +# ifndef VBOX + /* TCG_REG_EBP, */ /* Currently used for the global env. */ + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, +# else + TCG_REG_EBP, + TCG_REG_EBX, + /* TCG_REG_ESI, */ /* Currently used for the global env. 
*/ + TCG_REG_EDI, +# endif +#endif +}; + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, frame_size, push_size, stack_addend; + + /* TB prologue */ + + /* Save all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_push(s, tcg_target_callee_save_regs[i]); + } +# if defined(VBOX_STRICT) && defined(RT_ARCH_X86) + tcg_out8(s, 0x31); /* xor ebp, ebp */ + tcg_out8(s, 0xed); +# endif + + /* Reserve some stack space. */ + push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs); + push_size *= TCG_TARGET_REG_BITS / 8; + + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE; +#if defined(VBOX) && defined(__MINGW64__) + frame_size += TCG_TARGET_CALL_STACK_OFFSET; +#endif + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + stack_addend = frame_size - push_size; + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]); +# ifdef VBOX + tcg_gen_stack_alignment_check(s); +# endif + + tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */ + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; + + tcg_out_addi(s, TCG_REG_ESP, stack_addend); + + for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { + tcg_out_pop(s, tcg_target_callee_save_regs[i]); + } + tcg_out_opc(s, OPC_RET, 0, 0, 0); +} + +static void tcg_target_init(TCGContext *s) +{ +#if !defined(CONFIG_USER_ONLY) + /* fail safe */ + if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) + tcg_abort(); +#endif + + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + } else { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); + } + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); + if (TCG_TARGET_REG_BITS == 64) { +# if !defined(VBOX) || !defined(__MINGW64__) + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); +# endif + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + } + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP); + + tcg_add_target_add_op_defs(x86_op_defs); +} diff --git a/src/recompiler/tcg/i386/tcg-target.h b/src/recompiler/tcg/i386/tcg-target.h new file mode 100644 index 00000000..e812bc58 --- /dev/null +++ b/src/recompiler/tcg/i386/tcg-target.h @@ -0,0 +1,134 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#define TCG_TARGET_I386 1 + +#if defined(__x86_64__) +# define TCG_TARGET_REG_BITS 64 +#else +# define TCG_TARGET_REG_BITS 32 +#endif +//#define TCG_TARGET_WORDS_BIGENDIAN + +#if TCG_TARGET_REG_BITS == 64 +# define TCG_TARGET_NB_REGS 16 +#else +# define TCG_TARGET_NB_REGS 8 +#endif + +enum { + TCG_REG_EAX = 0, + TCG_REG_ECX, + TCG_REG_EDX, + TCG_REG_EBX, + TCG_REG_ESP, + TCG_REG_EBP, + TCG_REG_ESI, + TCG_REG_EDI, + + /* 64-bit registers; always define the symbols to avoid + too much if-deffing. */ + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_RAX = TCG_REG_EAX, + TCG_REG_RCX = TCG_REG_ECX, + TCG_REG_RDX = TCG_REG_EDX, + TCG_REG_RBX = TCG_REG_EBX, + TCG_REG_RSP = TCG_REG_ESP, + TCG_REG_RBP = TCG_REG_EBP, + TCG_REG_RSI = TCG_REG_ESI, + TCG_REG_RDI = TCG_REG_EDI, +}; + +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_ESP +#define TCG_TARGET_STACK_ALIGN 16 +#if defined(VBOX) && defined(__MINGW64__) +# define TCG_TARGET_CALL_STACK_OFFSET 32 /* 4 qword argument/register spill zone */ +#else +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif + +/* optional instructions */ +#define TCG_TARGET_HAS_div2_i32 +#define TCG_TARGET_HAS_rot_i32 +#define TCG_TARGET_HAS_ext8s_i32 +#define TCG_TARGET_HAS_ext16s_i32 +#define TCG_TARGET_HAS_ext8u_i32 +#define TCG_TARGET_HAS_ext16u_i32 +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 +#define TCG_TARGET_HAS_neg_i32 +#define TCG_TARGET_HAS_not_i32 +// #define TCG_TARGET_HAS_andc_i32 +// #define TCG_TARGET_HAS_orc_i32 +// #define TCG_TARGET_HAS_eqv_i32 +// #define TCG_TARGET_HAS_nand_i32 +// #define TCG_TARGET_HAS_nor_i32 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_div2_i64 +#define TCG_TARGET_HAS_rot_i64 +#define TCG_TARGET_HAS_ext8s_i64 +#define TCG_TARGET_HAS_ext16s_i64 +#define TCG_TARGET_HAS_ext32s_i64 +#define TCG_TARGET_HAS_ext8u_i64 +#define TCG_TARGET_HAS_ext16u_i64 +#define TCG_TARGET_HAS_ext32u_i64 +#define TCG_TARGET_HAS_bswap16_i64 +#define TCG_TARGET_HAS_bswap32_i64 +#define TCG_TARGET_HAS_bswap64_i64 +#define TCG_TARGET_HAS_neg_i64 +#define TCG_TARGET_HAS_not_i64 +// #define TCG_TARGET_HAS_andc_i64 +// #define TCG_TARGET_HAS_orc_i64 +// #define TCG_TARGET_HAS_eqv_i64 +// #define TCG_TARGET_HAS_nand_i64 +// #define TCG_TARGET_HAS_nor_i64 +#endif + +#define TCG_TARGET_HAS_GUEST_BASE + +/* Note: must be synced with dyngen-exec.h */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_AREG0 TCG_REG_R14 +#else +# ifndef VBOX /* we're using ESI instead of EBP, probably due to frame pointer opt issues */ +# define TCG_AREG0 TCG_REG_EBP +# else /* VBOX */ +# define TCG_AREG0 TCG_REG_ESI +# endif /* VBOX */ +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} diff --git a/src/recompiler/tcg/tcg-dyngen.c b/src/recompiler/tcg/tcg-dyngen.c 
new file mode 100644 index 00000000..b068a0a7 --- /dev/null +++ b/src/recompiler/tcg/tcg-dyngen.c @@ -0,0 +1,437 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef VBOX +#include <assert.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#else +# include <stdio.h> +# include "osdep.h" +#endif + +#include "config.h" +#include "osdep.h" + +#include "tcg.h" + +int __op_param1, __op_param2, __op_param3; +#if defined(__sparc__) || defined(__arm__) + void __op_gen_label1(){} + void __op_gen_label2(){} + void __op_gen_label3(){} +#else + int __op_gen_label1, __op_gen_label2, __op_gen_label3; +#endif +int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3; + +#if 0 +#if defined(__s390__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} +#elif defined(__ia64__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + while (start < stop) { + asm volatile ("fc %0" :: "r"(start)); + start += 32; + } + asm volatile (";;sync.i;;srlz.i;;"); +} +#elif defined(__powerpc__) + +#define MIN_CACHE_LINE_SIZE 8 /* conservative value */ + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p; + + start &= ~(MIN_CACHE_LINE_SIZE - 1); + stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1); + + for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} +#elif defined(__alpha__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + asm ("imb"); +} +#elif defined(__sparc__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p; + + p = start & ~(8UL - 1UL); + stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL); + + for (; p < stop; p += 8) + __asm__ __volatile__("flush\t%0" : : "r" (p)); +} +#elif defined(__arm__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + register uintptr_t _beg __asm ("a1") = start; + register uintptr_t _end __asm ("a2") = stop; + register uintptr_t _flg __asm ("a3") = 0; + __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); +} +#elif 
defined(__mc68000) + +# include <asm/cachectl.h> +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + cacheflush(start,FLUSH_SCOPE_LINE,FLUSH_CACHE_BOTH,stop-start+16); +} +#elif defined(__mips__) + +#include <sys/cachectl.h> +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + _flush_cache ((void *)start, stop - start, BCACHE); +} +#else +#error unsupported CPU +#endif + +#ifdef __alpha__ + +register int gp asm("$29"); + +static inline void immediate_ldah(void *p, int val) { + uint32_t *dest = p; + long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff; + + *dest &= ~0xffff; + *dest |= high; + *dest |= 31 << 16; +} +static inline void immediate_lda(void *dest, int val) { + *(uint16_t *) dest = val; +} +void fix_bsr(void *p, int offset) { + uint32_t *dest = p; + *dest &= ~((1 << 21) - 1); + *dest |= (offset >> 2) & ((1 << 21) - 1); +} + +#endif /* __alpha__ */ + +#ifdef __ia64 + +/* Patch instruction with "val" where "mask" has 1 bits. */ +static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val) +{ + uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + uintptr_t shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val) +{ + ia64_patch(insn_addr, + 0x011ffffe000UL, + ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ + | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18); +} + +static inline void ia64_imm64 (void *insn, uint64_t val) +{ + /* Ignore the slot number of the relocation; GCC and Intel + toolchains differed for some time on whether IMM64 relocs are + against slot 1 (Intel) or slot 2 (GCC). */ + uint64_t insn_addr = (uint64_t) insn & ~3UL; + + ia64_patch(insn_addr + 2, + 0x01fffefe000UL, + ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ + | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ + | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */) + ); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +static inline void ia64_imm60b (void *insn, uint64_t val) +{ + /* Ignore the slot number of the relocation; GCC and Intel + toolchains differed for some time on whether IMM64 relocs are + against slot 1 (Intel) or slot 2 (GCC). */ + uint64_t insn_addr = (uint64_t) insn & ~3UL; + + if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) + fprintf(stderr, "%s: value %ld out of IMM60 range\n", + __FUNCTION__, (int64_t) val); + ia64_patch_imm60(insn_addr + 2, val); +} + +static inline void ia64_imm22 (void *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) + fprintf(stderr, "%s: value %li out of IMM22 range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x01fffcfe000UL, + ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); +} + +/* Like ia64_imm22(), but also clear bits 20-21. 
For addl, this has + the effect of turning "addl rX=imm22,rY" into "addl + rX=imm22,r0". */ +static inline void ia64_imm22_r0 (void *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) + fprintf(stderr, "%s: value %li out of IMM22 range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20), + ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); +} + +static inline void ia64_imm21b (void *insn, uint64_t val) +{ + if (val + (1 << 20) >= (1 << 21)) + fprintf(stderr, "%s: value %li out of IMM21b range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x11ffffe000UL, + ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); +} + +static inline void ia64_nop_b (void *insn) +{ + ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37); +} + +static inline void ia64_ldxmov(void *insn, uint64_t val) +{ + if (val + (1 << 21) < (1 << 22)) + ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37); +} + +static inline int ia64_patch_ltoff(void *insn, uint64_t val, + int relaxable) +{ + if (relaxable && (val + (1 << 21) < (1 << 22))) { + ia64_imm22_r0(insn, val); + return 0; + } + return 1; +} + +struct ia64_fixup { + struct ia64_fixup *next; + void *addr; /* address that needs to be patched */ + long value; +}; + +#define IA64_PLT(insn, plt_index) \ +do { \ + struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \ + fixup->next = plt_fixes; \ + plt_fixes = fixup; \ + fixup->addr = (insn); \ + fixup->value = (plt_index); \ + plt_offset[(plt_index)] = 1; \ +} while (0) + +#define IA64_LTOFF(insn, val, relaxable) \ +do { \ + if (ia64_patch_ltoff(insn, val, relaxable)) { \ + struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \ + fixup->next = ltoff_fixes; \ + ltoff_fixes = fixup; \ + fixup->addr = (insn); \ + fixup->value = (val); \ + } \ +} while (0) + +static inline void ia64_apply_fixes (uint8_t **gen_code_pp, + struct ia64_fixup *ltoff_fixes, + uint64_t gp, + struct ia64_fixup *plt_fixes, + int num_plts, + uintptr_t *plt_target, + unsigned int *plt_offset) +{ + static const uint8_t plt_bundle[] = { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; movl r1=GP */ + 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, + + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; brl IP */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0 + }; + uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start; + uint64_t *vp; + struct ia64_fixup *fixup; + unsigned int offset = 0; + struct fdesc { + long ip; + long gp; + } *fdesc; + int i; + + if (plt_fixes) { + plt_start = gen_code_ptr; + + for (i = 0; i < num_plts; ++i) { + if (plt_offset[i]) { + plt_offset[i] = offset; + offset += sizeof(plt_bundle); + + fdesc = (struct fdesc *) plt_target[i]; + memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle)); + ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp); + ia64_imm60b(gen_code_ptr + 0x12, + (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4); + gen_code_ptr += sizeof(plt_bundle); + } + } + + for (fixup = plt_fixes; fixup; fixup = fixup->next) + ia64_imm21b(fixup->addr, + ((long) plt_start + plt_offset[fixup->value] + - ((long) fixup->addr & ~0xf)) >> 4); + } + + got_start = gen_code_ptr; + + /* First, create the GOT: */ + for (fixup = ltoff_fixes; fixup; fixup = fixup->next) { + /* first check if we already have this value in the GOT: */ + for (vp = (uint64_t *) 
got_start; vp < (uint64_t *) gen_code_ptr; ++vp) + if (*vp == fixup->value) + break; + if (vp == (uint64_t *) gen_code_ptr) { + /* Nope, we need to put the value in the GOT: */ + *vp = fixup->value; + gen_code_ptr += 8; + } + ia64_imm22(fixup->addr, (long) vp - gp); + } + /* Keep code ptr aligned. */ + if ((long) gen_code_ptr & 15) + gen_code_ptr += 8; + *gen_code_pp = gen_code_ptr; +} +#endif +#endif + +#ifdef CONFIG_DYNGEN_OP + +#if defined __hppa__ +struct hppa_branch_stub { + uint32_t *location; + long target; + struct hppa_branch_stub *next; +}; + +#define HPPA_RECORD_BRANCH(LIST, LOC, TARGET) \ +do { \ + struct hppa_branch_stub *stub = alloca(sizeof(struct hppa_branch_stub)); \ + stub->location = LOC; \ + stub->target = TARGET; \ + stub->next = LIST; \ + LIST = stub; \ +} while (0) + +static inline void hppa_process_stubs(struct hppa_branch_stub *stub, + uint8_t **gen_code_pp) +{ + uint32_t *s = (uint32_t *)*gen_code_pp; + uint32_t *p = s + 1; + + if (!stub) return; + + for (; stub != NULL; stub = stub->next) { + uintptr_t l = (uintptr_t)p; + /* stub: + * ldil L'target, %r1 + * be,n R'target(%sr4,%r1) + */ + *p++ = 0x20200000 | reassemble_21(lrsel(stub->target, 0)); + *p++ = 0xe0202002 | (reassemble_17(rrsel(stub->target, 0) >> 2)); + hppa_patch17f(stub->location, l, 0); + } + /* b,l,n stub,%r0 */ + *s = 0xe8000002 | reassemble_17((p - s) - 2); + *gen_code_pp = (uint8_t *)p; +} +#endif /* __hppa__ */ + +const TCGArg *dyngen_op(TCGContext *s, int opc, const TCGArg *opparam_ptr) +{ + uint8_t *gen_code_ptr; + +#ifdef __hppa__ + struct hppa_branch_stub *hppa_stubs = NULL; +#endif + + gen_code_ptr = s->code_ptr; + switch(opc) { + +/* op.h is dynamically generated by dyngen.c from op.c */ +#include "op.h" + + default: + tcg_abort(); + } + +#ifdef __hppa__ + hppa_process_stubs(hppa_stubs, &gen_code_ptr); +#endif + + s->code_ptr = gen_code_ptr; + return opparam_ptr; +} +#endif diff --git a/src/recompiler/tcg/tcg-op.h b/src/recompiler/tcg/tcg-op.h new file mode 100644 index 00000000..b3890804 --- /dev/null +++ b/src/recompiler/tcg/tcg-op.h @@ -0,0 +1,2469 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "tcg.h" + +int gen_new_label(void); + +static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); +} + +static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); +} + +static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = arg1; +} + +static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); +} + +static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); +} + +static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = arg1; + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); +} + +static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); +} + +static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGArg arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = arg3; +} + +static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGArg arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = arg3; +} + +static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(val); + *gen_opparam_ptr++ = GET_TCGV_PTR(base); + *gen_opparam_ptr++ = offset; +} + +static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_PTR(base); + *gen_opparam_ptr++ = offset; +} + +static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val, + TCGv_i32 addr, TCGArg mem_index) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_I32(addr); + *gen_opparam_ptr++ = mem_index; +} + +static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_i64 addr, TCGArg mem_index) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_I64(addr); + *gen_opparam_ptr++ = mem_index; +} + +static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4) +{ + *gen_opc_ptr++ = 
opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); +} + +static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); +} + +static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGArg arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = arg3; + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGArg arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = arg3; + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); +} + +static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); +} + +static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5, + TCGv_i32 arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); + *gen_opparam_ptr++ 
= GET_TCGV_I32(arg6); +} + +static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5, + TCGv_i64 arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); + *gen_opparam_ptr++ = GET_TCGV_I64(arg6); +} + +static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, + TCGv_i32 arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, + TCGv_i64 arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGv_i32 arg3, + TCGv_i32 arg4, TCGArg arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = arg5; + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGv_i64 arg3, + TCGv_i64 arg4, TCGArg arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = arg5; + *gen_opparam_ptr++ = arg6; +} + +static inline void gen_set_label(int n) +{ + tcg_gen_op1i(INDEX_op_set_label, n); +} + +static inline void tcg_gen_br(int label) +{ + tcg_gen_op1i(INDEX_op_br, label); +} + +static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (!TCGV_EQUAL_I32(ret, arg)) + tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); +} + +static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) +{ + tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); +} + +/* A version of dh_sizemask from def-helper.h that doesn't rely on + preprocessor magic. */ +static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed) +{ + return (is_64bit << n*2) | (is_signed << (n*2 + 1)); +} + +/* helper calls */ +static inline void tcg_gen_helperN(void *func, int flags, int sizemask, + TCGArg ret, int nargs, TCGArg *args) +{ + TCGv_ptr fn; + fn = tcg_const_ptr((tcg_target_long)func); + tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret, + nargs, args); + tcg_temp_free_ptr(fn); +} + +/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently + reserved for helpers in tcg-runtime.c. These helpers are all const + and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST | + TCG_CALL_PURE. This may need to be adjusted if these functions + start to be used with other helpers. 
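+   As an illustrative sketch only, a front end holding TCGv_i32 values
+   ret, a, b and c could route a three-operand operation through the
+   general tcg_gen_helperN() entry point above; "helper_foo_i32" is a
+   purely hypothetical helper name used for the example:
+
+       int sizemask = 0;
+       TCGArg args[3];
+       sizemask |= tcg_gen_sizemask(0, 0, 1);   return value: 32 bit, signed
+       sizemask |= tcg_gen_sizemask(1, 0, 1);   argument 1
+       sizemask |= tcg_gen_sizemask(2, 0, 1);   argument 2
+       sizemask |= tcg_gen_sizemask(3, 0, 1);   argument 3
+       args[0] = GET_TCGV_I32(a);
+       args[1] = GET_TCGV_I32(b);
+       args[2] = GET_TCGV_I32(c);
+       tcg_gen_helperN(helper_foo_i32, 0, sizemask, GET_TCGV_I32(ret), 3, args);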
*/ +static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, + TCGv_i32 a, TCGv_i32 b) +{ + TCGv_ptr fn; + TCGArg args[2]; + fn = tcg_const_ptr((tcg_target_long)func); + args[0] = GET_TCGV_I32(a); + args[1] = GET_TCGV_I32(b); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I32(ret), 2, args); + tcg_temp_free_ptr(fn); +} + +static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret, + TCGv_i64 a, TCGv_i64 b) +{ + TCGv_ptr fn; + TCGArg args[2]; + fn = tcg_const_ptr((tcg_target_long)func); + args[0] = GET_TCGV_I64(a); + args[1] = GET_TCGV_I64(b); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I64(ret), 2, args); + tcg_temp_free_ptr(fn); +} + +/* 32 bit ops */ + +static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_add_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg1); + tcg_gen_sub_i32(ret, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_sub_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_movi_i32(ret, 0); + } else if (arg2 == 0xffffffff) { + tcg_gen_mov_i32(ret, arg1); + } 
else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_and_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0xffffffff) { + tcg_gen_movi_i32(ret, 0xffffffff); + } else if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_or_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_movi_i32(ret, 0); + } else { + tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_xor_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_shl_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_shr_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_sar_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, + TCGv_i32 arg2, int label_index) +{ + tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index); +} + +static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, + int32_t arg2, int label_index) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_brcond_i32(cond, arg1, t0, label_index); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); +} + +static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, int32_t arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_setcond_i32(cond, ret, arg1, t0); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_mul_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +} + +#ifdef TCG_TARGET_HAS_div_i32 +static inline void 
tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); +} +#elif defined(TCG_TARGET_HAS_div2_i32) +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t0, arg1, 31); + tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t0, arg1, 31); + tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, 0); + tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, 0); + tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} +#else +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_divu_i32, ret, arg1, arg2, 0); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. 
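+   As a worked example of the encoding: tcg_gen_sizemask(n, is_64bit,
+   is_signed) above evaluates to (is_64bit << n*2) | (is_signed << (n*2 + 1)),
+   so the three signed 32-bit slots used by the div/rem helpers give
+   2 | 8 | 32 = 42, while the three unsigned 32-bit slots here give 0.
+   A call matching the tcg_gen_helper32(func, sizemask, ret, a, b)
+   signature above would read, for instance:
+
+       tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2);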
*/ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_remu_i32, ret, arg1, arg2, 0); +} +#endif + +#if TCG_TARGET_REG_BITS == 32 + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (!TCGV_EQUAL_I64(ret, arg)) { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + } +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ + tcg_gen_movi_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), 31); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + /* since arg2 and ret have different types, they cannot be the + same temporary */ +#ifdef TCG_TARGET_WORDS_BIGENDIAN + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); +#else + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); +#endif +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ +#ifdef TCG_TARGET_WORDS_BIGENDIAN + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); +#else + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); +#endif +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + 
tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +/* XXX: use generic code when basic block handling is OK or CPU + specific code (x86) */ +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0); +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0); +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. 
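+   (Worked out from the encoding above: each 64-bit signed slot n
+   contributes (1 << n*2) for "64 bit" plus (1 << (n*2 + 1)) for
+   "signed", so the three slots below give 3 | 12 | 48 = 0x3f.)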
*/ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1); +} + +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) +{ + tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2), cond, label_index); +} + +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), + TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + TCGv_i32 t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i32(); + + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + + tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); + tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); + tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2)); + tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); + + tcg_gen_mov_i64(ret, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i32(t1); +} + +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. 
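+   A short aside on the 64-bit multiply a few functions above: writing
+   a = aH*2^32 + aL and b = bH*2^32 + bL, the full product is
+       a*b = aH*bH*2^64 + (aL*bH + aH*bL)*2^32 + aL*bL,
+   and modulo 2^64 the aH*bH term vanishes; hence mulu2 on the low
+   halves plus the two cross products added into the high word is
+   enough.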
*/ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); +} + +#else + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (!TCGV_EQUAL_I64(ret, arg)) + tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ + tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_i64 arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_i64 arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); + } +} + +static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_and_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); + } +} + +static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_or_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_movi_i64(ret, 0); + } else { + tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, 
arg2); + } +} + +static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_xor_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_shl_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_shr_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_sar_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) +{ + tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index); +} + +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); +} + +#ifdef TCG_TARGET_HAS_div_i64 +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); +} +#elif defined(TCG_TARGET_HAS_div2_i64) +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t0, arg1, 63); + tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t0, arg1, 63); + tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, 0); + tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, 0); + tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} +#else +static inline void 
tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); +} +#endif + +#endif + +static inline void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_add_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg1); + tcg_gen_sub_i64(ret, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_sub_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} +static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, + int64_t arg2, int label_index) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_brcond_i64(cond, arg1, t0, label_index); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_setcond_i64(cond, ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_mul_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + + +/***************************************/ +/* optional operations */ + +static inline void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext8s_i32 + tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); +#else + tcg_gen_shli_i32(ret, arg, 24); + tcg_gen_sari_i32(ret, ret, 24); +#endif +} + +static inline void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext16s_i32 + tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); +#else + tcg_gen_shli_i32(ret, arg, 16); + tcg_gen_sari_i32(ret, ret, 16); +#endif +} + +static inline void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext8u_i32 + tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); +#else + 
tcg_gen_andi_i32(ret, arg, 0xffu); +#endif +} + +static inline void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext16u_i32 + tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); +#else + tcg_gen_andi_i32(ret, arg, 0xffffu); +#endif +} + +/* Note: we assume the two high bytes are set to zero */ +static inline void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_bswap16_i32 + tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); +#else + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_ext8u_i32(t0, arg); + tcg_gen_shli_i32(t0, t0, 8); + tcg_gen_shri_i32(ret, arg, 8); + tcg_gen_or_i32(ret, ret, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_bswap32_i32 + tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); +#else + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + + tcg_gen_shli_i32(t0, arg, 24); + + tcg_gen_andi_i32(t1, arg, 0x0000ff00); + tcg_gen_shli_i32(t1, t1, 8); + tcg_gen_or_i32(t0, t0, t1); + + tcg_gen_shri_i32(t1, arg, 8); + tcg_gen_andi_i32(t1, t1, 0x0000ff00); + tcg_gen_or_i32(t0, t0, t1); + + tcg_gen_shri_i32(t1, arg, 24); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); +} + +static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), arg); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +/* Note: we assume the six high bytes are set to zero */ +static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +} + +/* Note: we assume the four high bytes are set to zero */ +static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +} + +static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + + tcg_gen_bswap32_i32(t0, TCGV_LOW(arg)); + tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg)); + tcg_gen_mov_i32(TCGV_LOW(ret), t1); + 
tcg_gen_mov_i32(TCGV_HIGH(ret), t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +} +#else + +static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext8s_i64 + tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 56); + tcg_gen_sari_i64(ret, ret, 56); +#endif +} + +static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext16s_i64 + tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 48); + tcg_gen_sari_i64(ret, ret, 48); +#endif +} + +static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext32s_i64 + tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 32); + tcg_gen_sari_i64(ret, ret, 32); +#endif +} + +static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext8u_i64 + tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffu); +#endif +} + +static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext16u_i64 + tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffffu); +#endif +} + +static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext32u_i64 + tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffffffffu); +#endif +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers. This will probably break MIPS64 targets. */ +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers */ +static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers */ +static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +} + +/* Note: we assume the six high bytes are set to zero */ +static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap16_i64 + tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + + tcg_gen_ext8u_i64(t0, arg); + tcg_gen_shli_i64(t0, t0, 8); + tcg_gen_shri_i64(ret, arg, 8); + tcg_gen_or_i64(ret, ret, t0); + tcg_temp_free_i64(t0); +#endif +} + +/* Note: we assume the four high bytes are set to zero */ +static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap32_i64 + tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); +#else + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t0, arg, 24); + tcg_gen_ext32u_i64(t0, t0); + + tcg_gen_andi_i64(t1, arg, 0x0000ff00); + tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 8); + tcg_gen_andi_i64(t1, t1, 0x0000ff00); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 24); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap64_i64 + tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t0, arg, 56); + 
+ tcg_gen_andi_i64(t1, arg, 0x0000ff00); + tcg_gen_shli_i64(t1, t1, 40); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_andi_i64(t1, arg, 0x00ff0000); + tcg_gen_shli_i64(t1, t1, 24); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_andi_i64(t1, arg, 0xff000000); + tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 8); + tcg_gen_andi_i64(t1, t1, 0xff000000); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 24); + tcg_gen_andi_i64(t1, t1, 0x00ff0000); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 40); + tcg_gen_andi_i64(t1, t1, 0x0000ff00); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 56); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +#endif + +static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_neg_i32 + tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); +#else + TCGv_i32 t0 = tcg_const_i32(0); + tcg_gen_sub_i32(ret, t0, arg); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_neg_i64 + tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_const_i64(0); + tcg_gen_sub_i64(ret, t0, arg); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_not_i32 + tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); +#else + tcg_gen_xori_i32(ret, arg, -1); +#endif +} + +static inline void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_not_i64 + tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); +#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); +#else + tcg_gen_xori_i64(ret, arg, -1); +#endif +} + +static inline void tcg_gen_discard_i32(TCGv_i32 arg) +{ + tcg_gen_op1_i32(INDEX_op_discard, arg); +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ + tcg_gen_discard_i32(TCGV_LOW(arg)); + tcg_gen_discard_i32(TCGV_HIGH(arg)); +} +#else +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ + tcg_gen_op1_i64(INDEX_op_discard, arg); +} +#endif + +static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(TCGV_LOW(dest), low); + tcg_gen_mov_i32(TCGV_HIGH(dest), high); +#else + TCGv_i64 tmp = tcg_temp_new_i64(); + /* This extension is only needed for type correctness. + We may be able to do better given target specific information. 
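+   Concretely, the sequence below computes
+       dest = ((uint64_t)high << 32) | (uint32_t)low;
+   e.g. low = 0x89abcdef and high = 0x01234567 yield 0x0123456789abcdef.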
*/ + tcg_gen_extu_i32_i64(tmp, high); + tcg_gen_shli_i64(tmp, tmp, 32); + tcg_gen_extu_i32_i64(dest, low); + tcg_gen_or_i64(dest, dest, tmp); + tcg_temp_free_i64(tmp); +#endif +} + +static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high)); +#else + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(dest, low); + tcg_gen_shli_i64(tmp, high, 32); + tcg_gen_or_i64(dest, dest, tmp); + tcg_temp_free_i64(tmp); +#endif +} + +static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_andc_i32 + tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); +#else + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_not_i32(t0, arg2); + tcg_gen_and_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_andc_i64 + tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_andc_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_not_i64(t0, arg2); + tcg_gen_and_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_eqv_i32 + tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); +#else + tcg_gen_xor_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_eqv_i64 + tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_xor_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_nand_i32 + tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); +#else + tcg_gen_and_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_nand_i64 + tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_and_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_nor_i32 + tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); +#else + tcg_gen_or_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_nor_i64 + tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_or_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void 
tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_orc_i32 + tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); +#else + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_not_i32(t0, arg2); + tcg_gen_or_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_orc_i64 + tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_orc_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_not_i64(t0, arg2); + tcg_gen_or_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i32 + tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); +#else + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shl_i32(t0, arg1, arg2); + tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_shr_i32(t1, arg1, t1); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i64 + tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); +#else + TCGv_i64 t0, t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_shl_i64(t0, arg1, arg2); + tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_shr_i64(t1, arg1, t1); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { +#ifdef TCG_TARGET_HAS_rot_i32 + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_rotl_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#else + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shli_i32(t0, arg1, arg2); + tcg_gen_shri_i32(t1, arg1, 32 - arg2); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif + } +} + +static inline void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { +#ifdef TCG_TARGET_HAS_rot_i64 + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_rotl_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#else + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_shli_i64(t0, arg1, arg2); + tcg_gen_shri_i64(t1, arg1, 64 - arg2); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif + } +} + +static inline void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i32 + tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); +#else + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shr_i32(t0, arg1, arg2); + tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_shl_i32(t1, arg1, t1); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i64 + tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); +#else + TCGv_i64 t0, t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + 
tcg_gen_shr_i64(t0, arg1, arg2); + tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_shl_i64(t1, arg1, t1); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_rotli_i32(ret, arg1, 32 - arg2); + } +} + +static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_rotli_i64(ret, arg1, 64 - arg2); + } +} + +/***************************************/ +/* QEMU specific operations. Their type depend on the QEMU CPU + type. */ +#ifndef TARGET_LONG_BITS +#error must include QEMU headers +#endif + +#if TARGET_LONG_BITS == 32 +#define TCGv TCGv_i32 +#define tcg_temp_new() tcg_temp_new_i32() +#define tcg_global_reg_new tcg_global_reg_new_i32 +#define tcg_global_mem_new tcg_global_mem_new_i32 +#define tcg_temp_local_new() tcg_temp_local_new_i32() +#define tcg_temp_free tcg_temp_free_i32 +#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32 +#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32 +#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b) +#else +#define TCGv TCGv_i64 +#define tcg_temp_new() tcg_temp_new_i64() +#define tcg_global_reg_new tcg_global_reg_new_i64 +#define tcg_global_mem_new tcg_global_mem_new_i64 +#define tcg_temp_local_new() tcg_temp_local_new_i64() +#define tcg_temp_free tcg_temp_free_i64 +#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64 +#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64 +#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b) +#endif + +/* debug info: write the PC of the corresponding QEMU CPU instruction */ +static inline void tcg_gen_debug_insn_start(uint64_t pc) +{ + /* XXX: must really use a 32 bit size for TCGArg in all cases */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + tcg_gen_op2ii(INDEX_op_debug_insn_start, + (uint32_t)(pc), (uint32_t)(pc >> 32)); +#else + tcg_gen_op1i(INDEX_op_debug_insn_start, pc); +#endif +} + +static inline void tcg_gen_exit_tb(tcg_target_long val) +{ + tcg_gen_op1i(INDEX_op_exit_tb, val); +} + +static inline void tcg_gen_goto_tb(int idx) +{ + tcg_gen_op1i(INDEX_op_goto_tb, idx); +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld8u, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld8u, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld8s, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld8s, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld16u, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld16u, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS 
== 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld16s, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld16s, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op4i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), addr, mem_index); +#else + tcg_gen_op5i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st8, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st8, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st16, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st16, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st32, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st32, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op4i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), addr, + mem_index); +#else + tcg_gen_op5i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), + TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); +#endif +} + +#define tcg_gen_ld_ptr tcg_gen_ld_i32 +#define tcg_gen_discard_ptr tcg_gen_discard_i32 + +#else /* TCG_TARGET_REG_BITS == 32 */ + +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8u, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8s, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16u, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16s, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index); +#endif +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + 
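/*
 * Illustrative sketch (not part of the upstream sources): on a 32-bit host
 * a 64-bit TCG value travels as a (low, high) pair of i32 temps, which is
 * why the load/store wrappers above pass TCGV_LOW()/TCGV_HIGH() and an
 * extra address word.  Signed loads fill the high half with a copy of the
 * low half's sign bit (the sari by 31); unsigned loads clear it.  The
 * equivalent host-side arithmetic, with invented names:
 */
#include <stdint.h>

typedef struct { uint32_t lo, hi; } sketch_pair32;

static sketch_pair32 sketch_widen_s32(int32_t v)    /* ld32s-style widening */
{
    sketch_pair32 r;
    r.lo = (uint32_t)v;
    r.hi = v < 0 ? UINT32_MAX : 0;   /* same result as sari(lo, 31) */
    return r;
}

static sketch_pair32 sketch_widen_u32(uint32_t v)   /* ld32u-style widening */
{
    sketch_pair32 r = { v, 0 };      /* high half cleared (movi 0) */
    return r;
}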
tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index); +#endif +} + +static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_ld64, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st8, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st16, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st32, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index); +} + +#define tcg_gen_ld_ptr tcg_gen_ld_i64 +#define tcg_gen_discard_ptr tcg_gen_discard_i64 + +#endif /* TCG_TARGET_REG_BITS != 32 */ + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_movi_tl tcg_gen_movi_i64 +#define tcg_gen_mov_tl tcg_gen_mov_i64 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64 +#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64 +#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64 +#define tcg_gen_ld_tl tcg_gen_ld_i64 +#define tcg_gen_st8_tl tcg_gen_st8_i64 +#define tcg_gen_st16_tl tcg_gen_st16_i64 +#define tcg_gen_st32_tl tcg_gen_st32_i64 +#define tcg_gen_st_tl tcg_gen_st_i64 +#define tcg_gen_add_tl tcg_gen_add_i64 +#define tcg_gen_addi_tl tcg_gen_addi_i64 +#define tcg_gen_sub_tl tcg_gen_sub_i64 +#define tcg_gen_neg_tl tcg_gen_neg_i64 +#define tcg_gen_subfi_tl tcg_gen_subfi_i64 +#define tcg_gen_subi_tl tcg_gen_subi_i64 +#define tcg_gen_and_tl tcg_gen_and_i64 +#define tcg_gen_andi_tl tcg_gen_andi_i64 +#define tcg_gen_or_tl tcg_gen_or_i64 +#define tcg_gen_ori_tl tcg_gen_ori_i64 +#define tcg_gen_xor_tl tcg_gen_xor_i64 +#define tcg_gen_xori_tl tcg_gen_xori_i64 +#define tcg_gen_not_tl tcg_gen_not_i64 +#define tcg_gen_shl_tl tcg_gen_shl_i64 +#define tcg_gen_shli_tl tcg_gen_shli_i64 +#define tcg_gen_shr_tl tcg_gen_shr_i64 +#define tcg_gen_shri_tl tcg_gen_shri_i64 +#define tcg_gen_sar_tl tcg_gen_sar_i64 +#define tcg_gen_sari_tl tcg_gen_sari_i64 +#define tcg_gen_brcond_tl tcg_gen_brcond_i64 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 +#define tcg_gen_setcond_tl tcg_gen_setcond_i64 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64 +#define tcg_gen_mul_tl tcg_gen_mul_i64 +#define tcg_gen_muli_tl tcg_gen_muli_i64 +#define tcg_gen_div_tl tcg_gen_div_i64 +#define tcg_gen_rem_tl tcg_gen_rem_i64 +#define tcg_gen_divu_tl tcg_gen_divu_i64 +#define tcg_gen_remu_tl tcg_gen_remu_i64 +#define tcg_gen_discard_tl tcg_gen_discard_i64 +#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 +#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 +#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 +#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64 +#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64 +#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 +#define tcg_gen_bswap64_tl 
tcg_gen_bswap64_i64 +#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i64 +#define tcg_gen_eqv_tl tcg_gen_eqv_i64 +#define tcg_gen_nand_tl tcg_gen_nand_i64 +#define tcg_gen_nor_tl tcg_gen_nor_i64 +#define tcg_gen_orc_tl tcg_gen_orc_i64 +#define tcg_gen_rotl_tl tcg_gen_rotl_i64 +#define tcg_gen_rotli_tl tcg_gen_rotli_i64 +#define tcg_gen_rotr_tl tcg_gen_rotr_i64 +#define tcg_gen_rotri_tl tcg_gen_rotri_i64 +#define tcg_const_tl tcg_const_i64 +#define tcg_const_local_tl tcg_const_local_i64 +#else +#define tcg_gen_movi_tl tcg_gen_movi_i32 +#define tcg_gen_mov_tl tcg_gen_mov_i32 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32 +#define tcg_gen_ld32u_tl tcg_gen_ld_i32 +#define tcg_gen_ld32s_tl tcg_gen_ld_i32 +#define tcg_gen_ld_tl tcg_gen_ld_i32 +#define tcg_gen_st8_tl tcg_gen_st8_i32 +#define tcg_gen_st16_tl tcg_gen_st16_i32 +#define tcg_gen_st32_tl tcg_gen_st_i32 +#define tcg_gen_st_tl tcg_gen_st_i32 +#define tcg_gen_add_tl tcg_gen_add_i32 +#define tcg_gen_addi_tl tcg_gen_addi_i32 +#define tcg_gen_sub_tl tcg_gen_sub_i32 +#define tcg_gen_neg_tl tcg_gen_neg_i32 +#define tcg_gen_subfi_tl tcg_gen_subfi_i32 +#define tcg_gen_subi_tl tcg_gen_subi_i32 +#define tcg_gen_and_tl tcg_gen_and_i32 +#define tcg_gen_andi_tl tcg_gen_andi_i32 +#define tcg_gen_or_tl tcg_gen_or_i32 +#define tcg_gen_ori_tl tcg_gen_ori_i32 +#define tcg_gen_xor_tl tcg_gen_xor_i32 +#define tcg_gen_xori_tl tcg_gen_xori_i32 +#define tcg_gen_not_tl tcg_gen_not_i32 +#define tcg_gen_shl_tl tcg_gen_shl_i32 +#define tcg_gen_shli_tl tcg_gen_shli_i32 +#define tcg_gen_shr_tl tcg_gen_shr_i32 +#define tcg_gen_shri_tl tcg_gen_shri_i32 +#define tcg_gen_sar_tl tcg_gen_sar_i32 +#define tcg_gen_sari_tl tcg_gen_sari_i32 +#define tcg_gen_brcond_tl tcg_gen_brcond_i32 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 +#define tcg_gen_setcond_tl tcg_gen_setcond_i32 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32 +#define tcg_gen_mul_tl tcg_gen_mul_i32 +#define tcg_gen_muli_tl tcg_gen_muli_i32 +#define tcg_gen_div_tl tcg_gen_div_i32 +#define tcg_gen_rem_tl tcg_gen_rem_i32 +#define tcg_gen_divu_tl tcg_gen_divu_i32 +#define tcg_gen_remu_tl tcg_gen_remu_i32 +#define tcg_gen_discard_tl tcg_gen_discard_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_extu_i32_tl tcg_gen_mov_i32 +#define tcg_gen_ext_i32_tl tcg_gen_mov_i32 +#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32 +#define tcg_gen_ext32u_tl tcg_gen_mov_i32 +#define tcg_gen_ext32s_tl tcg_gen_mov_i32 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i32 +#define tcg_gen_eqv_tl tcg_gen_eqv_i32 +#define tcg_gen_nand_tl tcg_gen_nand_i32 +#define tcg_gen_nor_tl tcg_gen_nor_i32 +#define tcg_gen_orc_tl tcg_gen_orc_i32 +#define tcg_gen_rotl_tl tcg_gen_rotl_i32 +#define tcg_gen_rotli_tl tcg_gen_rotli_i32 +#define tcg_gen_rotr_tl tcg_gen_rotr_i32 +#define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_const_tl tcg_const_i32 +#define tcg_const_local_tl tcg_const_local_i32 +#endif + +#if TCG_TARGET_REG_BITS == 32 +#define 
tcg_gen_add_ptr tcg_gen_add_i32 +#define tcg_gen_addi_ptr tcg_gen_addi_i32 +#define tcg_gen_ext_i32_ptr tcg_gen_mov_i32 +#else /* TCG_TARGET_REG_BITS == 32 */ +#define tcg_gen_add_ptr tcg_gen_add_i64 +#define tcg_gen_addi_ptr tcg_gen_addi_i64 +#define tcg_gen_ext_i32_ptr tcg_gen_ext_i32_i64 +#endif /* TCG_TARGET_REG_BITS != 32 */ diff --git a/src/recompiler/tcg/tcg-opc.h b/src/recompiler/tcg/tcg-opc.h new file mode 100644 index 00000000..2a98fed9 --- /dev/null +++ b/src/recompiler/tcg/tcg-opc.h @@ -0,0 +1,304 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * DEF(name, oargs, iargs, cargs, flags) + */ + +/* predefined ops */ +DEF(end, 0, 0, 0, 0) /* must be kept first */ +DEF(nop, 0, 0, 0, 0) +DEF(nop1, 0, 0, 1, 0) +DEF(nop2, 0, 0, 2, 0) +DEF(nop3, 0, 0, 3, 0) +DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ + +DEF(discard, 1, 0, 0, 0) + +DEF(set_label, 0, 0, 1, 0) +DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) + +DEF(mov_i32, 1, 1, 0, 0) +DEF(movi_i32, 1, 0, 1, 0) +DEF(setcond_i32, 1, 2, 1, 0) +/* load/store */ +DEF(ld8u_i32, 1, 1, 1, 0) +DEF(ld8s_i32, 1, 1, 1, 0) +DEF(ld16u_i32, 1, 1, 1, 0) +DEF(ld16s_i32, 1, 1, 1, 0) +DEF(ld_i32, 1, 1, 1, 0) +DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +/* arith */ +DEF(add_i32, 1, 2, 0, 0) +DEF(sub_i32, 1, 2, 0, 0) +DEF(mul_i32, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_div_i32 +DEF(div_i32, 1, 2, 0, 0) +DEF(divu_i32, 1, 2, 0, 0) +DEF(rem_i32, 1, 2, 0, 0) +DEF(remu_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_div2_i32 +DEF(div2_i32, 2, 3, 0, 0) +DEF(divu2_i32, 2, 3, 0, 0) +#endif +DEF(and_i32, 1, 2, 0, 0) +DEF(or_i32, 1, 2, 0, 0) +DEF(xor_i32, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i32, 1, 2, 0, 0) +DEF(shr_i32, 1, 2, 0, 0) +DEF(sar_i32, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_rot_i32 +DEF(rotl_i32, 1, 2, 0, 0) +DEF(rotr_i32, 1, 2, 0, 0) +#endif + +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +#if TCG_TARGET_REG_BITS == 32 +DEF(add2_i32, 2, 4, 0, 0) +DEF(sub2_i32, 2, 4, 0, 0) +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(mulu2_i32, 2, 2, 0, 0) +DEF(setcond2_i32, 1, 4, 1, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8s_i32 +DEF(ext8s_i32, 1, 1, 0, 0) +#endif 
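/*
 * Illustrative sketch (not part of the upstream sources): tcg-opc.h is an
 * X-macro list -- every opcode is one DEF(name, oargs, iargs, cargs, flags)
 * line, and each includer supplies its own DEF before including the file.
 * tcg.c (later in this patch) builds its TCGOpDef table that way, and the
 * INDEX_op_* enumeration is produced from the same list in tcg.h (not shown
 * here).  A minimal self-contained model of the pattern; the real list lives
 * in a re-included header, here it is a macro so the sketch fits in one file:
 */
#include <stdio.h>

#define SKETCH_OPS \
    DEF(nop,    0, 0) \
    DEF(add,    1, 2) \
    DEF(brcond, 0, 2)

enum {                                    /* expansion #1: the opcode enum */
#define DEF(name, oargs, iargs) SKETCH_op_##name,
    SKETCH_OPS
#undef DEF
    SKETCH_NB_OPS
};

static const struct { const char *name; int oargs, iargs; } sketch_op_defs[] = {
#define DEF(name, oargs, iargs) { #name, oargs, iargs },  /* expansion #2: the table */
    SKETCH_OPS
#undef DEF
};

int main(void)
{
    /* prints "add: 2 in, 1 out" */
    printf("%s: %d in, %d out\n", sketch_op_defs[SKETCH_op_add].name,
           sketch_op_defs[SKETCH_op_add].iargs,
           sketch_op_defs[SKETCH_op_add].oargs);
    return 0;
}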
+#ifdef TCG_TARGET_HAS_ext16s_i32 +DEF(ext16s_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8u_i32 +DEF(ext8u_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16u_i32 +DEF(ext16u_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap16_i32 +DEF(bswap16_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap32_i32 +DEF(bswap32_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_not_i32 +DEF(not_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_neg_i32 +DEF(neg_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_andc_i32 +DEF(andc_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_orc_i32 +DEF(orc_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i32 +DEF(eqv_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i32 +DEF(nand_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i32 +DEF(nor_i32, 1, 2, 0, 0) +#endif + +#if TCG_TARGET_REG_BITS == 64 +DEF(mov_i64, 1, 1, 0, 0) +DEF(movi_i64, 1, 0, 1, 0) +DEF(setcond_i64, 1, 2, 1, 0) +/* load/store */ +DEF(ld8u_i64, 1, 1, 1, 0) +DEF(ld8s_i64, 1, 1, 1, 0) +DEF(ld16u_i64, 1, 1, 1, 0) +DEF(ld16s_i64, 1, 1, 1, 0) +DEF(ld32u_i64, 1, 1, 1, 0) +DEF(ld32s_i64, 1, 1, 1, 0) +DEF(ld_i64, 1, 1, 1, 0) +DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +/* arith */ +DEF(add_i64, 1, 2, 0, 0) +DEF(sub_i64, 1, 2, 0, 0) +DEF(mul_i64, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_div_i64 +DEF(div_i64, 1, 2, 0, 0) +DEF(divu_i64, 1, 2, 0, 0) +DEF(rem_i64, 1, 2, 0, 0) +DEF(remu_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_div2_i64 +DEF(div2_i64, 2, 3, 0, 0) +DEF(divu2_i64, 2, 3, 0, 0) +#endif +DEF(and_i64, 1, 2, 0, 0) +DEF(or_i64, 1, 2, 0, 0) +DEF(xor_i64, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i64, 1, 2, 0, 0) +DEF(shr_i64, 1, 2, 0, 0) +DEF(sar_i64, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_rot_i64 +DEF(rotl_i64, 1, 2, 0, 0) +DEF(rotr_i64, 1, 2, 0, 0) +#endif + +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +#ifdef TCG_TARGET_HAS_ext8s_i64 +DEF(ext8s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16s_i64 +DEF(ext16s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext32s_i64 +DEF(ext32s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8u_i64 +DEF(ext8u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16u_i64 +DEF(ext16u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext32u_i64 +DEF(ext32u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap16_i64 +DEF(bswap16_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap32_i64 +DEF(bswap32_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap64_i64 +DEF(bswap64_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_not_i64 +DEF(not_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_neg_i64 +DEF(neg_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_andc_i64 +DEF(andc_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_orc_i64 +DEF(orc_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i64 +DEF(eqv_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i64 +DEF(nand_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i64 +DEF(nor_i64, 1, 2, 0, 0) +#endif +#endif + +/* QEMU specific */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +DEF(debug_insn_start, 0, 0, 2, 0) +#else +DEF(debug_insn_start, 0, 0, 1, 0) +#endif +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op + constants must be defined */ +#if TCG_TARGET_REG_BITS == 32 +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld8u, 1, 1, 1, 
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif + +#if TARGET_LONG_BITS == 32 +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif + +#else /* TCG_TARGET_REG_BITS == 32 */ + +DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) + +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) + +#endif /* TCG_TARGET_REG_BITS != 32 */ + +#undef DEF diff --git a/src/recompiler/tcg/tcg-runtime.h b/src/recompiler/tcg/tcg-runtime.h new file mode 100644 index 00000000..5615b133 --- /dev/null +++ b/src/recompiler/tcg/tcg-runtime.h @@ -0,0 +1,18 @@ +#ifndef TCG_RUNTIME_H +#define TCG_RUNTIME_H + +/* tcg-runtime.c */ +int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2); +int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2); +uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2); +uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2); + +int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2); +uint64_t tcg_helper_divu_i64(uint64_t 
arg1, uint64_t arg2); +uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2); + +#endif diff --git a/src/recompiler/tcg/tcg.c b/src/recompiler/tcg/tcg.c new file mode 100644 index 00000000..4e636099 --- /dev/null +++ b/src/recompiler/tcg/tcg.c @@ -0,0 +1,2212 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* define it to use liveness analysis (better code) */ +#define USE_LIVENESS_ANALYSIS + +#include "config.h" + +#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) +/* define it to suppress various consistency checks (faster) */ +#define NDEBUG +#endif + +#ifndef VBOX +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#ifdef _WIN32 +#include <malloc.h> +#endif +#ifdef _AIX +#include <alloca.h> +#endif +#else /* VBOX */ +# include <stdio.h> +# include "osdep.h" +#endif /* VBOX */ + +#include "qemu-common.h" +#include "cache-utils.h" +#include "host-utils.h" +#include "qemu-timer.h" + +/* Note: the long term plan is to reduce the dependancies on the QEMU + CPU definitions. Currently they are used for qemu_ld/st + instructions */ +#define NO_CPU_IO_DEFS +#include "cpu.h" +#include "exec-all.h" + +#include "tcg-op.h" +#include "elf.h" + +#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE) +#error GUEST_BASE not supported on this host. +#endif + +#ifdef VBOX +/* + * Liveness analysis doesn't work well with 32-bit hosts and 64-bit targets, + * second element of the register pair to store 64-bit value is considered + * dead, it seems. 
*/ + /** @todo re-test this */ +# if defined(TARGET_X86_64) && (TCG_TARGET_REG_BITS == 32) +# undef USE_LIVENESS_ANALYSIS +# endif +#endif /* VBOX */ + +static void tcg_target_init(TCGContext *s); +static void tcg_target_qemu_prologue(TCGContext *s); +static void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend); + +static TCGOpDef tcg_op_defs[] = { +#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, +#include "tcg-opc.h" +#undef DEF +}; + +static TCGRegSet tcg_target_available_regs[2]; +static TCGRegSet tcg_target_call_clobber_regs; + +/* XXX: move that inside the context */ +uint16_t *gen_opc_ptr; +TCGArg *gen_opparam_ptr; + +static inline void tcg_out8(TCGContext *s, uint8_t v) +{ + *s->code_ptr++ = v; +} + +static inline void tcg_out16(TCGContext *s, uint16_t v) +{ + *(uint16_t *)s->code_ptr = v; + s->code_ptr += 2; +} + +static inline void tcg_out32(TCGContext *s, uint32_t v) +{ + *(uint32_t *)s->code_ptr = v; + s->code_ptr += 4; +} + +/* label relocation processing */ + +static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, + int label_index, tcg_target_long addend) +{ + TCGLabel *l; + TCGRelocation *r; + + l = &s->labels[label_index]; + if (l->has_value) { + /* FIXME: This may break relocations on RISC targets that + modify instruction fields in place. The caller may not have + written the initial value. */ + patch_reloc(code_ptr, type, l->u.value, addend); + } else { + /* add a new relocation entry */ + r = tcg_malloc(sizeof(TCGRelocation)); + r->type = type; + r->ptr = code_ptr; + r->addend = addend; + r->next = l->u.first_reloc; + l->u.first_reloc = r; + } +} + +static void tcg_out_label(TCGContext *s, int label_index, + tcg_target_long value) +{ + TCGLabel *l; + TCGRelocation *r; + + l = &s->labels[label_index]; + if (l->has_value) + tcg_abort(); + r = l->u.first_reloc; + while (r != NULL) { + patch_reloc(r->ptr, r->type, value, r->addend); + r = r->next; + } + l->has_value = 1; + l->u.value = value; +} + +int gen_new_label(void) +{ + TCGContext *s = &tcg_ctx; + int idx; + TCGLabel *l; + + if (s->nb_labels >= TCG_MAX_LABELS) + tcg_abort(); + idx = s->nb_labels++; + l = &s->labels[idx]; + l->has_value = 0; + l->u.first_reloc = NULL; + return idx; +} + +#include "tcg-target.c" + +/* pool based memory allocation */ +void *tcg_malloc_internal(TCGContext *s, int size) +{ + TCGPool *p; + int pool_size; + + if (size > TCG_POOL_CHUNK_SIZE) { + /* big malloc: insert a new pool (XXX: could optimize) */ + p = qemu_malloc(sizeof(TCGPool) + size); + p->size = size; + if (s->pool_current) + s->pool_current->next = p; + else + s->pool_first = p; + p->next = s->pool_current; + } else { + p = s->pool_current; + if (!p) { + p = s->pool_first; + if (!p) + goto new_pool; + } else { + if (!p->next) { + new_pool: + pool_size = TCG_POOL_CHUNK_SIZE; + p = qemu_malloc(sizeof(TCGPool) + pool_size); + p->size = pool_size; + p->next = NULL; + if (s->pool_current) + s->pool_current->next = p; + else + s->pool_first = p; + } else { + p = p->next; + } + } + } + s->pool_current = p; + s->pool_cur = p->data + size; + s->pool_end = p->data + p->size; + return p->data; +} + +void tcg_pool_reset(TCGContext *s) +{ + s->pool_cur = s->pool_end = NULL; + s->pool_current = NULL; +} + +void tcg_context_init(TCGContext *s) +{ + int op, total_args, n; + TCGOpDef *def; + TCGArgConstraint *args_ct; + int *sorted_args; + + memset(s, 0, sizeof(*s)); + s->temps = s->static_temps; + s->nb_globals = 0; + + /* Count total 
number of arguments and allocate the corresponding + space */ + total_args = 0; + for(op = 0; op < NB_OPS; op++) { + def = &tcg_op_defs[op]; + n = def->nb_iargs + def->nb_oargs; + total_args += n; + } + + args_ct = qemu_malloc(sizeof(TCGArgConstraint) * total_args); + sorted_args = qemu_malloc(sizeof(int) * total_args); + + for(op = 0; op < NB_OPS; op++) { + def = &tcg_op_defs[op]; + def->args_ct = args_ct; + def->sorted_args = sorted_args; + n = def->nb_iargs + def->nb_oargs; + sorted_args += n; + args_ct += n; + } + + tcg_target_init(s); +} + +void tcg_prologue_init(TCGContext *s) +{ + /* init global prologue and epilogue */ + s->code_buf = code_gen_prologue; + s->code_ptr = s->code_buf; + tcg_target_qemu_prologue(s); + flush_icache_range((uintptr_t)s->code_buf, + (uintptr_t)s->code_ptr); +} + +void tcg_set_frame(TCGContext *s, int reg, + tcg_target_long start, tcg_target_long size) +{ + s->frame_start = start; + s->frame_end = start + size; + s->frame_reg = reg; +} + +void tcg_func_start(TCGContext *s) +{ + int i; + tcg_pool_reset(s); + s->nb_temps = s->nb_globals; + for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) + s->first_free_temp[i] = -1; + s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); + s->nb_labels = 0; + s->current_frame_offset = s->frame_start; + + gen_opc_ptr = gen_opc_buf; + gen_opparam_ptr = gen_opparam_buf; +} + +static inline void tcg_temp_alloc(TCGContext *s, int n) +{ + if (n > TCG_MAX_TEMPS) + tcg_abort(); +} + +static inline int tcg_global_reg_new_internal(TCGType type, int reg, + const char *name) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx; + +#if TCG_TARGET_REG_BITS == 32 + if (type != TCG_TYPE_I32) + tcg_abort(); +#endif + if (tcg_regset_test_reg(s->reserved_regs, reg)) + tcg_abort(); + idx = s->nb_globals; + tcg_temp_alloc(s, s->nb_globals + 1); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = type; + ts->fixed_reg = 1; + ts->reg = reg; + ts->name = name; + s->nb_globals++; + tcg_regset_set_reg(s->reserved_regs, reg); + return idx; +} + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name) +{ + int idx; + + idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name) +{ + int idx; + + idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name); + return MAKE_TCGV_I64(idx); +} + +static inline int tcg_global_mem_new_internal(TCGType type, int reg, + tcg_target_long offset, + const char *name) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx; + + idx = s->nb_globals; +#if TCG_TARGET_REG_BITS == 32 + if (type == TCG_TYPE_I64) { + char buf[64]; + tcg_temp_alloc(s, s->nb_globals + 2); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; +#ifdef TCG_TARGET_WORDS_BIGENDIAN + ts->mem_offset = offset + 4; +#else + ts->mem_offset = offset; +#endif + pstrcpy(buf, sizeof(buf), name); + pstrcat(buf, sizeof(buf), "_0"); + ts->name = strdup(buf); + ts++; + + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; +#ifdef TCG_TARGET_WORDS_BIGENDIAN + ts->mem_offset = offset; +#else + ts->mem_offset = offset + 4; +#endif + pstrcpy(buf, sizeof(buf), name); + pstrcat(buf, sizeof(buf), "_1"); + ts->name = strdup(buf); + + s->nb_globals += 2; + } else +#endif + { + tcg_temp_alloc(s, s->nb_globals + 1); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = type; + 
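/*
 * Illustrative sketch (not part of the upstream sources): on a 32-bit host
 * the code above registers a 64-bit global as two i32 temps, "<name>_0" and
 * "<name>_1", covering the two words of the same 64-bit memory slot.  The
 * "+ 4" is applied so that each temp's mem_offset points at the correct
 * word for the host byte order, which reduces to (names invented):
 */
#include <stdint.h>

static void sketch_half_offsets(intptr_t off64, int host_big_endian,
                                intptr_t *lo_word_off, intptr_t *hi_word_off)
{
    /* little-endian host: low word at off64; big-endian host: at off64 + 4 */
    *lo_word_off = off64 + (host_big_endian ? 4 : 0);
    *hi_word_off = off64 + (host_big_endian ? 0 : 4);
}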
ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; + ts->mem_offset = offset; + ts->name = name; + s->nb_globals++; + } + return idx; +} + +TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, + const char *name) +{ + int idx; + + idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, + const char *name) +{ + int idx; + + idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); + return MAKE_TCGV_I64(idx); +} + +static inline int tcg_temp_new_internal(TCGType type, int temp_local) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx, k; + + k = type; + if (temp_local) + k += TCG_TYPE_COUNT; + idx = s->first_free_temp[k]; + if (idx != -1) { + /* There is already an available temp with the + right type */ + ts = &s->temps[idx]; + s->first_free_temp[k] = ts->next_free_temp; + ts->temp_allocated = 1; + assert(ts->temp_local == temp_local); + } else { + idx = s->nb_temps; +#if TCG_TARGET_REG_BITS == 32 + if (type == TCG_TYPE_I64) { + tcg_temp_alloc(s, s->nb_temps + 2); + ts = &s->temps[s->nb_temps]; + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + ts++; + ts->base_type = TCG_TYPE_I32; + ts->type = TCG_TYPE_I32; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + s->nb_temps += 2; + } else +#endif + { + tcg_temp_alloc(s, s->nb_temps + 1); + ts = &s->temps[s->nb_temps]; + ts->base_type = type; + ts->type = type; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + s->nb_temps++; + } + } + return idx; +} + +TCGv_i32 tcg_temp_new_internal_i32(int temp_local) +{ + int idx; + + idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_temp_new_internal_i64(int temp_local) +{ + int idx; + + idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); + return MAKE_TCGV_I64(idx); +} + +static inline void tcg_temp_free_internal(int idx) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int k; + + assert(idx >= s->nb_globals && idx < s->nb_temps); + ts = &s->temps[idx]; + assert(ts->temp_allocated != 0); + ts->temp_allocated = 0; + k = ts->base_type; + if (ts->temp_local) + k += TCG_TYPE_COUNT; + ts->next_free_temp = s->first_free_temp[k]; + s->first_free_temp[k] = idx; +} + +void tcg_temp_free_i32(TCGv_i32 arg) +{ + tcg_temp_free_internal(GET_TCGV_I32(arg)); +} + +void tcg_temp_free_i64(TCGv_i64 arg) +{ + tcg_temp_free_internal(GET_TCGV_I64(arg)); +} + +TCGv_i32 tcg_const_i32(int32_t val) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, val); + return t0; +} + +TCGv_i64 tcg_const_i64(int64_t val) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, val); + return t0; +} + +TCGv_i32 tcg_const_local_i32(int32_t val) +{ + TCGv_i32 t0; + t0 = tcg_temp_local_new_i32(); + tcg_gen_movi_i32(t0, val); + return t0; +} + +TCGv_i64 tcg_const_local_i64(int64_t val) +{ + TCGv_i64 t0; + t0 = tcg_temp_local_new_i64(); + tcg_gen_movi_i64(t0, val); + return t0; +} + +void tcg_register_helper(void *func, const char *name) +{ + TCGContext *s = &tcg_ctx; + int n; + if ((s->nb_helpers + 1) > s->allocated_helpers) { + n = s->allocated_helpers; + if (n == 0) { + n = 4; + } else { + n *= 2; + } +#ifdef VBOX + s->helpers = qemu_realloc(s->helpers, n * sizeof(TCGHelperInfo)); +#else + s->helpers = realloc(s->helpers, n * sizeof(TCGHelperInfo)); +#endif + s->allocated_helpers = n; + 
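/*
 * Illustrative sketch (not part of the upstream sources): tcg_gen_callN()
 * below receives a packed "sizemask" describing the call signature, two
 * bits per value -- bit 0 marks a 64-bit return, and for argument i the bit
 * at position (i + 1) * 2 marks a 64-bit argument while the next bit marks
 * a signed one (used to choose sign- vs zero-extension on hosts that
 * extend arguments).  The tests it performs are equivalent to (names
 * invented):
 */
static int sketch_ret_is_64bit(int sizemask)
{
    return sizemask & 1;
}

static int sketch_arg_is_64bit(int sizemask, int i)
{
    return (sizemask & (1 << (i + 1) * 2)) != 0;
}

static int sketch_arg_is_signed(int sizemask, int i)
{
    return (sizemask & (2 << (i + 1) * 2)) != 0;
}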
} + s->helpers[s->nb_helpers].func = (tcg_target_ulong)func; + s->helpers[s->nb_helpers].name = name; + s->nb_helpers++; +} + +/* Note: we convert the 64 bit args to 32 bit and do some alignment + and endian swap. Maybe it would be better to do the alignment + and endian swap in tcg_reg_alloc_call(). */ +void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, + int sizemask, TCGArg ret, int nargs, TCGArg *args) +{ +#ifdef TCG_TARGET_I386 + int call_type; +#endif + int i; + int real_args; + int nb_rets; + TCGArg *nparam; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + int is_signed = sizemask & (2 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = tcg_temp_new_i64(); + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + if (is_signed) { + tcg_gen_ext32s_i64(temp, orig); + } else { + tcg_gen_ext32u_i64(temp, orig); + } + args[i] = GET_TCGV_I64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ + + *gen_opc_ptr++ = INDEX_op_call; + nparam = gen_opparam_ptr++; +#ifdef TCG_TARGET_I386 + call_type = (flags & TCG_CALL_TYPE_MASK); +#endif + if (ret != TCG_CALL_DUMMY_ARG) { +#if TCG_TARGET_REG_BITS < 64 + if (sizemask & 1) { +#ifdef TCG_TARGET_WORDS_BIGENDIAN + *gen_opparam_ptr++ = ret + 1; + *gen_opparam_ptr++ = ret; +#else + *gen_opparam_ptr++ = ret; + *gen_opparam_ptr++ = ret + 1; +#endif + nb_rets = 2; + } else +#endif + { + *gen_opparam_ptr++ = ret; + nb_rets = 1; + } + } else { + nb_rets = 0; + } + real_args = 0; + for (i = 0; i < nargs; i++) { +#if TCG_TARGET_REG_BITS < 64 + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { +#ifdef TCG_TARGET_I386 + /* REGPARM case: if the third parameter is 64 bit, it is + allocated on the stack */ + if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) { + call_type = TCG_CALL_TYPE_REGPARM_2; + flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type; + } +#endif +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + /* some targets want aligned 64 bit args */ + if (real_args & 1) { + *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG; + real_args++; + } +#endif + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. 
*/ +#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) + *gen_opparam_ptr++ = args[i] + 1; + *gen_opparam_ptr++ = args[i]; +#else + *gen_opparam_ptr++ = args[i]; + *gen_opparam_ptr++ = args[i] + 1; +#endif + real_args += 2; + continue; + } +#endif /* TCG_TARGET_REG_BITS < 64 */ + + *gen_opparam_ptr++ = args[i]; + real_args++; + } + *gen_opparam_ptr++ = GET_TCGV_PTR(func); + + *gen_opparam_ptr++ = flags; + + *nparam = (nb_rets << 16) | (real_args + 1); + + /* total parameters, needed to go backward in the instruction stream */ + *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = MAKE_TCGV_I64(args[i]); + tcg_temp_free_i64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ +} + +#if TCG_TARGET_REG_BITS == 32 +void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, + int c, int right, int arith) +{ + if (c == 0) { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + } else if (c >= 32) { + c -= 32; + if (right) { + if (arith) { + tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31); + } else { + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } + } else { + tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); + tcg_gen_movi_i32(TCGV_LOW(ret), 0); + } + } else { + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + if (right) { + tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); + if (arith) + tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); + else + tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); + tcg_gen_mov_i32(TCGV_HIGH(ret), t1); + } else { + tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); + /* Note: ret can be the same as arg1, so we use t1 */ + tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); + tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); + tcg_gen_mov_i32(TCGV_LOW(ret), t1); + } + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } +} +#endif + + +static void tcg_reg_alloc_start(TCGContext *s) +{ + int i; + TCGTemp *ts; + for(i = 0; i < s->nb_globals; i++) { + ts = &s->temps[i]; + if (ts->fixed_reg) { + ts->val_type = TEMP_VAL_REG; + } else { + ts->val_type = TEMP_VAL_MEM; + } + } + for(i = s->nb_globals; i < s->nb_temps; i++) { + ts = &s->temps[i]; + ts->val_type = TEMP_VAL_DEAD; + ts->mem_allocated = 0; + ts->fixed_reg = 0; + } + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + s->reg_to_temp[i] = -1; + } +} + +static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size, + int idx) +{ + TCGTemp *ts; + + ts = &s->temps[idx]; + if (idx < s->nb_globals) { + pstrcpy(buf, buf_size, ts->name); + } else { + if (ts->temp_local) + snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); + else + snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); + } + return buf; +} + +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg) +{ + return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg)); +} + +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg) +{ + return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg)); +} + +static int helper_cmp(const void *p1, const void *p2) +{ + const TCGHelperInfo *th1 = p1; + 
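/*
 * Illustrative sketch (not part of the upstream sources): tcg_gen_shifti_i64()
 * above open-codes a 64-bit shift by a constant using only 32-bit ops on a
 * 32-bit host.  For a left shift the value it produces is equivalent to the
 * plain C below (right shifts mirror it); the names are invented.
 */
#include <stdint.h>

static void sketch_shli_64_on_32(uint32_t lo, uint32_t hi, int c,
                                 uint32_t *rlo, uint32_t *rhi)
{
    if (c == 0) {
        *rlo = lo;
        *rhi = hi;
    } else if (c >= 32) {
        *rhi = lo << (c - 32);                 /* low word moves up, shifted */
        *rlo = 0;
    } else {
        *rhi = (hi << c) | (lo >> (32 - c));   /* bits carried across the words */
        *rlo = lo << c;
    }
}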
const TCGHelperInfo *th2 = p2; + if (th1->func < th2->func) + return -1; + else if (th1->func == th2->func) + return 0; + else + return 1; +} + +/* find helper definition (Note: A hash table would be better) */ +static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val) +{ + int m, m_min, m_max; + TCGHelperInfo *th; + tcg_target_ulong v; + + if (unlikely(!s->helpers_sorted)) { +#ifdef VBOX + qemu_qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo), + helper_cmp); +#else + qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo), + helper_cmp); +#endif + s->helpers_sorted = 1; + } + + /* binary search */ + m_min = 0; + m_max = s->nb_helpers - 1; + while (m_min <= m_max) { + m = (m_min + m_max) >> 1; + th = &s->helpers[m]; + v = th->func; + if (v == val) + return th; + else if (val < v) { + m_max = m - 1; + } else { + m_min = m + 1; + } + } + return NULL; +} + +static const char * const cond_name[] = +{ + [TCG_COND_EQ] = "eq", + [TCG_COND_NE] = "ne", + [TCG_COND_LT] = "lt", + [TCG_COND_GE] = "ge", + [TCG_COND_LE] = "le", + [TCG_COND_GT] = "gt", + [TCG_COND_LTU] = "ltu", + [TCG_COND_GEU] = "geu", + [TCG_COND_LEU] = "leu", + [TCG_COND_GTU] = "gtu" +}; + +void tcg_dump_ops(TCGContext *s, FILE *outfile) +{ + const uint16_t *opc_ptr; + const TCGArg *args; + TCGArg arg; + TCGOpcode c; + int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn; + const TCGOpDef *def; + char buf[128]; + + first_insn = 1; + opc_ptr = gen_opc_buf; + args = gen_opparam_buf; + while (opc_ptr < gen_opc_ptr) { + c = *opc_ptr++; + def = &tcg_op_defs[c]; + if (c == INDEX_op_debug_insn_start) { + uint64_t pc; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + pc = ((uint64_t)args[1] << 32) | args[0]; +#else + pc = args[0]; +#endif + if (!first_insn) + fprintf(outfile, "\n"); + fprintf(outfile, " ---- 0x%" PRIx64, pc); + first_insn = 0; + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + } else if (c == INDEX_op_call) { + TCGArg arg; + + /* variable number of arguments */ + arg = *args++; + nb_oargs = arg >> 16; + nb_iargs = arg & 0xffff; + nb_cargs = def->nb_cargs; + + fprintf(outfile, " %s ", def->name); + + /* function name */ + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + nb_iargs - 1])); + /* flags */ + fprintf(outfile, ",$0x%" TCG_PRIlx, + args[nb_oargs + nb_iargs]); + /* nb out args */ + fprintf(outfile, ",$%d", nb_oargs); + for(i = 0; i < nb_oargs; i++) { + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i])); + } + for(i = 0; i < (nb_iargs - 1); i++) { + fprintf(outfile, ","); + if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) { + fprintf(outfile, "<dummy>"); + } else { + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i])); + } + } + } else if (c == INDEX_op_movi_i32 +#if TCG_TARGET_REG_BITS == 64 + || c == INDEX_op_movi_i64 +#endif + ) { + tcg_target_ulong val; + TCGHelperInfo *th; + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + fprintf(outfile, " %s %s,$", def->name, + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0])); + val = args[1]; + th = tcg_find_helper(s, val); + if (th) { + fprintf(outfile, "%s", th->name); + } else { + if (c == INDEX_op_movi_i32) + fprintf(outfile, "0x%x", (uint32_t)val); + else + fprintf(outfile, "0x%" PRIx64 , (uint64_t)val); + } + } else { + fprintf(outfile, " %s ", def->name); + if (c == INDEX_op_nopn) { + /* variable number of arguments */ + nb_cargs = *args; + nb_oargs = 0; + nb_iargs = 0; 
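/*
 * Illustrative sketch (not part of the upstream sources): the generated IR
 * is a stream of 16-bit opcodes in gen_opc_buf with their operands in
 * gen_opparam_buf.  A call record starts with one packed word,
 * (nb_outputs << 16) | nb_inputs, where the input count includes the
 * function pointer, and it ends with a word holding the record's total
 * length so that backward passes such as the liveness analysis later in
 * this file can step over it via args[-1].  Decoding the packed word
 * amounts to (names invented):
 */
static void sketch_decode_call_counts(unsigned long packed,
                                      int *nb_outputs, int *nb_inputs)
{
    *nb_outputs = (int)(packed >> 16);
    *nb_inputs  = (int)(packed & 0xffff);   /* last input is the callee */
}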
+ } else { + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + } + + k = 0; + for(i = 0; i < nb_oargs; i++) { + if (k != 0) + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++])); + } + for(i = 0; i < nb_iargs; i++) { + if (k != 0) + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++])); + } + switch (c) { + case INDEX_op_brcond_i32: +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_brcond_i64: +#endif + case INDEX_op_setcond_i32: +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: +#endif + if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) + fprintf(outfile, ",%s", cond_name[args[k++]]); + else + fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]); + i = 1; + break; + default: + i = 0; + break; + } + for(; i < nb_cargs; i++) { + if (k != 0) + fprintf(outfile, ","); + arg = args[k++]; + fprintf(outfile, "$0x%" TCG_PRIlx, arg); + } + } + fprintf(outfile, "\n"); + args += nb_iargs + nb_oargs + nb_cargs; + } +} + +/* we give more priority to constraints with less registers */ +static int get_constraint_priority(const TCGOpDef *def, int k) +{ + const TCGArgConstraint *arg_ct; + + int i, n; + arg_ct = &def->args_ct[k]; + if (arg_ct->ct & TCG_CT_ALIAS) { + /* an alias is equivalent to a single register */ + n = 1; + } else { + if (!(arg_ct->ct & TCG_CT_REG)) + return 0; + n = 0; + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + if (tcg_regset_test_reg(arg_ct->u.regs, i)) + n++; + } + } + return TCG_TARGET_NB_REGS - n + 1; +} + +/* sort from highest priority to lowest */ +static void sort_constraints(TCGOpDef *def, int start, int n) +{ + int i, j, p1, p2, tmp; + + for(i = 0; i < n; i++) + def->sorted_args[start + i] = start + i; + if (n <= 1) + return; + for(i = 0; i < n - 1; i++) { + for(j = i + 1; j < n; j++) { + p1 = get_constraint_priority(def, def->sorted_args[start + i]); + p2 = get_constraint_priority(def, def->sorted_args[start + j]); + if (p1 < p2) { + tmp = def->sorted_args[start + i]; + def->sorted_args[start + i] = def->sorted_args[start + j]; + def->sorted_args[start + j] = tmp; + } + } + } +} + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) +{ + TCGOpcode op; + TCGOpDef *def; + const char *ct_str; + int i, nb_args; + + for(;;) { + if (tdefs->op == (TCGOpcode)-1) + break; + op = tdefs->op; + assert((unsigned)op < (unsigned)NB_OPS); + def = &tcg_op_defs[op]; +#if defined(CONFIG_DEBUG_TCG) + /* Duplicate entry in op definitions? */ + assert(!def->used); + def->used = 1; +#endif + nb_args = def->nb_iargs + def->nb_oargs; + for(i = 0; i < nb_args; i++) { + ct_str = tdefs->args_ct_str[i]; + /* Incomplete TCGTargetOpDef entry? */ + assert(ct_str != NULL); + tcg_regset_clear(def->args_ct[i].u.regs); + def->args_ct[i].ct = 0; + if (ct_str[0] >= '0' && ct_str[0] <= '9') { + int oarg; + oarg = ct_str[0] - '0'; + assert(oarg < def->nb_oargs); + assert(def->args_ct[oarg].ct & TCG_CT_REG); + /* TCG_CT_ALIAS is for the output arguments. The input + argument is tagged with TCG_CT_IALIAS. 
*/ + def->args_ct[i] = def->args_ct[oarg]; + def->args_ct[oarg].ct = TCG_CT_ALIAS; + def->args_ct[oarg].alias_index = i; + def->args_ct[i].ct |= TCG_CT_IALIAS; + def->args_ct[i].alias_index = oarg; + } else { + for(;;) { + if (*ct_str == '\0') + break; + switch(*ct_str) { + case 'i': + def->args_ct[i].ct |= TCG_CT_CONST; + ct_str++; + break; + default: + if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) { + fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n", + ct_str, i, def->name); +#ifndef VBOX + exit(1); +#else + tcg_exit(1); +#endif + } + } + } + } + } + + /* TCGTargetOpDef entry with too much information? */ + assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); + + /* sort the constraints (XXX: this is just an heuristic) */ + sort_constraints(def, 0, def->nb_oargs); + sort_constraints(def, def->nb_oargs, def->nb_iargs); + +#if 0 + { + int i; + + printf("%s: sorted=", def->name); + for(i = 0; i < def->nb_oargs + def->nb_iargs; i++) + printf(" %d", def->sorted_args[i]); + printf("\n"); + } +#endif + tdefs++; + } + +#if defined(CONFIG_DEBUG_TCG) + i = 0; + for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) { + if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) { + /* Wrong entry in op definitions? */ + if (tcg_op_defs[op].used) { + fprintf(stderr, "Invalid op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } + } else { + /* Missing entry in op definitions? */ + if (!tcg_op_defs[op].used) { + fprintf(stderr, "Missing op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } + } + } + if (i == 1) { + tcg_abort(); + } +#endif +} + +#ifdef USE_LIVENESS_ANALYSIS + +/* set a nop for an operation using 'nb_args' */ +static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, + TCGArg *args, int nb_args) +{ + if (nb_args == 0) { + *opc_ptr = INDEX_op_nop; + } else { + *opc_ptr = INDEX_op_nopn; + args[0] = nb_args; + args[nb_args - 1] = nb_args; + } +} + +/* liveness analysis: end of function: globals are live, temps are + dead. */ +/* XXX: at this stage, not used as there would be little gains because + most TBs end with a conditional jump. */ +static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps) +{ + memset(dead_temps, 0, s->nb_globals); + memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals); +} + +/* liveness analysis: end of basic block: globals are live, temps are + dead, local temps are live. */ +static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps) +{ + int i; + TCGTemp *ts; + + memset(dead_temps, 0, s->nb_globals); + ts = &s->temps[s->nb_globals]; + for(i = s->nb_globals; i < s->nb_temps; i++) { + if (ts->temp_local) + dead_temps[i] = 0; + else + dead_temps[i] = 1; + ts++; + } +} + +/* Liveness analysis : update the opc_dead_iargs array to tell if a + given input arguments is dead. Instructions updating dead + temporaries are removed. 
*/ +static void tcg_liveness_analysis(TCGContext *s) +{ + int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; + TCGOpcode op; + TCGArg *args; + const TCGOpDef *def; + uint8_t *dead_temps; + unsigned int dead_iargs; + + gen_opc_ptr++; /* skip end */ + + nb_ops = gen_opc_ptr - gen_opc_buf; + + s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + + dead_temps = tcg_malloc(s->nb_temps); + memset(dead_temps, 1, s->nb_temps); + + args = gen_opparam_ptr; + op_index = nb_ops - 1; + while (op_index >= 0) { + op = gen_opc_buf[op_index]; + def = &tcg_op_defs[op]; + switch(op) { + case INDEX_op_call: + { + int call_flags; + + nb_args = args[-1]; + args -= nb_args; + nb_iargs = args[0] & 0xffff; + nb_oargs = args[0] >> 16; + args++; + call_flags = args[nb_oargs + nb_iargs]; + + /* pure functions can be removed if their result is not + used */ + if (call_flags & TCG_CALL_PURE) { + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (!dead_temps[arg]) + goto do_not_remove_call; + } + tcg_set_nop(s, gen_opc_buf + op_index, + args - 1, nb_args); + } else { + do_not_remove_call: + + /* output args are dead */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + dead_temps[arg] = 1; + } + + if (!(call_flags & TCG_CALL_CONST)) { + /* globals are live (they may be used by the call) */ + memset(dead_temps, 0, s->nb_globals); + } + + /* input args are live */ + dead_iargs = 0; + for(i = 0; i < nb_iargs; i++) { + arg = args[i + nb_oargs]; + if (arg != TCG_CALL_DUMMY_ARG) { + if (dead_temps[arg]) { + dead_iargs |= (1 << i); + } + dead_temps[arg] = 0; + } + } + s->op_dead_iargs[op_index] = dead_iargs; + } + args--; + } + break; + case INDEX_op_set_label: + args--; + /* mark end of basic block */ + tcg_la_bb_end(s, dead_temps); + break; + case INDEX_op_debug_insn_start: + args -= def->nb_args; + break; + case INDEX_op_nopn: + nb_args = args[-1]; + args -= nb_args; + break; + case INDEX_op_discard: + args--; + /* mark the temporary as dead */ + dead_temps[args[0]] = 1; + break; + case INDEX_op_end: + break; + /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ + default: + args -= def->nb_args; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + /* Test if the operation can be removed because all + its outputs are dead. 
We assume that nb_oargs == 0 + implies side effects */ + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (!dead_temps[arg]) + goto do_not_remove; + } + tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args); +#ifdef CONFIG_PROFILER + s->del_op_count++; +#endif + } else { + do_not_remove: + + /* output args are dead */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + dead_temps[arg] = 1; + } + + /* if end of basic block, update */ + if (def->flags & TCG_OPF_BB_END) { + tcg_la_bb_end(s, dead_temps); + } else if (def->flags & TCG_OPF_CALL_CLOBBER) { + /* globals are live */ + memset(dead_temps, 0, s->nb_globals); + } + + /* input args are live */ + dead_iargs = 0; + for(i = 0; i < nb_iargs; i++) { + arg = args[i + nb_oargs]; + if (dead_temps[arg]) { + dead_iargs |= (1 << i); + } + dead_temps[arg] = 0; + } + s->op_dead_iargs[op_index] = dead_iargs; + } + break; + } + op_index--; + } + + if (args != gen_opparam_buf) + tcg_abort(); +} +#else +/* dummy liveness analysis */ +static void tcg_liveness_analysis(TCGContext *s) +{ + int nb_ops; + nb_ops = gen_opc_ptr - gen_opc_buf; + + s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t)); +} +#endif + +#ifndef NDEBUG +static void dump_regs(TCGContext *s) +{ + TCGTemp *ts; + int i; + char buf[64]; + + for(i = 0; i < s->nb_temps; i++) { + ts = &s->temps[i]; + printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); + switch(ts->val_type) { + case TEMP_VAL_REG: + printf("%s", tcg_target_reg_names[ts->reg]); + break; + case TEMP_VAL_MEM: + printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]); + break; + case TEMP_VAL_CONST: + printf("$0x%" TCG_PRIlx, ts->val); + break; + case TEMP_VAL_DEAD: + printf("D"); + break; + default: + printf("???"); + break; + } + printf("\n"); + } + + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + if (s->reg_to_temp[i] >= 0) { + printf("%s: %s\n", + tcg_target_reg_names[i], + tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i])); + } + } +} + +static void check_regs(TCGContext *s) +{ + int reg, k; + TCGTemp *ts; + char buf[64]; + + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + k = s->reg_to_temp[reg]; + if (k >= 0) { + ts = &s->temps[k]; + if (ts->val_type != TEMP_VAL_REG || + ts->reg != reg) { + printf("Inconsistency for register %s:\n", + tcg_target_reg_names[reg]); + goto fail; + } + } + } + for(k = 0; k < s->nb_temps; k++) { + ts = &s->temps[k]; + if (ts->val_type == TEMP_VAL_REG && + !ts->fixed_reg && + s->reg_to_temp[ts->reg] != k) { + printf("Inconsistency for temp %s:\n", + tcg_get_arg_str_idx(s, buf, sizeof(buf), k)); + fail: + printf("reg state:\n"); + dump_regs(s); + tcg_abort(); + } + } +} +#endif + +static void temp_allocate_frame(TCGContext *s, int temp) +{ + TCGTemp *ts; + ts = &s->temps[temp]; + s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1); +#ifndef VBOX + if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) +#else + if ((tcg_target_long)s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) +#endif + tcg_abort(); + ts->mem_offset = s->current_frame_offset; + ts->mem_reg = s->frame_reg; + ts->mem_allocated = 1; + s->current_frame_offset += sizeof(tcg_target_long); +} + +/* free register 'reg' by spilling the corresponding temporary if necessary */ +static void tcg_reg_free(TCGContext *s, int reg) +{ + TCGTemp *ts; + int temp; + + temp = 
s->reg_to_temp[reg]; + if (temp != -1) { + ts = &s->temps[temp]; + assert(ts->val_type == TEMP_VAL_REG); + if (!ts->mem_coherent) { + if (!ts->mem_allocated) + temp_allocate_frame(s, temp); + tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } + ts->val_type = TEMP_VAL_MEM; + s->reg_to_temp[reg] = -1; + } +} + +/* Allocate a register belonging to reg1 & ~reg2 */ +static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2) +{ + int i, reg; + TCGRegSet reg_ct; + + tcg_regset_andnot(reg_ct, reg1, reg2); + + /* first try free registers */ + for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { + reg = tcg_target_reg_alloc_order[i]; + if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1) + return reg; + } + + /* XXX: do better spill choice */ + for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { + reg = tcg_target_reg_alloc_order[i]; + if (tcg_regset_test_reg(reg_ct, reg)) { + tcg_reg_free(s, reg); + return reg; + } + } + + tcg_abort(); +} + +/* save a temporary to memory. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) +{ + TCGTemp *ts; + int reg; + + ts = &s->temps[temp]; + if (!ts->fixed_reg) { + switch(ts->val_type) { + case TEMP_VAL_REG: + tcg_reg_free(s, ts->reg); + break; + case TEMP_VAL_DEAD: + ts->val_type = TEMP_VAL_MEM; + break; + case TEMP_VAL_CONST: + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + allocated_regs); + if (!ts->mem_allocated) + temp_allocate_frame(s, temp); + tcg_out_movi(s, ts->type, reg, ts->val); + tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + ts->val_type = TEMP_VAL_MEM; + break; + case TEMP_VAL_MEM: + break; + default: + tcg_abort(); + } + } +} + +/* save globals to their cannonical location and assume they can be + modified be the following code. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void save_globals(TCGContext *s, TCGRegSet allocated_regs) +{ + int i; + + for(i = 0; i < s->nb_globals; i++) { + temp_save(s, i, allocated_regs); + } +} + +/* at the end of a basic block, we assume all temporaries are dead and + all globals are stored at their canonical location. 
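Temps marked temp_local are spilled to memory instead, so their value survives the block boundary; other temps are simply marked dead.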
*/ +static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) +{ + TCGTemp *ts; + int i; + + for(i = s->nb_globals; i < s->nb_temps; i++) { + ts = &s->temps[i]; + if (ts->temp_local) { + temp_save(s, i, allocated_regs); + } else { + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = -1; + } + ts->val_type = TEMP_VAL_DEAD; + } + } + + save_globals(s, allocated_regs); +} + +#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1) + +static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args) +{ + TCGTemp *ots; + tcg_target_ulong val; + + ots = &s->temps[args[0]]; + val = args[1]; + + if (ots->fixed_reg) { + /* for fixed registers, we do not do any constant + propagation */ + tcg_out_movi(s, ots->type, ots->reg, val); + } else { + /* The movi is not explicitly generated here */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + ots->val_type = TEMP_VAL_CONST; + ots->val = val; + } +} + +static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, + const TCGArg *args, + unsigned int dead_iargs) +{ + TCGTemp *ts, *ots; + int reg; + const TCGArgConstraint *arg_ct; + + ots = &s->temps[args[0]]; + ts = &s->temps[args[1]]; + arg_ct = &def->args_ct[0]; + + /* XXX: always mark arg dead if IS_DEAD_IARG(0) */ + if (ts->val_type == TEMP_VAL_REG) { + if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) { + /* the mov can be suppressed */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + reg = ts->reg; + s->reg_to_temp[reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } else { + if (ots->val_type == TEMP_VAL_REG) { + reg = ots->reg; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); + } + if (ts->reg != reg) { + tcg_out_mov(s, ots->type, reg, ts->reg); + } + } + } else if (ts->val_type == TEMP_VAL_MEM) { + if (ots->val_type == TEMP_VAL_REG) { + reg = ots->reg; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); + } + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + if (ots->fixed_reg) { + reg = ots->reg; + tcg_out_movi(s, ots->type, reg, ts->val); + } else { + /* propagate constant */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + ots->val_type = TEMP_VAL_CONST; + ots->val = ts->val; + return; + } + } else { + tcg_abort(); + } + s->reg_to_temp[reg] = args[0]; + ots->reg = reg; + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; +} + +static void tcg_reg_alloc_op(TCGContext *s, + const TCGOpDef *def, TCGOpcode opc, + const TCGArg *args, + unsigned int dead_iargs) +{ + TCGRegSet allocated_regs; + int i, k, nb_iargs, nb_oargs, reg; + TCGArg arg; + const TCGArgConstraint *arg_ct; + TCGTemp *ts; + TCGArg new_args[TCG_MAX_OP_ARGS]; + int const_args[TCG_MAX_OP_ARGS]; + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + + /* copy constants */ + memcpy(new_args + nb_oargs + nb_iargs, + args + nb_oargs + nb_iargs, + sizeof(TCGArg) * def->nb_cargs); + + /* satisfy input constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(k = 0; k < nb_iargs; k++) { + i = def->sorted_args[nb_oargs + k]; + arg = args[i]; + arg_ct = &def->args_ct[i]; + ts = &s->temps[arg]; + if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 1; + s->reg_to_temp[reg] = arg; + } else if (ts->val_type == TEMP_VAL_CONST) { + if 
(tcg_target_const_match(ts->val, arg_ct)) { + /* constant is OK for instruction */ + const_args[i] = 1; + new_args[i] = ts->val; + goto iarg_end; + } else { + /* need to move to a register */ + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_movi(s, ts->type, reg, ts->val); + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + } + assert(ts->val_type == TEMP_VAL_REG); + if (arg_ct->ct & TCG_CT_IALIAS) { + if (ts->fixed_reg) { + /* if fixed register, we must allocate a new register + if the alias is not the same register */ + if (arg != args[arg_ct->alias_index]) + goto allocate_in_reg; + } else { + /* if the input is aliased to an output and if it is + not dead after the instruction, we must allocate + a new register and move it */ + if (!IS_DEAD_IARG(i - nb_oargs)) + goto allocate_in_reg; + } + } + reg = ts->reg; + if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { + /* nothing to do : the constraint is satisfied */ + } else { + allocate_in_reg: + /* allocate a new register matching the constraint + and move the temporary register into it */ + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_mov(s, ts->type, reg, ts->reg); + } + new_args[i] = reg; + const_args[i] = 0; + tcg_regset_set_reg(allocated_regs, reg); + iarg_end: ; + } + + if (def->flags & TCG_OPF_BB_END) { + tcg_reg_alloc_bb_end(s, allocated_regs); + } else { + /* mark dead temporaries and free the associated registers */ + for(i = 0; i < nb_iargs; i++) { + arg = args[nb_oargs + i]; + if (IS_DEAD_IARG(i)) { + ts = &s->temps[arg]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + } + + if (def->flags & TCG_OPF_CALL_CLOBBER) { + /* XXX: permit generic clobber register list ? */ + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { + tcg_reg_free(s, reg); + } + } + /* XXX: for load/store we could do that only for the slow path + (i.e. when a memory callback is called) */ + + /* store globals and free associated registers (we assume the insn + can modify any global. 
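They are flushed back to their canonical memory locations before the op is emitted.)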
*/ + save_globals(s, allocated_regs); + } + + /* satisfy the output constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(k = 0; k < nb_oargs; k++) { + i = def->sorted_args[k]; + arg = args[i]; + arg_ct = &def->args_ct[i]; + ts = &s->temps[arg]; + if (arg_ct->ct & TCG_CT_ALIAS) { + reg = new_args[arg_ct->alias_index]; + } else { + /* if fixed register, we try to use it */ + reg = ts->reg; + if (ts->fixed_reg && + tcg_regset_test_reg(arg_ct->u.regs, reg)) { + goto oarg_end; + } + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + } + tcg_regset_set_reg(allocated_regs, reg); + /* if a fixed register is used, then a move will be done afterwards */ + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* temp value is modified, so the value kept in memory is + potentially not the same */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + oarg_end: + new_args[i] = reg; + } + } + + /* emit instruction */ + tcg_out_op(s, opc, new_args, const_args); + + /* move the outputs in the correct register if needed */ + for(i = 0; i < nb_oargs; i++) { + ts = &s->temps[args[i]]; + reg = new_args[i]; + if (ts->fixed_reg && ts->reg != reg) { + tcg_out_mov(s, ts->type, ts->reg, reg); + } + } +} + +#ifdef TCG_TARGET_STACK_GROWSUP +#define STACK_DIR(x) (-(x)) +#else +#define STACK_DIR(x) (x) +#endif + +static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, + TCGOpcode opc, const TCGArg *args, + unsigned int dead_iargs) +{ + int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; + TCGArg arg, func_arg; + TCGTemp *ts; + tcg_target_long stack_offset, call_stack_size, func_addr; + int const_func_arg, allocate_args; + TCGRegSet allocated_regs; + const TCGArgConstraint *arg_ct; + + arg = *args++; + + nb_oargs = arg >> 16; + nb_iargs = arg & 0xffff; + nb_params = nb_iargs - 1; + + flags = args[nb_oargs + nb_iargs]; + + nb_regs = tcg_target_get_call_iarg_regs_count(flags); + if (nb_regs > nb_params) + nb_regs = nb_params; + + /* assign stack slots first */ + /* XXX: preallocate call stack */ + call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long); + call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); + if (allocate_args) { + tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size)); + } + + stack_offset = TCG_TARGET_CALL_STACK_OFFSET; + for(i = nb_regs; i < nb_params; i++) { + arg = args[nb_oargs + i]; +#ifdef TCG_TARGET_STACK_GROWSUP + stack_offset -= sizeof(tcg_target_long); +#endif + if (arg != TCG_CALL_DUMMY_ARG) { + ts = &s->temps[arg]; + if (ts->val_type == TEMP_VAL_REG) { + tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); + } else if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + s->reserved_regs); + /* XXX: not correct if reading values from the stack */ + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + s->reserved_regs); + /* XXX: sign extend may be needed on some targets */ + tcg_out_movi(s, ts->type, reg, ts->val); + tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); + } else { + tcg_abort(); + } + } +#ifndef TCG_TARGET_STACK_GROWSUP + stack_offset += sizeof(tcg_target_long); 
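+ /* Example: with 3 register argument slots and 5 parameters, parameters
+    3 and 4 end up at TCG_TARGET_CALL_STACK_OFFSET + 0 and
+    TCG_TARGET_CALL_STACK_OFFSET + sizeof(tcg_target_long) when the stack
+    grows downwards. */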
+#endif + } + + /* assign input registers */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(i = 0; i < nb_regs; i++) { + arg = args[nb_oargs + i]; + if (arg != TCG_CALL_DUMMY_ARG) { + ts = &s->temps[arg]; + reg = tcg_target_call_iarg_regs[i]; + tcg_reg_free(s, reg); + if (ts->val_type == TEMP_VAL_REG) { + if (ts->reg != reg) { + tcg_out_mov(s, ts->type, reg, ts->reg); + } + } else if (ts->val_type == TEMP_VAL_MEM) { + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + /* XXX: sign extend ? */ + tcg_out_movi(s, ts->type, reg, ts->val); + } else { + tcg_abort(); + } + tcg_regset_set_reg(allocated_regs, reg); + } + } + + /* assign function address */ + func_arg = args[nb_oargs + nb_iargs - 1]; + arg_ct = &def->args_ct[0]; + ts = &s->temps[func_arg]; + func_addr = ts->val; + const_func_arg = 0; + if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } else if (ts->val_type == TEMP_VAL_REG) { + reg = ts->reg; + if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_mov(s, ts->type, reg, ts->reg); + } + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } else if (ts->val_type == TEMP_VAL_CONST) { + if (tcg_target_const_match(func_addr, arg_ct)) { + const_func_arg = 1; + func_arg = func_addr; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_movi(s, ts->type, reg, func_addr); + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } + } else { + tcg_abort(); + } + + + /* mark dead temporaries and free the associated registers */ + for(i = 0; i < nb_iargs; i++) { + arg = args[nb_oargs + i]; + if (IS_DEAD_IARG(i)) { + ts = &s->temps[arg]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + } + + /* clobber call registers */ + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { + tcg_reg_free(s, reg); + } + } + + /* store globals and free associated registers (we assume the call + can modify any global. 
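Calls flagged TCG_CALL_CONST are exempt, as tested just below.)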
*/ + if (!(flags & TCG_CALL_CONST)) { + save_globals(s, allocated_regs); + } + + tcg_out_op(s, opc, &func_arg, &const_func_arg); + + if (allocate_args) { + tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size)); + } + + /* assign output registers and emit moves if needed */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + ts = &s->temps[arg]; + reg = tcg_target_call_oarg_regs[i]; + assert(s->reg_to_temp[reg] == -1); + if (ts->fixed_reg) { + if (ts->reg != reg) { + tcg_out_mov(s, ts->type, ts->reg, reg); + } + } else { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + } + + return nb_iargs + nb_oargs + def->nb_cargs + 1; +} + +#ifdef CONFIG_PROFILER + +static int64_t tcg_table_op_count[NB_OPS]; + +static void dump_op_count(void) +{ + int i; + FILE *f; + f = fopen("/tmp/op.log", "w"); + for(i = INDEX_op_end; i < NB_OPS; i++) { + fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]); + } + fclose(f); +} +#endif + + +static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, + intptr_t search_pc) +{ + TCGOpcode opc; + int op_index; + const TCGOpDef *def; + unsigned int dead_iargs; + const TCGArg *args; + +#ifdef DEBUG_DISAS + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { + qemu_log("OP:\n"); + tcg_dump_ops(s, logfile); + qemu_log("\n"); + } +#endif + +#ifdef CONFIG_PROFILER + s->la_time -= profile_getclock(); +#endif + tcg_liveness_analysis(s); +#ifdef CONFIG_PROFILER + s->la_time += profile_getclock(); +#endif + +#ifdef DEBUG_DISAS +# ifdef USE_LIVENESS_ANALYSIS /* vbox */ + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) { + qemu_log("OP after liveness analysis:\n"); + tcg_dump_ops(s, logfile); + qemu_log("\n"); + } +# endif /* USE_LIVENESS_ANALYSIS - vbox */ +#endif + + tcg_reg_alloc_start(s); + + s->code_buf = gen_code_buf; + s->code_ptr = gen_code_buf; + + args = gen_opparam_buf; + op_index = 0; + + for(;;) { + opc = gen_opc_buf[op_index]; +#ifdef CONFIG_PROFILER + tcg_table_op_count[opc]++; +#endif + def = &tcg_op_defs[opc]; +#if 0 + printf("%s: %d %d %d\n", def->name, + def->nb_oargs, def->nb_iargs, def->nb_cargs); + // dump_regs(s); +#endif + switch(opc) { + case INDEX_op_mov_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: +#endif + dead_iargs = s->op_dead_iargs[op_index]; + tcg_reg_alloc_mov(s, def, args, dead_iargs); + break; + case INDEX_op_movi_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_movi_i64: +#endif + tcg_reg_alloc_movi(s, args); + break; + case INDEX_op_debug_insn_start: + /* debug instruction */ + break; + case INDEX_op_nop: + case INDEX_op_nop1: + case INDEX_op_nop2: + case INDEX_op_nop3: + break; + case INDEX_op_nopn: + args += args[0]; + goto next; + case INDEX_op_discard: + { + TCGTemp *ts; + ts = &s->temps[args[0]]; + /* mark the temporary as dead */ + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + break; + case INDEX_op_set_label: + tcg_reg_alloc_bb_end(s, s->reserved_regs); + tcg_out_label(s, args[0], (intptr_t)s->code_ptr); + break; + case INDEX_op_call: + dead_iargs = s->op_dead_iargs[op_index]; + args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs); + goto next; + case INDEX_op_end: + goto the_end; + default: + /* Note: in order to speed up the code, it would be much + faster to have specialized register allocator functions for + some common argument patterns */ + 
dead_iargs = s->op_dead_iargs[op_index]; + tcg_reg_alloc_op(s, def, opc, args, dead_iargs); + break; + } + args += def->nb_args; + next: + if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) { + return op_index; + } + op_index++; +#ifndef NDEBUG + check_regs(s); +#endif + } + the_end: + return -1; +} + +int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) +{ +#ifdef CONFIG_PROFILER + { + int n; + n = (gen_opc_ptr - gen_opc_buf); + s->op_count += n; + if (n > s->op_count_max) + s->op_count_max = n; + + s->temp_count += s->nb_temps; + if (s->nb_temps > s->temp_count_max) + s->temp_count_max = s->nb_temps; + } +#endif + + tcg_gen_code_common(s, gen_code_buf, -1); + + /* flush instruction cache */ + flush_icache_range((uintptr_t)gen_code_buf, + (uintptr_t)s->code_ptr); + return s->code_ptr - gen_code_buf; +} + +/* Return the index of the micro operation such as the pc after is < + offset bytes from the start of the TB. The contents of gen_code_buf must + not be changed, though writing the same values is ok. + Return -1 if not found. */ +int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset) +{ + return tcg_gen_code_common(s, gen_code_buf, offset); +} + +#ifdef CONFIG_PROFILER +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ + TCGContext *s = &tcg_ctx; + int64_t tot; + + tot = s->interm_time + s->code_time; + cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", + tot, tot / 2.4e9); + cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", + s->tb_count, + s->tb_count1 - s->tb_count, + s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0); + cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", + s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max); + cpu_fprintf(f, "deleted ops/TB %0.2f\n", + s->tb_count ? + (double)s->del_op_count / s->tb_count : 0); + cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", + s->tb_count ? + (double)s->temp_count / s->tb_count : 0, + s->temp_count_max); + + cpu_fprintf(f, "cycles/op %0.1f\n", + s->op_count ? (double)tot / s->op_count : 0); + cpu_fprintf(f, "cycles/in byte %0.1f\n", + s->code_in_len ? (double)tot / s->code_in_len : 0); + cpu_fprintf(f, "cycles/out byte %0.1f\n", + s->code_out_len ? (double)tot / s->code_out_len : 0); + if (tot == 0) + tot = 1; + cpu_fprintf(f, " gen_interm time %0.1f%%\n", + (double)s->interm_time / tot * 100.0); + cpu_fprintf(f, " gen_code time %0.1f%%\n", + (double)s->code_time / tot * 100.0); + cpu_fprintf(f, "liveness/code time %0.1f%%\n", + (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); + cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", + s->restore_count); + cpu_fprintf(f, " avg cycles %0.1f\n", + s->restore_count ? 
(double)s->restore_time / s->restore_count : 0); + + dump_op_count(); +} +#else +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ + cpu_fprintf(f, "[TCG profiler not compiled]\n"); +} +#endif diff --git a/src/recompiler/tcg/tcg.h b/src/recompiler/tcg/tcg.h new file mode 100644 index 00000000..819fa6c7 --- /dev/null +++ b/src/recompiler/tcg/tcg.h @@ -0,0 +1,512 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "tcg-target.h" +#include "tcg-runtime.h" + +#if TCG_TARGET_REG_BITS == 32 +typedef int32_t tcg_target_long; +typedef uint32_t tcg_target_ulong; +#define TCG_PRIlx PRIx32 +#define TCG_PRIld PRId32 +#elif TCG_TARGET_REG_BITS == 64 +typedef int64_t tcg_target_long; +typedef uint64_t tcg_target_ulong; +#define TCG_PRIlx PRIx64 +#define TCG_PRIld PRId64 +#else +#error unsupported +#endif + +#if TCG_TARGET_NB_REGS <= 32 +typedef uint32_t TCGRegSet; +#elif TCG_TARGET_NB_REGS <= 64 +typedef uint64_t TCGRegSet; +#else +#error unsupported +#endif + +typedef enum TCGOpcode { +#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, +#include "tcg-opc.h" +#undef DEF + NB_OPS, +} TCGOpcode; + +#define tcg_regset_clear(d) (d) = 0 +#define tcg_regset_set(d, s) (d) = (s) +#define tcg_regset_set32(d, reg, val32) (d) |= (val32) << (reg) +#define tcg_regset_set_reg(d, r) (d) |= 1L << (r) +#define tcg_regset_reset_reg(d, r) (d) &= ~(1L << (r)) +#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1) +#define tcg_regset_or(d, a, b) (d) = (a) | (b) +#define tcg_regset_and(d, a, b) (d) = (a) & (b) +#define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b) +#define tcg_regset_not(d, a) (d) = ~(a) + +typedef struct TCGRelocation { + struct TCGRelocation *next; + int type; + uint8_t *ptr; + tcg_target_long addend; +} TCGRelocation; + +typedef struct TCGLabel { + int has_value; + union { + tcg_target_ulong value; + TCGRelocation *first_reloc; + } u; +} TCGLabel; + +typedef struct TCGPool { + struct TCGPool *next; + int size; + uint8_t data[0] __attribute__ ((aligned)); +} TCGPool; + +#define TCG_POOL_CHUNK_SIZE 32768 + +#define TCG_MAX_LABELS 512 + +#define TCG_MAX_TEMPS 512 + +/* when the size of the arguments of a called function is smaller than + this value, they are statically allocated in the TB stack frame */ +#define TCG_STATIC_CALL_ARGS_SIZE 128 + +typedef enum TCGType { + TCG_TYPE_I32, + TCG_TYPE_I64, + TCG_TYPE_COUNT, /* 
number of different types */ + + /* An alias for the size of the host register. */ +#if TCG_TARGET_REG_BITS == 32 + TCG_TYPE_REG = TCG_TYPE_I32, +#else + TCG_TYPE_REG = TCG_TYPE_I64, +#endif + + /* An alias for the size of the native pointer. We don't currently + support any hosts with 64-bit registers and 32-bit pointers. */ + TCG_TYPE_PTR = TCG_TYPE_REG, + + /* An alias for the size of the target "long", aka register. */ +#if TARGET_LONG_BITS == 64 + TCG_TYPE_TL = TCG_TYPE_I64, +#else + TCG_TYPE_TL = TCG_TYPE_I32, +#endif +} TCGType; + +typedef tcg_target_ulong TCGArg; + +/* Define a type and accessor macros for varables. Using a struct is + nice because it gives some level of type safely. Ideally the compiler + be able to see through all this. However in practice this is not true, + expecially on targets with braindamaged ABIs (e.g. i386). + We use plain int by default to avoid this runtime overhead. + Users of tcg_gen_* don't need to know about any of this, and should + treat TCGv as an opaque type. + In additon we do typechecking for different types of variables. TCGv_i32 + and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr + are aliases for target_ulong and host pointer sized values respectively. + */ + +#ifdef CONFIG_DEBUG_TCG +#define DEBUG_TCGV 1 +#endif + +#ifdef DEBUG_TCGV + +typedef struct +{ + int i32; +} TCGv_i32; + +typedef struct +{ + int i64; +} TCGv_i64; + +#define MAKE_TCGV_I32(i) __extension__ \ + ({ TCGv_i32 make_tcgv_tmp = {i}; make_tcgv_tmp;}) +#define MAKE_TCGV_I64(i) __extension__ \ + ({ TCGv_i64 make_tcgv_tmp = {i}; make_tcgv_tmp;}) +#define GET_TCGV_I32(t) ((t).i32) +#define GET_TCGV_I64(t) ((t).i64) +#if TCG_TARGET_REG_BITS == 32 +#define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t)) +#define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1) +#endif + +#else /* !DEBUG_TCGV */ + +typedef int TCGv_i32; +typedef int TCGv_i64; +#define MAKE_TCGV_I32(x) (x) +#define MAKE_TCGV_I64(x) (x) +#define GET_TCGV_I32(t) (t) +#define GET_TCGV_I64(t) (t) + +#if TCG_TARGET_REG_BITS == 32 +#define TCGV_LOW(t) (t) +#define TCGV_HIGH(t) ((t) + 1) +#endif + +#endif /* DEBUG_TCGV */ + +#define TCGV_EQUAL_I32(a, b) (GET_TCGV_I32(a) == GET_TCGV_I32(b)) +#define TCGV_EQUAL_I64(a, b) (GET_TCGV_I64(a) == GET_TCGV_I64(b)) + +/* Dummy definition to avoid compiler warnings. */ +#define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1) +#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1) + +/* call flags */ +#define TCG_CALL_TYPE_MASK 0x000f +#define TCG_CALL_TYPE_STD 0x0000 /* standard C call */ +#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */ +#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */ +#define TCG_CALL_TYPE_REGPARM 0x0003 /* i386 style regparm call (3 regs) */ +/* A pure function only reads its arguments and TCG global variables + and cannot raise exceptions. Hence a call to a pure function can be + safely suppressed if the return value is not used. */ +#define TCG_CALL_PURE 0x0010 +/* A const function only reads its arguments and does not use TCG + global variables. Hence a call to such a function does not + save TCG global variables back to their canonical location. 
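A helper that merely computes a value from its arguments and cannot fault can typically carry both TCG_CALL_PURE and TCG_CALL_CONST.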
*/ +#define TCG_CALL_CONST 0x0020 + +/* used to align parameters */ +#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) +#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1)) + +typedef enum { + TCG_COND_EQ, + TCG_COND_NE, + TCG_COND_LT, + TCG_COND_GE, + TCG_COND_LE, + TCG_COND_GT, + /* unsigned */ + TCG_COND_LTU, + TCG_COND_GEU, + TCG_COND_LEU, + TCG_COND_GTU, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15); + return (TCGCond)(c ^ mask); +} + +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c); +} + +#define TEMP_VAL_DEAD 0 +#define TEMP_VAL_REG 1 +#define TEMP_VAL_MEM 2 +#define TEMP_VAL_CONST 3 + +/* XXX: optimize memory layout */ +typedef struct TCGTemp { + TCGType base_type; + TCGType type; + int val_type; + int reg; + tcg_target_long val; + int mem_reg; + tcg_target_long mem_offset; + unsigned int fixed_reg:1; + unsigned int mem_coherent:1; + unsigned int mem_allocated:1; + unsigned int temp_local:1; /* If true, the temp is saved accross + basic blocks. Otherwise, it is not + preserved accross basic blocks. */ + unsigned int temp_allocated:1; /* never used for code gen */ + /* index of next free temp of same base type, -1 if end */ + int next_free_temp; + const char *name; +} TCGTemp; + +typedef struct TCGHelperInfo { + tcg_target_ulong func; + const char *name; +} TCGHelperInfo; + +typedef struct TCGContext TCGContext; + +struct TCGContext { + uint8_t *pool_cur, *pool_end; + TCGPool *pool_first, *pool_current; + TCGLabel *labels; + int nb_labels; + TCGTemp *temps; /* globals first, temps after */ + int nb_globals; + int nb_temps; + /* index of free temps, -1 if none */ + int first_free_temp[TCG_TYPE_COUNT * 2]; + + /* goto_tb support */ + uint8_t *code_buf; + uintptr_t *tb_next; + uint16_t *tb_next_offset; + uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ + + /* liveness analysis */ + uint16_t *op_dead_iargs; /* for each operation, each bit tells if the + corresponding input argument is dead */ + + /* tells in which temporary a given register is. 
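(-1 when the register is currently free.)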
It does not take + into account fixed registers */ + int reg_to_temp[TCG_TARGET_NB_REGS]; + TCGRegSet reserved_regs; + tcg_target_long current_frame_offset; + tcg_target_long frame_start; + tcg_target_long frame_end; + int frame_reg; + + uint8_t *code_ptr; + TCGTemp static_temps[TCG_MAX_TEMPS]; + + TCGHelperInfo *helpers; + int nb_helpers; + int allocated_helpers; + int helpers_sorted; + +#ifdef CONFIG_PROFILER + /* profiling info */ + int64_t tb_count1; + int64_t tb_count; + int64_t op_count; /* total insn count */ + int op_count_max; /* max insn per TB */ + int64_t temp_count; + int temp_count_max; + int64_t del_op_count; + int64_t code_in_len; + int64_t code_out_len; + int64_t interm_time; + int64_t code_time; + int64_t la_time; + int64_t restore_count; + int64_t restore_time; +#endif +}; + +extern TCGContext tcg_ctx; +extern uint16_t *gen_opc_ptr; +extern TCGArg *gen_opparam_ptr; +extern uint16_t gen_opc_buf[]; +extern TCGArg gen_opparam_buf[]; + +/* pool based memory allocation */ + +void *tcg_malloc_internal(TCGContext *s, int size); +void tcg_pool_reset(TCGContext *s); +void tcg_pool_delete(TCGContext *s); + +static inline void *tcg_malloc(int size) +{ + TCGContext *s = &tcg_ctx; + uint8_t *ptr, *ptr_end; + size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1); + ptr = s->pool_cur; + ptr_end = ptr + size; + if (unlikely(ptr_end > s->pool_end)) { + return tcg_malloc_internal(&tcg_ctx, size); + } else { + s->pool_cur = ptr_end; + return ptr; + } +} + +void tcg_context_init(TCGContext *s); +void tcg_prologue_init(TCGContext *s); +void tcg_func_start(TCGContext *s); + +int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); +int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset); + +void tcg_set_frame(TCGContext *s, int reg, + tcg_target_long start, tcg_target_long size); + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name); +TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, + const char *name); +TCGv_i32 tcg_temp_new_internal_i32(int temp_local); +static inline TCGv_i32 tcg_temp_new_i32(void) +{ + return tcg_temp_new_internal_i32(0); +} +static inline TCGv_i32 tcg_temp_local_new_i32(void) +{ + return tcg_temp_new_internal_i32(1); +} +void tcg_temp_free_i32(TCGv_i32 arg); +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg); + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name); +TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, + const char *name); +TCGv_i64 tcg_temp_new_internal_i64(int temp_local); +static inline TCGv_i64 tcg_temp_new_i64(void) +{ + return tcg_temp_new_internal_i64(0); +} +static inline TCGv_i64 tcg_temp_local_new_i64(void) +{ + return tcg_temp_new_internal_i64(1); +} +void tcg_temp_free_i64(TCGv_i64 arg); +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); + +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); + +#define TCG_CT_ALIAS 0x80 +#define TCG_CT_IALIAS 0x40 +#define TCG_CT_REG 0x01 +#define TCG_CT_CONST 0x02 /* any constant of register size */ + +typedef struct TCGArgConstraint { + uint16_t ct; + uint8_t alias_index; + union { + TCGRegSet regs; + } u; +} TCGArgConstraint; + +#define TCG_MAX_OP_ARGS 16 + +#define TCG_OPF_BB_END 0x01 /* instruction defines the end of a basic + block */ +#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers + and potentially update globals. 
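The register allocator spills any live globals and frees every call-clobbered register before emitting such an op (see tcg_reg_alloc_op).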
*/ +#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects : it + cannot be removed if its output + are not used */ + +typedef struct TCGOpDef { + const char *name; + uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; + uint8_t flags; + TCGArgConstraint *args_ct; + int *sorted_args; +#if defined(CONFIG_DEBUG_TCG) + int used; +#endif +} TCGOpDef; + +typedef struct TCGTargetOpDef { + TCGOpcode op; + const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGTargetOpDef; + +#ifndef VBOX +#define tcg_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\ + abort();\ +} while (0) +#else /* VBOX */ +# define tcg_abort() \ + do {\ + remAbort(-1, "TCG fatal error: "__FILE__":" RT_XSTR(__LINE__)); \ + } while (0) +extern void qemu_qsort(void* base, size_t nmemb, size_t size, + int(*compar)(const void*, const void*)); +#define tcg_exit(status) \ + do {\ + remAbort(-1, "TCG exit: "__FILE__":" RT_XSTR(__LINE__));\ + } while (0) +#endif /* VBOX */ + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); + +#if TCG_TARGET_REG_BITS == 32 +#define tcg_const_ptr tcg_const_i32 +#define tcg_add_ptr tcg_add_i32 +#define tcg_sub_ptr tcg_sub_i32 +#define TCGv_ptr TCGv_i32 +#define GET_TCGV_PTR GET_TCGV_I32 +#define tcg_global_reg_new_ptr tcg_global_reg_new_i32 +#define tcg_global_mem_new_ptr tcg_global_mem_new_i32 +#define tcg_temp_new_ptr tcg_temp_new_i32 +#define tcg_temp_free_ptr tcg_temp_free_i32 +#else +#define tcg_const_ptr tcg_const_i64 +#define tcg_add_ptr tcg_add_i64 +#define tcg_sub_ptr tcg_sub_i64 +#define TCGv_ptr TCGv_i64 +#define GET_TCGV_PTR GET_TCGV_I64 +#define tcg_global_reg_new_ptr tcg_global_reg_new_i64 +#define tcg_global_mem_new_ptr tcg_global_mem_new_i64 +#define tcg_temp_new_ptr tcg_temp_new_i64 +#define tcg_temp_free_ptr tcg_temp_free_i64 +#endif + +void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, + int sizemask, TCGArg ret, int nargs, TCGArg *args); + +void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, + int c, int right, int arith); + +/* only used for debugging purposes */ +void tcg_register_helper(void *func, const char *name); +const char *tcg_helper_get_name(TCGContext *s, void *func); +void tcg_dump_ops(TCGContext *s, FILE *outfile); + +void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf); +TCGv_i32 tcg_const_i32(int32_t val); +TCGv_i64 tcg_const_i64(int64_t val); +TCGv_i32 tcg_const_local_i32(int32_t val); +TCGv_i64 tcg_const_local_i64(int64_t val); + +#ifndef VBOX +extern uint8_t code_gen_prologue[]; +#else +extern uint8_t *code_gen_prologue; +#endif +#if defined(_ARCH_PPC) && !defined(_ARCH_PPC64) +#define tcg_qemu_tb_exec(tb_ptr) \ + ((intptr_t REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr) +#else +# if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) && !defined(__MINGW64__) +# define tcg_qemu_tb_exec(tb_ptr, ret) \ + __asm__ __volatile__("call *%%ecx" : "=a"(ret) : "a"(tb_ptr), "c" (&code_gen_prologue[0]) : "memory", "%edx", "cc") +# else +#define tcg_qemu_tb_exec(tb_ptr) ((intptr_t REGPARM (*)(void *))code_gen_prologue)(tb_ptr) +# endif +#endif diff --git a/src/recompiler/tests/Makefile b/src/recompiler/tests/Makefile new file mode 100644 index 00000000..ff7f787a --- /dev/null +++ b/src/recompiler/tests/Makefile @@ -0,0 +1,109 @@ +-include ../config-host.mak + +$(call set-vpath, $(SRC_PATH)/tests) + +CFLAGS=-Wall -O2 -g -fno-strict-aliasing +#CFLAGS+=-msse2 +LDFLAGS= + +ifeq ($(ARCH),i386) +TESTS=linux-test testthread sha1-i386 test-i386 +endif +ifeq ($(ARCH),x86_64) 
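+# test-x86_64 builds the same test-i386.c source with -m64 (see rule below)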
+TESTS=test-x86_64 +endif +TESTS+=sha1# test_path +#TESTS+=test_path +#TESTS+=runcom + +QEMU=../i386-linux-user/qemu-i386 + +all: $(TESTS) + +hello-i386: hello-i386.c + $(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $< + strip $@ + +testthread: testthread.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lpthread + +test_path: test_path.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + ./$@ || { rm $@; exit 1; } + +# i386/x86_64 emulation test (test various opcodes) */ +test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \ + test-i386.h test-i386-shift.h test-i386-muldiv.h + $(CC) -m32 $(CFLAGS) $(LDFLAGS) -static -o $@ \ + $(<D)/test-i386.c $(<D)/test-i386-code16.S $(<D)/test-i386-vm86.S -lm + +test-x86_64: test-i386.c \ + test-i386.h test-i386-shift.h test-i386-muldiv.h + $(CC) -m64 $(CFLAGS) $(LDFLAGS) -static -o $@ $(<D)/test-i386.c -lm + +ifeq ($(ARCH),i386) +test: test-i386 + ./test-i386 > test-i386.ref +else +test: +endif + $(QEMU) test-i386 > test-i386.out + @if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi + +.PHONY: test-mmap +test-mmap: test-mmap.c + $(CC) $(CFLAGS) -Wall -static -O2 $(LDFLAGS) -o $@ $< + -./test-mmap + -$(QEMU) ./test-mmap + -$(QEMU) -p 8192 ./test-mmap 8192 + -$(QEMU) -p 16384 ./test-mmap 16384 + -$(QEMU) -p 32768 ./test-mmap 32768 + +# generic Linux and CPU test +linux-test: linux-test.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< -lm + +# speed test +sha1-i386: sha1.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +sha1: sha1.c + $(HOST_CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +speed: sha1 sha1-i386 + time ./sha1 + time $(QEMU) ./sha1-i386 + +# vm86 test +runcom: runcom.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +# NOTE: -fomit-frame-pointer is currently needed : this is a bug in libqemu +qruncom: qruncom.c ../ioport-user.c ../i386-user/libqemu.a + $(CC) $(CFLAGS) -fomit-frame-pointer $(LDFLAGS) -I../target-i386 -I.. -I../i386-user -I../fpu \ + -o $@ $(filter %.c, $^) -L../i386-user -lqemu -lm + +# arm test +hello-arm: hello-arm.o + arm-linux-ld -o $@ $< + +hello-arm.o: hello-arm.c + arm-linux-gcc -Wall -g -O2 -c -o $@ $< + +test-arm-iwmmxt: test-arm-iwmmxt.s + cpp < $< | arm-linux-gnu-gcc -Wall -static -march=iwmmxt -mabi=aapcs -x assembler - -o $@ + +# MIPS test +hello-mips: hello-mips.c + mips-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $< + +hello-mipsel: hello-mips.c + mipsel-linux-gnu-gcc -nostdlib -static -mno-abicalls -fno-PIC -mabi=32 -Wall -Wextra -g -O2 -o $@ $< + +# testsuite for the CRIS port. 
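+# (runs the check target of the cris/ sub-makefile)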
+test-cris: + $(MAKE) -C cris check + +clean: + rm -f *~ *.o test-i386.out test-i386.ref \ + test-x86_64.log test-x86_64.ref qruncom $(TESTS) diff --git a/src/recompiler/tests/hello-arm.c b/src/recompiler/tests/hello-arm.c new file mode 100644 index 00000000..e0daa7ad --- /dev/null +++ b/src/recompiler/tests/hello-arm.c @@ -0,0 +1,113 @@ +#define __NR_SYSCALL_BASE 0x900000 +#define __NR_exit1 (__NR_SYSCALL_BASE+ 1) +#define __NR_write (__NR_SYSCALL_BASE+ 4) + +#define __sys2(x) #x +#define __sys1(x) __sys2(x) + +#ifndef __syscall +#define __syscall(name) "swi\t" __sys1(__NR_##name) "\n\t" +#endif + +#define __syscall_return(type, res) \ +do { \ + return (type) (res); \ +} while (0) + +#define _syscall0(type,name) \ +type name(void) { \ + long __res; \ + __asm__ __volatile__ ( \ + __syscall(name) \ + "mov %0,r0" \ + :"=r" (__res) : : "r0","lr"); \ + __syscall_return(type,__res); \ +} + +#define _syscall1(type,name,type1,arg1) \ +type name(type1 arg1) { \ + long __res; \ + __asm__ __volatile__ ( \ + "mov\tr0,%1\n\t" \ + __syscall(name) \ + "mov %0,r0" \ + : "=r" (__res) \ + : "r" ((long)(arg1)) \ + : "r0","lr"); \ + __syscall_return(type,__res); \ +} + +#define _syscall2(type,name,type1,arg1,type2,arg2) \ +type name(type1 arg1,type2 arg2) { \ + long __res; \ + __asm__ __volatile__ ( \ + "mov\tr0,%1\n\t" \ + "mov\tr1,%2\n\t" \ + __syscall(name) \ + "mov\t%0,r0" \ + : "=r" (__res) \ + : "r" ((long)(arg1)),"r" ((long)(arg2)) \ + : "r0","r1","lr"); \ + __syscall_return(type,__res); \ +} + + +#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ +type name(type1 arg1,type2 arg2,type3 arg3) { \ + long __res; \ + __asm__ __volatile__ ( \ + "mov\tr0,%1\n\t" \ + "mov\tr1,%2\n\t" \ + "mov\tr2,%3\n\t" \ + __syscall(name) \ + "mov\t%0,r0" \ + : "=r" (__res) \ + : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)) \ + : "r0","r1","r2","lr"); \ + __syscall_return(type,__res); \ +} + + +#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + long __res; \ + __asm__ __volatile__ ( \ + "mov\tr0,%1\n\t" \ + "mov\tr1,%2\n\t" \ + "mov\tr2,%3\n\t" \ + "mov\tr3,%4\n\t" \ + __syscall(name) \ + "mov\t%0,r0" \ + : "=r" (__res) \ + : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4)) \ + : "r0","r1","r2","r3","lr"); \ + __syscall_return(type,__res); \ +} + + +#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ + long __res; \ + __asm__ __volatile__ ( \ + "mov\tr0,%1\n\t" \ + "mov\tr1,%2\n\t" \ + "mov\tr2,%3\n\t" \ + "mov\tr3,%4\n\t" \ + "mov\tr4,%5\n\t" \ + __syscall(name) \ + "mov\t%0,r0" \ + : "=r" (__res) \ + : "r" ((long)(arg1)),"r" ((long)(arg2)),"r" ((long)(arg3)),"r" ((long)(arg4)), \ + "r" ((long)(arg5)) \ + : "r0","r1","r2","r3","r4","lr"); \ + __syscall_return(type,__res); \ +} + +_syscall1(int,exit1,int,status); +_syscall3(int,write,int,fd,const char *,buf, int, len); + +void _start(void) +{ + write(1, "Hello World\n", 12); + exit1(0); +} diff --git a/src/recompiler/tests/hello-i386.c b/src/recompiler/tests/hello-i386.c new file mode 100644 index 00000000..e00245d3 --- /dev/null +++ b/src/recompiler/tests/hello-i386.c @@ -0,0 +1,26 @@ +#include <asm/unistd.h> + +extern inline volatile void exit(int status) +{ + int __res; + __asm__ volatile ("movl %%ecx,%%ebx\n"\ + "int $0x80" \ + : "=a" (__res) : "0" (__NR_exit),"c" ((long)(status))); +} + +extern inline int write(int fd, 
const char * buf, int len) +{ + int status; + __asm__ volatile ("pushl %%ebx\n"\ + "movl %%esi,%%ebx\n"\ + "int $0x80\n" \ + "popl %%ebx\n"\ + : "=a" (status) \ + : "0" (__NR_write),"S" ((long)(fd)),"c" ((long)(buf)),"d" ((long)(len))); +} + +void _start(void) +{ + write(1, "Hello World\n", 12); + exit(0); +} diff --git a/src/recompiler/tests/linux-test.c b/src/recompiler/tests/linux-test.c new file mode 100644 index 00000000..cce80373 --- /dev/null +++ b/src/recompiler/tests/linux-test.c @@ -0,0 +1,545 @@ +/* + * linux and CPU test + * + * Copyright (c) 2003 Fabrice Bellard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the General Public License version 2 (GPLv2) at this time for any software where + * a choice of GPL license versions is made available with the language indicating + * that GPLv2 or any later version may be used, or where a choice of which version + * of the GPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <inttypes.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <errno.h> +#include <utime.h> +#include <time.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sched.h> +#include <dirent.h> +#include <setjmp.h> +#include <sys/shm.h> + +#define TESTPATH "/tmp/linux-test.tmp" +#define TESTPORT 7654 +#define STACK_SIZE 16384 + +void error1(const char *filename, int line, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", filename, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + +int __chk_error(const char *filename, int line, int ret) +{ + if (ret < 0) { + error1(filename, line, "%m (ret=%d, errno=%d)", + ret, errno); + } + return ret; +} + +#define error(fmt, ...) 
error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__) + +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret)) + +/*******************************************************/ + +#define FILE_BUF_SIZE 300 + +void test_file(void) +{ + int fd, i, len, ret; + uint8_t buf[FILE_BUF_SIZE]; + uint8_t buf2[FILE_BUF_SIZE]; + uint8_t buf3[FILE_BUF_SIZE]; + char cur_dir[1024]; + struct stat st; + struct utimbuf tbuf; + struct iovec vecs[2]; + DIR *dir; + struct dirent *de; + + /* clean up, just in case */ + unlink(TESTPATH "/file1"); + unlink(TESTPATH "/file2"); + unlink(TESTPATH "/file3"); + rmdir(TESTPATH); + + if (getcwd(cur_dir, sizeof(cur_dir)) == NULL) + error("getcwd"); + + chk_error(mkdir(TESTPATH, 0755)); + + chk_error(chdir(TESTPATH)); + + /* open/read/write/close/readv/writev/lseek */ + + fd = chk_error(open("file1", O_WRONLY | O_TRUNC | O_CREAT, 0644)); + for(i=0;i < FILE_BUF_SIZE; i++) + buf[i] = i; + len = chk_error(write(fd, buf, FILE_BUF_SIZE / 2)); + if (len != (FILE_BUF_SIZE / 2)) + error("write"); + vecs[0].iov_base = buf + (FILE_BUF_SIZE / 2); + vecs[0].iov_len = 16; + vecs[1].iov_base = buf + (FILE_BUF_SIZE / 2) + 16; + vecs[1].iov_len = (FILE_BUF_SIZE / 2) - 16; + len = chk_error(writev(fd, vecs, 2)); + if (len != (FILE_BUF_SIZE / 2)) + error("writev"); + chk_error(close(fd)); + + chk_error(rename("file1", "file2")); + + fd = chk_error(open("file2", O_RDONLY)); + + len = chk_error(read(fd, buf2, FILE_BUF_SIZE)); + if (len != FILE_BUF_SIZE) + error("read"); + if (memcmp(buf, buf2, FILE_BUF_SIZE) != 0) + error("memcmp"); + +#define FOFFSET 16 + ret = chk_error(lseek(fd, FOFFSET, SEEK_SET)); + if (ret != 16) + error("lseek"); + vecs[0].iov_base = buf3; + vecs[0].iov_len = 32; + vecs[1].iov_base = buf3 + 32; + vecs[1].iov_len = FILE_BUF_SIZE - FOFFSET - 32; + len = chk_error(readv(fd, vecs, 2)); + if (len != FILE_BUF_SIZE - FOFFSET) + error("readv"); + if (memcmp(buf + FOFFSET, buf3, FILE_BUF_SIZE - FOFFSET) != 0) + error("memcmp"); + + chk_error(close(fd)); + + /* access */ + chk_error(access("file2", R_OK)); + + /* stat/chmod/utime/truncate */ + + chk_error(chmod("file2", 0600)); + tbuf.actime = 1001; + tbuf.modtime = 1000; + chk_error(truncate("file2", 100)); + chk_error(utime("file2", &tbuf)); + chk_error(stat("file2", &st)); + if (st.st_size != 100) + error("stat size"); + if (!S_ISREG(st.st_mode)) + error("stat mode"); + if ((st.st_mode & 0777) != 0600) + error("stat mode2"); + if (st.st_atime != 1001 || + st.st_mtime != 1000) + error("stat time"); + + chk_error(stat(TESTPATH, &st)); + if (!S_ISDIR(st.st_mode)) + error("stat mode"); + + /* fstat */ + fd = chk_error(open("file2", O_RDWR)); + chk_error(ftruncate(fd, 50)); + chk_error(fstat(fd, &st)); + chk_error(close(fd)); + + if (st.st_size != 50) + error("stat size"); + if (!S_ISREG(st.st_mode)) + error("stat mode"); + + /* symlink/lstat */ + chk_error(symlink("file2", "file3")); + chk_error(lstat("file3", &st)); + if (!S_ISLNK(st.st_mode)) + error("stat mode"); + + /* getdents */ + dir = opendir(TESTPATH); + if (!dir) + error("opendir"); + len = 0; + for(;;) { + de = readdir(dir); + if (!de) + break; + if (strcmp(de->d_name, ".") != 0 && + strcmp(de->d_name, "..") != 0 && + strcmp(de->d_name, "file2") != 0 && + strcmp(de->d_name, "file3") != 0) + error("readdir"); + len++; + } + closedir(dir); + if (len != 4) + error("readdir"); + + chk_error(unlink("file3")); + chk_error(unlink("file2")); + chk_error(chdir(cur_dir)); + chk_error(rmdir(TESTPATH)); +} + +void test_fork(void) +{ + int pid, status; + + pid = chk_error(fork()); + 
if (pid == 0) { + /* child */ + exit(2); + } + chk_error(waitpid(pid, &status, 0)); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 2) + error("waitpid status=0x%x", status); +} + +void test_time(void) +{ + struct timeval tv, tv2; + struct timespec ts, rem; + struct rusage rusg1, rusg2; + int ti, i; + + chk_error(gettimeofday(&tv, NULL)); + rem.tv_sec = 1; + ts.tv_sec = 0; + ts.tv_nsec = 20 * 1000000; + chk_error(nanosleep(&ts, &rem)); + if (rem.tv_sec != 1) + error("nanosleep"); + chk_error(gettimeofday(&tv2, NULL)); + ti = tv2.tv_sec - tv.tv_sec; + if (ti >= 2) + error("gettimeofday"); + + chk_error(getrusage(RUSAGE_SELF, &rusg1)); + for(i = 0;i < 10000; i++); + chk_error(getrusage(RUSAGE_SELF, &rusg2)); + if ((rusg2.ru_utime.tv_sec - rusg1.ru_utime.tv_sec) < 0 || + (rusg2.ru_stime.tv_sec - rusg1.ru_stime.tv_sec) < 0) + error("getrusage"); +} + +void pstrcpy(char *buf, int buf_size, const char *str) +{ + int c; + char *q = buf; + + if (buf_size <= 0) + return; + + for(;;) { + c = *str++; + if (c == 0 || q >= buf + buf_size - 1) + break; + *q++ = c; + } + *q = '\0'; +} + +/* strcat and truncate. */ +char *pstrcat(char *buf, int buf_size, const char *s) +{ + int len; + len = strlen(buf); + if (len < buf_size) + pstrcpy(buf + len, buf_size - len, s); + return buf; +} + +int server_socket(void) +{ + int val, fd; + struct sockaddr_in sockaddr; + + /* server socket */ + fd = chk_error(socket(PF_INET, SOCK_STREAM, 0)); + + val = 1; + chk_error(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))); + + sockaddr.sin_family = AF_INET; + sockaddr.sin_port = htons(TESTPORT); + sockaddr.sin_addr.s_addr = 0; + chk_error(bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr))); + chk_error(listen(fd, 0)); + return fd; + +} + +int client_socket(void) +{ + int fd; + struct sockaddr_in sockaddr; + + /* server socket */ + fd = chk_error(socket(PF_INET, SOCK_STREAM, 0)); + sockaddr.sin_family = AF_INET; + sockaddr.sin_port = htons(TESTPORT); + inet_aton("127.0.0.1", &sockaddr.sin_addr); + chk_error(connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr))); + return fd; +} + +const char socket_msg[] = "hello socket\n"; + +void test_socket(void) +{ + int server_fd, client_fd, fd, pid, ret, val; + struct sockaddr_in sockaddr; + socklen_t len; + char buf[512]; + + server_fd = server_socket(); + + /* test a few socket options */ + len = sizeof(val); + chk_error(getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &val, &len)); + if (val != SOCK_STREAM) + error("getsockopt"); + + pid = chk_error(fork()); + if (pid == 0) { + client_fd = client_socket(); + send(client_fd, socket_msg, sizeof(socket_msg), 0); + close(client_fd); + exit(0); + } + len = sizeof(sockaddr); + fd = chk_error(accept(server_fd, (struct sockaddr *)&sockaddr, &len)); + + ret = chk_error(recv(fd, buf, sizeof(buf), 0)); + if (ret != sizeof(socket_msg)) + error("recv"); + if (memcmp(buf, socket_msg, sizeof(socket_msg)) != 0) + error("socket_msg"); + chk_error(close(fd)); + chk_error(close(server_fd)); +} + +#define WCOUNT_MAX 512 + +void test_pipe(void) +{ + fd_set rfds, wfds; + int fds[2], fd_max, ret; + uint8_t ch; + int wcount, rcount; + + chk_error(pipe(fds)); + chk_error(fcntl(fds[0], F_SETFL, O_NONBLOCK)); + chk_error(fcntl(fds[1], F_SETFL, O_NONBLOCK)); + wcount = 0; + rcount = 0; + for(;;) { + FD_ZERO(&rfds); + fd_max = fds[0]; + FD_SET(fds[0], &rfds); + + FD_ZERO(&wfds); + FD_SET(fds[1], &wfds); + if (fds[1] > fd_max) + fd_max = fds[1]; + + ret = chk_error(select(fd_max + 1, &rfds, &wfds, NULL, NULL)); + if (ret > 0) { + if 
(FD_ISSET(fds[0], &rfds)) { + chk_error(read(fds[0], &ch, 1)); + rcount++; + if (rcount >= WCOUNT_MAX) + break; + } + if (FD_ISSET(fds[1], &wfds)) { + ch = 'a'; + chk_error(write(fds[0], &ch, 1)); + wcount++; + } + } + } + chk_error(close(fds[0])); + chk_error(close(fds[1])); +} + +int thread1_res; +int thread2_res; + +int thread1_func(void *arg) +{ + int i; + for(i=0;i<5;i++) { + thread1_res++; + usleep(10 * 1000); + } + return 0; +} + +int thread2_func(void *arg) +{ + int i; + for(i=0;i<6;i++) { + thread2_res++; + usleep(10 * 1000); + } + return 0; +} + +void test_clone(void) +{ + uint8_t *stack1, *stack2; + int pid1, pid2, status1, status2; + + stack1 = malloc(STACK_SIZE); + pid1 = chk_error(clone(thread1_func, stack1 + STACK_SIZE, + CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello1")); + + stack2 = malloc(STACK_SIZE); + pid2 = chk_error(clone(thread2_func, stack2 + STACK_SIZE, + CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello2")); + + while (waitpid(pid1, &status1, 0) != pid1); + while (waitpid(pid2, &status2, 0) != pid2); + if (thread1_res != 5 || + thread2_res != 6) + error("clone"); +} + +/***********************************/ + +volatile int alarm_count; +jmp_buf jmp_env; + +void sig_alarm(int sig) +{ + if (sig != SIGALRM) + error("signal"); + alarm_count++; +} + +void sig_segv(int sig, siginfo_t *info, void *puc) +{ + if (sig != SIGSEGV) + error("signal"); + longjmp(jmp_env, 1); +} + +void test_signal(void) +{ + struct sigaction act; + struct itimerval it, oit; + + /* timer test */ + + alarm_count = 0; + + act.sa_handler = sig_alarm; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + chk_error(sigaction(SIGALRM, &act, NULL)); + + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 10 * 1000; + it.it_value.tv_sec = 0; + it.it_value.tv_usec = 10 * 1000; + chk_error(setitimer(ITIMER_REAL, &it, NULL)); + chk_error(getitimer(ITIMER_REAL, &oit)); + if (oit.it_value.tv_sec != it.it_value.tv_sec || + oit.it_value.tv_usec != it.it_value.tv_usec) + error("itimer"); + + while (alarm_count < 5) { + usleep(10 * 1000); + } + + it.it_interval.tv_sec = 0; + it.it_interval.tv_usec = 0; + it.it_value.tv_sec = 0; + it.it_value.tv_usec = 0; + memset(&oit, 0xff, sizeof(oit)); + chk_error(setitimer(ITIMER_REAL, &it, &oit)); + if (oit.it_value.tv_sec != 0 || + oit.it_value.tv_usec != 10 * 1000) + error("setitimer"); + + /* SIGSEGV test */ + act.sa_sigaction = sig_segv; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + chk_error(sigaction(SIGSEGV, &act, NULL)); + if (setjmp(jmp_env) == 0) { + *(uint8_t *)0 = 0; + } + + act.sa_handler = SIG_DFL; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + chk_error(sigaction(SIGSEGV, &act, NULL)); +} + +#define SHM_SIZE 32768 + +void test_shm(void) +{ + void *ptr; + int shmid; + + shmid = chk_error(shmget(IPC_PRIVATE, SHM_SIZE, IPC_CREAT | 0777)); + ptr = shmat(shmid, NULL, 0); + if (!ptr) + error("shmat"); + + memset(ptr, 0, SHM_SIZE); + + chk_error(shmctl(shmid, IPC_RMID, 0)); + chk_error(shmdt(ptr)); +} + +int main(int argc, char **argv) +{ + test_file(); + test_fork(); + test_time(); + test_socket(); + // test_clone(); + test_signal(); + test_shm(); + return 0; +} diff --git a/src/recompiler/tests/pi_10.com b/src/recompiler/tests/pi_10.com Binary files differnew file mode 100644 index 00000000..8993ba1a --- /dev/null +++ b/src/recompiler/tests/pi_10.com diff --git a/src/recompiler/tests/qruncom.c b/src/recompiler/tests/qruncom.c new file mode 100644 index 00000000..079f7a29 --- /dev/null +++ b/src/recompiler/tests/qruncom.c @@ -0,0 +1,284 @@ 
+/* + * Example of use of user mode libqemu: launch a basic .com DOS + * executable + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <signal.h> +#include <malloc.h> + +#include "cpu.h" + +//#define SIGTEST + +int cpu_get_pic_interrupt(CPUState *env) +{ + return -1; +} + +uint64_t cpu_get_tsc(CPUState *env) +{ + return 0; +} + +static void set_gate(void *ptr, unsigned int type, unsigned int dpl, + unsigned long addr, unsigned int sel) +{ + unsigned int e1, e2; + e1 = (addr & 0xffff) | (sel << 16); + e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); + stl((uint8_t *)ptr, e1); + stl((uint8_t *)ptr + 4, e2); +} + +uint64_t idt_table[256]; + +/* only dpl matters as we do only user space emulation */ +static void set_idt(int n, unsigned int dpl) +{ + set_gate(idt_table + n, 0, dpl, 0, 0); +} + +void qemu_free(void *ptr) +{ + free(ptr); +} + +void *qemu_malloc(size_t size) +{ + return malloc(size); +} + +void *qemu_mallocz(size_t size) +{ + void *ptr; + ptr = qemu_malloc(size); + if (!ptr) + return NULL; + memset(ptr, 0, size); + return ptr; +} + +void *qemu_vmalloc(size_t size) +{ + return memalign(4096, size); +} + +void qemu_vfree(void *ptr) +{ + free(ptr); +} + +void qemu_printf(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +/* XXX: this is a bug in helper2.c */ +int errno; + +/**********************************************/ + +#define COM_BASE_ADDR 0x10100 + +static void usage(void) +{ + printf("qruncom version 0.1 (c) 2003 Fabrice Bellard\n" + "usage: qruncom file.com\n" + "user mode libqemu demo: run simple .com DOS executables\n"); + exit(1); +} + +static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg) +{ + return (uint8_t *)((seg << 4) + (reg & 0xffff)); +} + +static inline void pushw(CPUState *env, int val) +{ + env->regs[R_ESP] = (env->regs[R_ESP] & ~0xffff) | ((env->regs[R_ESP] - 2) & 0xffff); + *(uint16_t *)seg_to_linear(env->segs[R_SS].selector, env->regs[R_ESP]) = val; +} + +static void host_segv_handler(int host_signum, siginfo_t *info, + void *puc) +{ + if (cpu_signal_handler(host_signum, info, puc)) { + return; + } + abort(); +} + +int main(int argc, char **argv) +{ + uint8_t *vm86_mem; + const char *filename; + int fd, ret, seg; + CPUState *env; + + if (argc != 2) + usage(); + filename = argv[1]; + + vm86_mem = mmap((void *)0x00000000, 0x110000, + PROT_WRITE | PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + if (vm86_mem == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + /* load the MSDOS .com executable */ + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + exit(1); + } + ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256); + if (ret < 0) { + perror("read"); + exit(1); + } + close(fd); + + /* install exception handler for CPU emulator */ + { + struct sigaction act; + + sigfillset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + // act.sa_flags |= SA_ONSTACK; + + act.sa_sigaction = host_segv_handler; + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + } + + // cpu_set_log(CPU_LOG_TB_IN_ASM | CPU_LOG_TB_OUT_ASM | CPU_LOG_EXEC); + + env = cpu_init("qemu32"); + + cpu_x86_set_cpl(env, 3); + + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; + /* NOTE: hflags duplicates some of the virtual CPU state */ + env->hflags |= HF_PE_MASK | VM_MASK; + + /* flags setup : we activate the IRQs by default as in user + mode. 
We also activate the VM86 flag to run DOS code */ + env->eflags |= IF_MASK | VM_MASK; + + /* init basic registers */ + env->eip = 0x100; + env->regs[R_ESP] = 0xfffe; + seg = (COM_BASE_ADDR - 0x100) >> 4; + + cpu_x86_load_seg_cache(env, R_CS, seg, + (seg << 4), 0xffff, 0); + cpu_x86_load_seg_cache(env, R_SS, seg, + (seg << 4), 0xffff, 0); + cpu_x86_load_seg_cache(env, R_DS, seg, + (seg << 4), 0xffff, 0); + cpu_x86_load_seg_cache(env, R_ES, seg, + (seg << 4), 0xffff, 0); + cpu_x86_load_seg_cache(env, R_FS, seg, + (seg << 4), 0xffff, 0); + cpu_x86_load_seg_cache(env, R_GS, seg, + (seg << 4), 0xffff, 0); + + /* exception support */ + env->idt.base = (unsigned long)idt_table; + env->idt.limit = sizeof(idt_table) - 1; + set_idt(0, 0); + set_idt(1, 0); + set_idt(2, 0); + set_idt(3, 3); + set_idt(4, 3); + set_idt(5, 3); + set_idt(6, 0); + set_idt(7, 0); + set_idt(8, 0); + set_idt(9, 0); + set_idt(10, 0); + set_idt(11, 0); + set_idt(12, 0); + set_idt(13, 0); + set_idt(14, 0); + set_idt(15, 0); + set_idt(16, 0); + set_idt(17, 0); + set_idt(18, 0); + set_idt(19, 0); + + /* put return code */ + *seg_to_linear(env->segs[R_CS].selector, 0) = 0xb4; /* mov ah, $0 */ + *seg_to_linear(env->segs[R_CS].selector, 1) = 0x00; + *seg_to_linear(env->segs[R_CS].selector, 2) = 0xcd; /* int $0x21 */ + *seg_to_linear(env->segs[R_CS].selector, 3) = 0x21; + pushw(env, 0x0000); + + /* the value of these registers seem to be assumed by pi_10.com */ + env->regs[R_ESI] = 0x100; + env->regs[R_ECX] = 0xff; + env->regs[R_EBP] = 0x0900; + env->regs[R_EDI] = 0xfffe; + + /* inform the emulator of the mmaped memory */ + page_set_flags(0x00000000, 0x110000, + PAGE_WRITE | PAGE_READ | PAGE_EXEC | PAGE_VALID); + + for(;;) { + ret = cpu_x86_exec(env); + switch(ret) { + case EXCP0D_GPF: + { + int int_num, ah; + int_num = *(uint8_t *)(env->segs[R_CS].base + env->eip + 1); + if (int_num != 0x21) + goto unknown_int; + ah = (env->regs[R_EAX] >> 8) & 0xff; + switch(ah) { + case 0x00: /* exit */ + exit(0); + case 0x02: /* write char */ + { + uint8_t c = env->regs[R_EDX]; + write(1, &c, 1); + } + break; + case 0x09: /* write string */ + { + uint8_t c; + for(;;) { + c = *seg_to_linear(env->segs[R_DS].selector, env->regs[R_EAX]); + if (c == '$') + break; + write(1, &c, 1); + } + env->regs[R_EAX] = (env->regs[R_EAX] & ~0xff) | '$'; + } + break; + default: + unknown_int: + fprintf(stderr, "unsupported int 0x%02x\n", int_num); + cpu_dump_state(env, stderr, fprintf, 0); + // exit(1); + } + env->eip += 2; + } + break; + default: + fprintf(stderr, "unhandled cpu_exec return code (0x%x)\n", ret); + cpu_dump_state(env, stderr, fprintf, 0); + exit(1); + } + } +} diff --git a/src/recompiler/tests/runcom.c b/src/recompiler/tests/runcom.c new file mode 100644 index 00000000..63805666 --- /dev/null +++ b/src/recompiler/tests/runcom.c @@ -0,0 +1,195 @@ +/* + * Simple example of use of vm86: launch a basic .com DOS executable + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <signal.h> + +#include <linux/unistd.h> +#include <asm/vm86.h> + +//#define SIGTEST + +#undef __syscall_return +#define __syscall_return(type, res) \ +do { \ + return (type) (res); \ +} while (0) + +_syscall2(int, vm86, int, func, struct vm86plus_struct *, v86) + +#define COM_BASE_ADDR 0x10100 + +static void usage(void) +{ + printf("runcom version 0.1 (c) 2003 Fabrice Bellard\n" + "usage: runcom file.com\n" + "VM86 Run simple .com DOS executables (linux vm86 test mode)\n"); + 
exit(1); +} + +static inline void set_bit(uint8_t *a, unsigned int bit) +{ + a[bit / 8] |= (1 << (bit % 8)); +} + +static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg) +{ + return (uint8_t *)((seg << 4) + (reg & 0xffff)); +} + +static inline void pushw(struct vm86_regs *r, int val) +{ + r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff); + *(uint16_t *)seg_to_linear(r->ss, r->esp) = val; +} + +void dump_regs(struct vm86_regs *r) +{ + fprintf(stderr, + "EAX=%08lx EBX=%08lx ECX=%08lx EDX=%08lx\n" + "ESI=%08lx EDI=%08lx EBP=%08lx ESP=%08lx\n" + "EIP=%08lx EFL=%08lx\n" + "CS=%04x DS=%04x ES=%04x SS=%04x FS=%04x GS=%04x\n", + r->eax, r->ebx, r->ecx, r->edx, r->esi, r->edi, r->ebp, r->esp, + r->eip, r->eflags, + r->cs, r->ds, r->es, r->ss, r->fs, r->gs); +} + +#ifdef SIGTEST +void alarm_handler(int sig) +{ + fprintf(stderr, "alarm signal=%d\n", sig); + alarm(1); +} +#endif + +int main(int argc, char **argv) +{ + uint8_t *vm86_mem; + const char *filename; + int fd, ret, seg; + struct vm86plus_struct ctx; + struct vm86_regs *r; + + if (argc != 2) + usage(); + filename = argv[1]; + + vm86_mem = mmap((void *)0x00000000, 0x110000, + PROT_WRITE | PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + if (vm86_mem == MAP_FAILED) { + perror("mmap"); + exit(1); + } +#ifdef SIGTEST + { + struct sigaction act; + + act.sa_handler = alarm_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGALRM, &act, NULL); + alarm(1); + } +#endif + + /* load the MSDOS .com executable */ + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + exit(1); + } + ret = read(fd, vm86_mem + COM_BASE_ADDR, 65536 - 256); + if (ret < 0) { + perror("read"); + exit(1); + } + close(fd); + + memset(&ctx, 0, sizeof(ctx)); + /* init basic registers */ + r = &ctx.regs; + r->eip = 0x100; + r->esp = 0xfffe; + seg = (COM_BASE_ADDR - 0x100) >> 4; + r->cs = seg; + r->ss = seg; + r->ds = seg; + r->es = seg; + r->fs = seg; + r->gs = seg; + r->eflags = VIF_MASK; + + /* put return code */ + set_bit((uint8_t *)&ctx.int_revectored, 0x21); + *seg_to_linear(r->cs, 0) = 0xb4; /* mov ah, $0 */ + *seg_to_linear(r->cs, 1) = 0x00; + *seg_to_linear(r->cs, 2) = 0xcd; /* int $0x21 */ + *seg_to_linear(r->cs, 3) = 0x21; + pushw(&ctx.regs, 0x0000); + + /* the value of these registers seem to be assumed by pi_10.com */ + r->esi = 0x100; + r->ecx = 0xff; + r->ebp = 0x0900; + r->edi = 0xfffe; + + for(;;) { + ret = vm86(VM86_ENTER, &ctx); + switch(VM86_TYPE(ret)) { + case VM86_INTx: + { + int int_num, ah; + + int_num = VM86_ARG(ret); + if (int_num != 0x21) + goto unknown_int; + ah = (r->eax >> 8) & 0xff; + switch(ah) { + case 0x00: /* exit */ + exit(0); + case 0x02: /* write char */ + { + uint8_t c = r->edx; + write(1, &c, 1); + } + break; + case 0x09: /* write string */ + { + uint8_t c; + for(;;) { + c = *seg_to_linear(r->ds, r->edx); + if (c == '$') + break; + write(1, &c, 1); + } + r->eax = (r->eax & ~0xff) | '$'; + } + break; + default: + unknown_int: + fprintf(stderr, "unsupported int 0x%02x\n", int_num); + dump_regs(&ctx.regs); + // exit(1); + } + } + break; + case VM86_SIGNAL: + /* a signal came, we just ignore that */ + break; + case VM86_STI: + break; + default: + fprintf(stderr, "unhandled vm86 return code (0x%x)\n", ret); + dump_regs(&ctx.regs); + exit(1); + } + } +} diff --git a/src/recompiler/tests/sha1.c b/src/recompiler/tests/sha1.c new file mode 100644 index 00000000..93b7c8e8 --- /dev/null +++ b/src/recompiler/tests/sha1.c @@ -0,0 +1,240 @@ + +/* from valgrind tests */ + +/* 
================ sha1.c ================ */ +/* +SHA-1 in C +By Steve Reid <steve@edmweb.com> +100% Public Domain + +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */ +/* #define SHA1HANDSOFF * Copies data before messing with it. */ + +#define SHA1HANDSOFF + +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +/* ================ sha1.h ================ */ +/* +SHA-1 in C +By Steve Reid <steve@edmweb.com> +100% Public Domain +*/ + +typedef struct { + uint32_t state[5]; + uint32_t count[2]; + unsigned char buffer[64]; +} SHA1_CTX; + +void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]); +void SHA1Init(SHA1_CTX* context); +void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len); +void SHA1Final(unsigned char digest[20], SHA1_CTX* context); +/* ================ end of sha1.h ================ */ +#include <endian.h> + +#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +#if BYTE_ORDER == LITTLE_ENDIAN +#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#elif BYTE_ORDER == BIG_ENDIAN +#define blk0(i) block->l[i] +#else +#error "Endianness not defined!" +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +/* Hash a single 512-bit block. This is the core of the algorithm. */ + +void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]) +{ +uint32_t a, b, c, d, e; +typedef union { + unsigned char c[64]; + uint32_t l[16]; +} CHAR64LONG16; +#ifdef SHA1HANDSOFF +CHAR64LONG16 block[1]; /* use array to appear as a pointer */ + memcpy(block, buffer, 64); +#else + /* The following had better never be used because it causes the + * pointer-to-const buffer to be cast into a pointer to non-const. + * And the result is written through. I threw a "const" in, hoping + * this will cause a diagnostic. + */ +CHAR64LONG16* block = (const CHAR64LONG16*)buffer; +#endif + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + /* 4 rounds of 20 operations each. Loop unrolled. 
*/ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + /* Wipe variables */ + a = b = c = d = e = 0; +#ifdef SHA1HANDSOFF + memset(block, '\0', sizeof(block)); +#endif +} + + +/* SHA1Init - Initialize new context */ + +void SHA1Init(SHA1_CTX* context) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + + +/* Run your data through this. */ + +void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len) +{ +uint32_t i; +uint32_t j; + + j = context->count[0]; + if ((context->count[0] += len << 3) < j) + context->count[1]++; + context->count[1] += (len>>29); + j = (j >> 3) & 63; + if ((j + len) > 63) { + memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) { + SHA1Transform(context->state, &data[i]); + } + j = 0; + } + else i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ + +void SHA1Final(unsigned char digest[20], SHA1_CTX* context) +{ +unsigned i; +unsigned char finalcount[8]; +unsigned char c; + +#if 0 /* untested "improvement" by DHR */ + /* Convert context->count to a sequence of bytes + * in finalcount. Second element first, but + * big-endian order within element. + * But we do it all backwards. + */ + unsigned char *fcp = &finalcount[8]; + + for (i = 0; i < 2; i++) + { + uint32_t t = context->count[i]; + int j; + + for (j = 0; j < 4; t >>= 8, j++) + *--fcp = (unsigned char) t; + } +#else + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } +#endif + c = 0200; + SHA1Update(context, &c, 1); + while ((context->count[0] & 504) != 448) { + c = 0000; + SHA1Update(context, &c, 1); + } + SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ + for (i = 0; i < 20; i++) { + digest[i] = (unsigned char) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + /* Wipe variables */ + memset(context, '\0', sizeof(*context)); + memset(&finalcount, '\0', sizeof(finalcount)); +} +/* ================ end of sha1.c ================ */ + +#define BUFSIZE 4096 + +int +main(int argc, char **argv) +{ + SHA1_CTX ctx; + unsigned char hash[20], buf[BUFSIZE]; + int i; + + for(i=0;i<BUFSIZE;i++) + buf[i] = i; + + SHA1Init(&ctx); + for(i=0;i<1000;i++) + SHA1Update(&ctx, buf, BUFSIZE); + SHA1Final(hash, &ctx); + + printf("SHA1="); + for(i=0;i<20;i++) + printf("%02x", hash[i]); + printf("\n"); + return 0; +} diff --git a/src/recompiler/tests/test-i386-code16.S b/src/recompiler/tests/test-i386-code16.S new file mode 100644 index 00000000..816c24b9 --- /dev/null +++ b/src/recompiler/tests/test-i386-code16.S @@ -0,0 +1,79 @@ + .code16 + .globl code16_start + .globl code16_end + +CS_SEG = 0xf + +code16_start: + + .globl code16_func1 + + /* basic test */ +code16_func1 = . - code16_start + mov $1, %eax + data32 lret + +/* test push/pop in 16 bit mode */ + .globl code16_func2 +code16_func2 = . - code16_start + xor %eax, %eax + mov $0x12345678, %ebx + movl %esp, %ecx + push %bx + subl %esp, %ecx + pop %ax + data32 lret + +/* test various jmp opcodes */ + .globl code16_func3 +code16_func3 = . - code16_start + jmp 1f + nop +1: + mov $4, %eax + mov $0x12345678, %ebx + xor %bx, %bx + jz 2f + add $2, %ax +2: + + call myfunc + + lcall $CS_SEG, $(myfunc2 - code16_start) + + ljmp $CS_SEG, $(myjmp1 - code16_start) +myjmp1_next: + + cs lcall *myfunc2_addr - code16_start + + cs ljmp *myjmp2_addr - code16_start +myjmp2_next: + + data32 lret + +myfunc2_addr: + .short myfunc2 - code16_start + .short CS_SEG + +myjmp2_addr: + .short myjmp2 - code16_start + .short CS_SEG + +myjmp1: + add $8, %ax + jmp myjmp1_next + +myjmp2: + add $16, %ax + jmp myjmp2_next + +myfunc: + add $1, %ax + ret + +myfunc2: + add $4, %ax + lret + + +code16_end: diff --git a/src/recompiler/tests/test-i386-muldiv.h b/src/recompiler/tests/test-i386-muldiv.h new file mode 100644 index 00000000..015f59e1 --- /dev/null +++ b/src/recompiler/tests/test-i386-muldiv.h @@ -0,0 +1,76 @@ + +void glue(glue(test_, OP), b)(long op0, long op1) +{ + long res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm ("push %4\n\t" + "popf\n\t" + stringify(OP)"b %b2\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=a" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", + stringify(OP) "b", s0, s1, res, flags & CC_MASK); +} + +void glue(glue(test_, OP), w)(long op0h, long op0, long op1) +{ + long res, s1, flags, resh; + s1 = op1; + resh = op0h; + res = op0; + flags = 0; + asm ("push %5\n\t" + "popf\n\t" + stringify(OP) "w %w3\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=a" (res), "=g" (flags), "=d" (resh) + : "q" (s1), "0" (res), "1" (flags), "2" (resh)); + printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n", + stringify(OP) "w", op0h, op0, s1, resh, res, flags & CC_MASK); +} + +void glue(glue(test_, OP), l)(long op0h, long op0, long op1) +{ + long res, s1, flags, resh; + s1 = op1; + resh = op0h; + res = op0; + flags = 0; + asm 
("push %5\n\t" + "popf\n\t" + stringify(OP) "l %k3\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=a" (res), "=g" (flags), "=d" (resh) + : "q" (s1), "0" (res), "1" (flags), "2" (resh)); + printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n", + stringify(OP) "l", op0h, op0, s1, resh, res, flags & CC_MASK); +} + +#if defined(__x86_64__) +void glue(glue(test_, OP), q)(long op0h, long op0, long op1) +{ + long res, s1, flags, resh; + s1 = op1; + resh = op0h; + res = op0; + flags = 0; + asm ("push %5\n\t" + "popf\n\t" + stringify(OP) "q %3\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=a" (res), "=g" (flags), "=d" (resh) + : "q" (s1), "0" (res), "1" (flags), "2" (resh)); + printf("%-10s AH=" FMTLX " AL=" FMTLX " B=" FMTLX " RH=" FMTLX " RL=" FMTLX " CC=%04lx\n", + stringify(OP) "q", op0h, op0, s1, resh, res, flags & CC_MASK); +} +#endif + +#undef OP diff --git a/src/recompiler/tests/test-i386-shift.h b/src/recompiler/tests/test-i386-shift.h new file mode 100644 index 00000000..3d8f84bf --- /dev/null +++ b/src/recompiler/tests/test-i386-shift.h @@ -0,0 +1,185 @@ + +#define exec_op glue(exec_, OP) +#define exec_opq glue(glue(exec_, OP), q) +#define exec_opl glue(glue(exec_, OP), l) +#define exec_opw glue(glue(exec_, OP), w) +#define exec_opb glue(glue(exec_, OP), b) + +#ifndef OP_SHIFTD + +#ifdef OP_NOBYTE +#define EXECSHIFT(size, rsize, res, s1, s2, flags) \ + asm ("push %4\n\t"\ + "popf\n\t"\ + stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=g" (res), "=g" (flags)\ + : "r" (s1), "0" (res), "1" (flags)); +#else +#define EXECSHIFT(size, rsize, res, s1, s2, flags) \ + asm ("push %4\n\t"\ + "popf\n\t"\ + stringify(OP) size " %%cl, %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=q" (res), "=g" (flags)\ + : "c" (s1), "0" (res), "1" (flags)); +#endif + +#if defined(__x86_64__) +void exec_opq(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("q", "", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "q", s0, s1, res, iflags, flags & CC_MASK); +} +#endif + +void exec_opl(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("l", "k", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "l", s0, s1, res, iflags, flags & CC_MASK); +} + +void exec_opw(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("w", "w", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "w", s0, s1, res, iflags, flags & CC_MASK); +} + +#else +#define EXECSHIFT(size, rsize, res, s1, s2, flags) \ + asm ("push %4\n\t"\ + "popf\n\t"\ + stringify(OP) size " %%cl, %" rsize "5, %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=g" (res), "=g" (flags)\ + : "c" (s1), "0" (res), "1" (flags), "r" (s2)); + +#if defined(__x86_64__) +void exec_opq(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("q", "", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" 
FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "q", s0, s2, s1, res, iflags, flags & CC_MASK); +} +#endif + +void exec_opl(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("l", "k", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "l", s0, s2, s1, res, iflags, flags & CC_MASK); +} + +void exec_opw(long s2, long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("w", "w", res, s1, s2, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " C=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "w", s0, s2, s1, res, iflags, flags & CC_MASK); +} + +#endif + +#ifndef OP_NOBYTE +void exec_opb(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECSHIFT("b", "b", res, s1, 0, flags); + /* overflow is undefined if count != 1 */ + if (s1 != 1) + flags &= ~CC_O; + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", + stringify(OP) "b", s0, s1, res, iflags, flags & CC_MASK); +} +#endif + +void exec_op(long s2, long s0, long s1) +{ + s2 = i2l(s2); + s0 = i2l(s0); +#if defined(__x86_64__) + exec_opq(s2, s0, s1, 0); +#endif + exec_opl(s2, s0, s1, 0); +#ifdef OP_SHIFTD + exec_opw(s2, s0, s1, 0); +#else + exec_opw(s2, s0, s1, 0); +#endif +#ifndef OP_NOBYTE + exec_opb(s0, s1, 0); +#endif +#ifdef OP_CC +#if defined(__x86_64__) + exec_opq(s2, s0, s1, CC_C); +#endif + exec_opl(s2, s0, s1, CC_C); + exec_opw(s2, s0, s1, CC_C); + exec_opb(s0, s1, CC_C); +#endif +} + +void glue(test_, OP)(void) +{ + int i, n; +#if defined(__x86_64__) + n = 64; +#else + n = 32; +#endif + for(i = 0; i < n; i++) + exec_op(0x21ad3d34, 0x12345678, i); + for(i = 0; i < n; i++) + exec_op(0x813f3421, 0x82345679, i); +} + +void *glue(_test_, OP) __init_call = glue(test_, OP); + +#undef OP +#undef OP_CC +#undef OP_SHIFTD +#undef OP_NOBYTE +#undef EXECSHIFT diff --git a/src/recompiler/tests/test-i386-ssse3.c b/src/recompiler/tests/test-i386-ssse3.c new file mode 100644 index 00000000..0a42bd03 --- /dev/null +++ b/src/recompiler/tests/test-i386-ssse3.c @@ -0,0 +1,57 @@ +/* See if various MMX/SSE SSSE3 instructions give expected results */ +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +int main(int argc, char *argv[]) { + char hello[16]; + const char ehlo[8] = "EHLO "; + uint64_t mask = 0x8080800302020001; + + uint64_t a = 0x0000000000090007; + uint64_t b = 0x0000000000000000; + uint32_t c; + uint16_t d; + + const char e[16] = "LLOaaaaaaaaaaaaa"; + const char f[16] = "aaaaaaaaaaaaaaHE"; + + /* pshufb mm1/xmm1, mm2/xmm2 */ + asm volatile ("movq (%0), %%mm0" : : "r" (ehlo) : "mm0", "mm1"); + asm volatile ("movq %0, %%mm1" : : "m" (mask)); + asm volatile ("pshufb %mm1, %mm0"); + asm volatile ("movq %%mm0, %0" : "=m" (hello)); + printf("%s\n", hello); + + /* pshufb mm1/xmm1, m64/m128 */ + asm volatile ("movq (%0), %%mm0" : : "r" (ehlo) : "mm0"); + asm volatile ("pshufb %0, %%mm0" : : "m" (mask)); + asm volatile ("movq %%mm0, %0" : "=m" (hello)); + printf("%s\n", hello); + + /* psubsw mm1/xmm1, m64/m128 */ + asm volatile ("movq %0, %%mm0" : : "r" (a) : "mm0"); + asm volatile ("phsubsw %0, %%mm0" : : "m" (b)); + asm volatile ("movq %%mm0, %0" : "=m" (a)); + printf("%i - %i = %i\n", 9, 7, -(int16_t) a); + + /* palignr mm1/xmm1, m64/m128, imm8 */ + asm 
volatile ("movdqa (%0), %%xmm0" : : "r" (e) : "xmm0"); + asm volatile ("palignr $14, (%0), %%xmm0" : : "r" (f)); + asm volatile ("movdqa %%xmm0, (%0)" : : "r" (hello)); + printf("%5.5s\n", hello); + +#if 1 /* SSE4 */ + /* popcnt r64, r/m64 */ + asm volatile ("movq $0x8421000010009c63, %%rax" : : : "rax"); + asm volatile ("popcnt %%ax, %%dx" : : : "dx"); + asm volatile ("popcnt %%eax, %%ecx" : : : "ecx"); + asm volatile ("popcnt %rax, %rax"); + asm volatile ("movq %%rax, %0" : "=m" (a)); + asm volatile ("movl %%ecx, %0" : "=m" (c)); + asm volatile ("movw %%dx, %0" : "=m" (d)); + printf("%i = %i\n%i = %i = %i\n", 13, (int) a, 9, c, d + 1); +#endif + + return 0; +} diff --git a/src/recompiler/tests/test-i386-vm86.S b/src/recompiler/tests/test-i386-vm86.S new file mode 100644 index 00000000..3bb96c99 --- /dev/null +++ b/src/recompiler/tests/test-i386-vm86.S @@ -0,0 +1,103 @@ + .code16 + .globl vm86_code_start + .globl vm86_code_end + +#define GET_OFFSET(x) ((x) - vm86_code_start + 0x100) + +vm86_code_start: + movw $GET_OFFSET(hello_world), %dx + movb $0x09, %ah + int $0x21 + + /* prepare int 0x90 vector */ + xorw %ax, %ax + movw %ax, %es + es movw $GET_OFFSET(int90_test), 0x90 * 4 + es movw %cs, 0x90 * 4 + 2 + + /* launch int 0x90 */ + + int $0x90 + + /* test IF support */ + movw $GET_OFFSET(IF_msg), %dx + movb $0x09, %ah + int $0x21 + + pushf + popw %dx + movb $0xff, %ah + int $0x21 + + cli + pushf + popw %dx + movb $0xff, %ah + int $0x21 + + sti + pushfl + popl %edx + movb $0xff, %ah + int $0x21 + +#if 0 + movw $GET_OFFSET(IF_msg1), %dx + movb $0x09, %ah + int $0x21 + + pushf + movw %sp, %bx + andw $~0x200, (%bx) + popf +#else + cli +#endif + + pushf + popw %dx + movb $0xff, %ah + int $0x21 + + pushfl + movw %sp, %bx + orw $0x200, (%bx) + popfl + + pushfl + popl %edx + movb $0xff, %ah + int $0x21 + + movb $0x00, %ah + int $0x21 + +int90_test: + pushf + pop %dx + movb $0xff, %ah + int $0x21 + + movw %sp, %bx + movw 4(%bx), %dx + movb $0xff, %ah + int $0x21 + + movw $GET_OFFSET(int90_msg), %dx + movb $0x09, %ah + int $0x21 + iret + +int90_msg: + .string "INT90 started\n$" + +hello_world: + .string "Hello VM86 world\n$" + +IF_msg: + .string "VM86 IF test\n$" + +IF_msg1: + .string "If you see a diff here, your Linux kernel is buggy, please update to 2.4.20 kernel\n$" + +vm86_code_end: diff --git a/src/recompiler/tests/test-i386.c b/src/recompiler/tests/test-i386.c new file mode 100644 index 00000000..c8f21a95 --- /dev/null +++ b/src/recompiler/tests/test-i386.c @@ -0,0 +1,2769 @@ +/* + * x86 CPU test + * + * Copyright (c) 2003 Fabrice Bellard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the General Public License version 2 (GPLv2) at this time for any software where + * a choice of GPL license versions is made available with the language indicating + * that GPLv2 or any later version may be used, or where a choice of which version + * of the GPL is applied is otherwise unspecified. + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> +#include <signal.h> +#include <setjmp.h> +#include <errno.h> +#include <sys/ucontext.h> +#include <sys/mman.h> + +#if !defined(__x86_64__) +//#define TEST_VM86 +#define TEST_SEGS +#endif +//#define LINUX_VM86_IOPL_FIX +//#define TEST_P4_FLAGS +#ifdef __SSE__ +#define TEST_SSE +#define TEST_CMOV 1 +#define TEST_FCOMI 1 +#else +#undef TEST_SSE +#define TEST_CMOV 1 +#define TEST_FCOMI 1 +#endif + +#if defined(__x86_64__) +#define FMT64X "%016lx" +#define FMTLX "%016lx" +#define X86_64_ONLY(x) x +#else +#define FMT64X "%016" PRIx64 +#define FMTLX "%08lx" +#define X86_64_ONLY(x) +#endif + +#ifdef TEST_VM86 +#include <asm/vm86.h> +#endif + +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) +#define stringify(s) tostring(s) +#define tostring(s) #s + +#define CC_C 0x0001 +#define CC_P 0x0004 +#define CC_A 0x0010 +#define CC_Z 0x0040 +#define CC_S 0x0080 +#define CC_O 0x0800 + +#define __init_call __attribute__ ((unused,__section__ ("initcall"))) + +#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) + +#if defined(__x86_64__) +static inline long i2l(long v) +{ + return v | ((v ^ 0xabcd) << 32); +} +#else +static inline long i2l(long v) +{ + return v; +} +#endif + +#define OP add +#include "test-i386.h" + +#define OP sub +#include "test-i386.h" + +#define OP xor +#include "test-i386.h" + +#define OP and +#include "test-i386.h" + +#define OP or +#include "test-i386.h" + +#define OP cmp +#include "test-i386.h" + +#define OP adc +#define OP_CC +#include "test-i386.h" + +#define OP sbb +#define OP_CC +#include "test-i386.h" + +#define OP inc +#define OP_CC +#define OP1 +#include "test-i386.h" + +#define OP dec +#define OP_CC +#define OP1 +#include "test-i386.h" + +#define OP neg +#define OP_CC +#define OP1 +#include "test-i386.h" + +#define OP not +#define OP_CC +#define OP1 +#include "test-i386.h" + +#undef CC_MASK +#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) + +#define OP shl +#include "test-i386-shift.h" + +#define OP shr +#include "test-i386-shift.h" + +#define OP sar +#include "test-i386-shift.h" + +#define OP rol +#include "test-i386-shift.h" + +#define OP ror +#include "test-i386-shift.h" + +#define OP rcr +#define OP_CC +#include "test-i386-shift.h" + +#define OP rcl +#define OP_CC +#include "test-i386-shift.h" + +#define OP shld +#define OP_SHIFTD +#define OP_NOBYTE +#include "test-i386-shift.h" + +#define OP shrd +#define OP_SHIFTD +#define OP_NOBYTE +#include "test-i386-shift.h" + +/* XXX: should be more precise ? 
*/ +#undef CC_MASK +#define CC_MASK (CC_C) + +#define OP bt +#define OP_NOBYTE +#include "test-i386-shift.h" + +#define OP bts +#define OP_NOBYTE +#include "test-i386-shift.h" + +#define OP btr +#define OP_NOBYTE +#include "test-i386-shift.h" + +#define OP btc +#define OP_NOBYTE +#include "test-i386-shift.h" + +/* lea test (modrm support) */ +#define TEST_LEAQ(STR)\ +{\ + asm("lea " STR ", %0"\ + : "=r" (res)\ + : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ + printf("lea %s = " FMTLX "\n", STR, res);\ +} + +#define TEST_LEA(STR)\ +{\ + asm("lea " STR ", %0"\ + : "=r" (res)\ + : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ + printf("lea %s = " FMTLX "\n", STR, res);\ +} + +#define TEST_LEA16(STR)\ +{\ + asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ + : "=wq" (res)\ + : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ + printf("lea %s = %08lx\n", STR, res);\ +} + + +void test_lea(void) +{ + long eax, ebx, ecx, edx, esi, edi, res; + eax = i2l(0x0001); + ebx = i2l(0x0002); + ecx = i2l(0x0004); + edx = i2l(0x0008); + esi = i2l(0x0010); + edi = i2l(0x0020); + + TEST_LEA("0x4000"); + + TEST_LEA("(%%eax)"); + TEST_LEA("(%%ebx)"); + TEST_LEA("(%%ecx)"); + TEST_LEA("(%%edx)"); + TEST_LEA("(%%esi)"); + TEST_LEA("(%%edi)"); + + TEST_LEA("0x40(%%eax)"); + TEST_LEA("0x40(%%ebx)"); + TEST_LEA("0x40(%%ecx)"); + TEST_LEA("0x40(%%edx)"); + TEST_LEA("0x40(%%esi)"); + TEST_LEA("0x40(%%edi)"); + + TEST_LEA("0x4000(%%eax)"); + TEST_LEA("0x4000(%%ebx)"); + TEST_LEA("0x4000(%%ecx)"); + TEST_LEA("0x4000(%%edx)"); + TEST_LEA("0x4000(%%esi)"); + TEST_LEA("0x4000(%%edi)"); + + TEST_LEA("(%%eax, %%ecx)"); + TEST_LEA("(%%ebx, %%edx)"); + TEST_LEA("(%%ecx, %%ecx)"); + TEST_LEA("(%%edx, %%ecx)"); + TEST_LEA("(%%esi, %%ecx)"); + TEST_LEA("(%%edi, %%ecx)"); + + TEST_LEA("0x40(%%eax, %%ecx)"); + TEST_LEA("0x4000(%%ebx, %%edx)"); + + TEST_LEA("(%%ecx, %%ecx, 2)"); + TEST_LEA("(%%edx, %%ecx, 4)"); + TEST_LEA("(%%esi, %%ecx, 8)"); + + TEST_LEA("(,%%eax, 2)"); + TEST_LEA("(,%%ebx, 4)"); + TEST_LEA("(,%%ecx, 8)"); + + TEST_LEA("0x40(,%%eax, 2)"); + TEST_LEA("0x40(,%%ebx, 4)"); + TEST_LEA("0x40(,%%ecx, 8)"); + + + TEST_LEA("-10(%%ecx, %%ecx, 2)"); + TEST_LEA("-10(%%edx, %%ecx, 4)"); + TEST_LEA("-10(%%esi, %%ecx, 8)"); + + TEST_LEA("0x4000(%%ecx, %%ecx, 2)"); + TEST_LEA("0x4000(%%edx, %%ecx, 4)"); + TEST_LEA("0x4000(%%esi, %%ecx, 8)"); + +#if defined(__x86_64__) + TEST_LEAQ("0x4000"); + TEST_LEAQ("0x4000(%%rip)"); + + TEST_LEAQ("(%%rax)"); + TEST_LEAQ("(%%rbx)"); + TEST_LEAQ("(%%rcx)"); + TEST_LEAQ("(%%rdx)"); + TEST_LEAQ("(%%rsi)"); + TEST_LEAQ("(%%rdi)"); + + TEST_LEAQ("0x40(%%rax)"); + TEST_LEAQ("0x40(%%rbx)"); + TEST_LEAQ("0x40(%%rcx)"); + TEST_LEAQ("0x40(%%rdx)"); + TEST_LEAQ("0x40(%%rsi)"); + TEST_LEAQ("0x40(%%rdi)"); + + TEST_LEAQ("0x4000(%%rax)"); + TEST_LEAQ("0x4000(%%rbx)"); + TEST_LEAQ("0x4000(%%rcx)"); + TEST_LEAQ("0x4000(%%rdx)"); + TEST_LEAQ("0x4000(%%rsi)"); + TEST_LEAQ("0x4000(%%rdi)"); + + TEST_LEAQ("(%%rax, %%rcx)"); + TEST_LEAQ("(%%rbx, %%rdx)"); + TEST_LEAQ("(%%rcx, %%rcx)"); + TEST_LEAQ("(%%rdx, %%rcx)"); + TEST_LEAQ("(%%rsi, %%rcx)"); + TEST_LEAQ("(%%rdi, %%rcx)"); + + TEST_LEAQ("0x40(%%rax, %%rcx)"); + TEST_LEAQ("0x4000(%%rbx, %%rdx)"); + + TEST_LEAQ("(%%rcx, %%rcx, 2)"); + TEST_LEAQ("(%%rdx, %%rcx, 4)"); + TEST_LEAQ("(%%rsi, %%rcx, 8)"); + + TEST_LEAQ("(,%%rax, 2)"); + TEST_LEAQ("(,%%rbx, 4)"); + TEST_LEAQ("(,%%rcx, 8)"); + + TEST_LEAQ("0x40(,%%rax, 2)"); + TEST_LEAQ("0x40(,%%rbx, 4)"); + TEST_LEAQ("0x40(,%%rcx, 8)"); + + + 
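+    /* scaled-index addressing combined with a negative displacement */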
TEST_LEAQ("-10(%%rcx, %%rcx, 2)"); + TEST_LEAQ("-10(%%rdx, %%rcx, 4)"); + TEST_LEAQ("-10(%%rsi, %%rcx, 8)"); + + TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)"); + TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)"); + TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)"); +#else + /* limited 16 bit addressing test */ + TEST_LEA16("0x4000"); + TEST_LEA16("(%%bx)"); + TEST_LEA16("(%%si)"); + TEST_LEA16("(%%di)"); + TEST_LEA16("0x40(%%bx)"); + TEST_LEA16("0x40(%%si)"); + TEST_LEA16("0x40(%%di)"); + TEST_LEA16("0x4000(%%bx)"); + TEST_LEA16("0x4000(%%si)"); + TEST_LEA16("(%%bx,%%si)"); + TEST_LEA16("(%%bx,%%di)"); + TEST_LEA16("0x40(%%bx,%%si)"); + TEST_LEA16("0x40(%%bx,%%di)"); + TEST_LEA16("0x4000(%%bx,%%si)"); + TEST_LEA16("0x4000(%%bx,%%di)"); +#endif +} + +#define TEST_JCC(JCC, v1, v2)\ +{\ + int res;\ + asm("movl $1, %0\n\t"\ + "cmpl %2, %1\n\t"\ + "j" JCC " 1f\n\t"\ + "movl $0, %0\n\t"\ + "1:\n\t"\ + : "=r" (res)\ + : "r" (v1), "r" (v2));\ + printf("%-10s %d\n", "j" JCC, res);\ +\ + asm("movl $0, %0\n\t"\ + "cmpl %2, %1\n\t"\ + "set" JCC " %b0\n\t"\ + : "=r" (res)\ + : "r" (v1), "r" (v2));\ + printf("%-10s %d\n", "set" JCC, res);\ + if (TEST_CMOV) {\ + long val = i2l(1);\ + long res = i2l(0x12345678);\ +X86_64_ONLY(\ + asm("cmpl %2, %1\n\t"\ + "cmov" JCC "q %3, %0\n\t"\ + : "=r" (res)\ + : "r" (v1), "r" (v2), "m" (val), "0" (res));\ + printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\ + asm("cmpl %2, %1\n\t"\ + "cmov" JCC "l %k3, %k0\n\t"\ + : "=r" (res)\ + : "r" (v1), "r" (v2), "m" (val), "0" (res));\ + printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\ + asm("cmpl %2, %1\n\t"\ + "cmov" JCC "w %w3, %w0\n\t"\ + : "=r" (res)\ + : "r" (v1), "r" (v2), "r" (1), "0" (res));\ + printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\ + } \ +} + +/* various jump tests */ +void test_jcc(void) +{ + TEST_JCC("ne", 1, 1); + TEST_JCC("ne", 1, 0); + + TEST_JCC("e", 1, 1); + TEST_JCC("e", 1, 0); + + TEST_JCC("l", 1, 1); + TEST_JCC("l", 1, 0); + TEST_JCC("l", 1, -1); + + TEST_JCC("le", 1, 1); + TEST_JCC("le", 1, 0); + TEST_JCC("le", 1, -1); + + TEST_JCC("ge", 1, 1); + TEST_JCC("ge", 1, 0); + TEST_JCC("ge", -1, 1); + + TEST_JCC("g", 1, 1); + TEST_JCC("g", 1, 0); + TEST_JCC("g", 1, -1); + + TEST_JCC("b", 1, 1); + TEST_JCC("b", 1, 0); + TEST_JCC("b", 1, -1); + + TEST_JCC("be", 1, 1); + TEST_JCC("be", 1, 0); + TEST_JCC("be", 1, -1); + + TEST_JCC("ae", 1, 1); + TEST_JCC("ae", 1, 0); + TEST_JCC("ae", 1, -1); + + TEST_JCC("a", 1, 1); + TEST_JCC("a", 1, 0); + TEST_JCC("a", 1, -1); + + + TEST_JCC("p", 1, 1); + TEST_JCC("p", 1, 0); + + TEST_JCC("np", 1, 1); + TEST_JCC("np", 1, 0); + + TEST_JCC("o", 0x7fffffff, 0); + TEST_JCC("o", 0x7fffffff, -1); + + TEST_JCC("no", 0x7fffffff, 0); + TEST_JCC("no", 0x7fffffff, -1); + + TEST_JCC("s", 0, 1); + TEST_JCC("s", 0, -1); + TEST_JCC("s", 0, 0); + + TEST_JCC("ns", 0, 1); + TEST_JCC("ns", 0, -1); + TEST_JCC("ns", 0, 0); +} + +#define TEST_LOOP(insn) \ +{\ + for(i = 0; i < sizeof(ecx_vals) / sizeof(long); i++) {\ + ecx = ecx_vals[i];\ + for(zf = 0; zf < 2; zf++) {\ + asm("test %2, %2\n\t"\ + "movl $1, %0\n\t"\ + insn " 1f\n\t" \ + "movl $0, %0\n\t"\ + "1:\n\t"\ + : "=a" (res)\ + : "c" (ecx), "b" (!zf)); \ + printf("%-10s ECX=" FMTLX " ZF=%ld r=%d\n", insn, ecx, zf, res); \ + }\ + }\ +} + +void test_loop(void) +{ + long ecx, zf; + const long ecx_vals[] = { + 0, + 1, + 0x10000, + 0x10001, +#if defined(__x86_64__) + 0x100000000L, + 0x100000001L, +#endif + }; + int i, res; + +#if !defined(__x86_64__) + TEST_LOOP("jcxz"); + TEST_LOOP("loopw"); + TEST_LOOP("loopzw"); + TEST_LOOP("loopnzw"); +#endif + + 
TEST_LOOP("jecxz"); + TEST_LOOP("loopl"); + TEST_LOOP("loopzl"); + TEST_LOOP("loopnzl"); +} + +#undef CC_MASK +#ifdef TEST_P4_FLAGS +#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) +#else +#define CC_MASK (CC_O | CC_C) +#endif + +#define OP mul +#include "test-i386-muldiv.h" + +#define OP imul +#include "test-i386-muldiv.h" + +void test_imulw2(long op0, long op1) +{ + long res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm volatile ("push %4\n\t" + "popf\n\t" + "imulw %w2, %w0\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=q" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", + "imulw", s0, s1, res, flags & CC_MASK); +} + +void test_imull2(long op0, long op1) +{ + long res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm volatile ("push %4\n\t" + "popf\n\t" + "imull %k2, %k0\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=q" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", + "imull", s0, s1, res, flags & CC_MASK); +} + +#if defined(__x86_64__) +void test_imulq2(long op0, long op1) +{ + long res, s1, s0, flags; + s0 = op0; + s1 = op1; + res = s0; + flags = 0; + asm volatile ("push %4\n\t" + "popf\n\t" + "imulq %2, %0\n\t" + "pushf\n\t" + "pop %1\n\t" + : "=q" (res), "=g" (flags) + : "q" (s1), "0" (res), "1" (flags)); + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n", + "imulq", s0, s1, res, flags & CC_MASK); +} +#endif + +#define TEST_IMUL_IM(size, rsize, op0, op1)\ +{\ + long res, flags, s1;\ + flags = 0;\ + res = 0;\ + s1 = op1;\ + asm volatile ("push %3\n\t"\ + "popf\n\t"\ + "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=r" (res), "=g" (flags)\ + : "r" (s1), "1" (flags), "0" (res));\ + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\ + "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\ +} + + +#undef CC_MASK +#define CC_MASK (0) + +#define OP div +#include "test-i386-muldiv.h" + +#define OP idiv +#include "test-i386-muldiv.h" + +void test_mul(void) +{ + test_imulb(0x1234561d, 4); + test_imulb(3, -4); + test_imulb(0x80, 0x80); + test_imulb(0x10, 0x10); + + test_imulw(0, 0x1234001d, 45); + test_imulw(0, 23, -45); + test_imulw(0, 0x8000, 0x8000); + test_imulw(0, 0x100, 0x100); + + test_imull(0, 0x1234001d, 45); + test_imull(0, 23, -45); + test_imull(0, 0x80000000, 0x80000000); + test_imull(0, 0x10000, 0x10000); + + test_mulb(0x1234561d, 4); + test_mulb(3, -4); + test_mulb(0x80, 0x80); + test_mulb(0x10, 0x10); + + test_mulw(0, 0x1234001d, 45); + test_mulw(0, 23, -45); + test_mulw(0, 0x8000, 0x8000); + test_mulw(0, 0x100, 0x100); + + test_mull(0, 0x1234001d, 45); + test_mull(0, 23, -45); + test_mull(0, 0x80000000, 0x80000000); + test_mull(0, 0x10000, 0x10000); + + test_imulw2(0x1234001d, 45); + test_imulw2(23, -45); + test_imulw2(0x8000, 0x8000); + test_imulw2(0x100, 0x100); + + test_imull2(0x1234001d, 45); + test_imull2(23, -45); + test_imull2(0x80000000, 0x80000000); + test_imull2(0x10000, 0x10000); + + TEST_IMUL_IM("w", "w", 45, 0x1234); + TEST_IMUL_IM("w", "w", -45, 23); + TEST_IMUL_IM("w", "w", 0x8000, 0x80000000); + TEST_IMUL_IM("w", "w", 0x7fff, 0x1000); + + TEST_IMUL_IM("l", "k", 45, 0x1234); + TEST_IMUL_IM("l", "k", -45, 23); + TEST_IMUL_IM("l", "k", 0x8000, 0x80000000); + TEST_IMUL_IM("l", "k", 0x7fff, 0x1000); + + test_idivb(0x12341678, 0x127e); + test_idivb(0x43210123, -5); + test_idivb(0x12340004, -1); + + 
test_idivw(0, 0x12345678, 12347); + test_idivw(0, -23223, -45); + test_idivw(0, 0x12348000, -1); + test_idivw(0x12343, 0x12345678, 0x81238567); + + test_idivl(0, 0x12345678, 12347); + test_idivl(0, -233223, -45); + test_idivl(0, 0x80000000, -1); + test_idivl(0x12343, 0x12345678, 0x81234567); + + test_divb(0x12341678, 0x127e); + test_divb(0x43210123, -5); + test_divb(0x12340004, -1); + + test_divw(0, 0x12345678, 12347); + test_divw(0, -23223, -45); + test_divw(0, 0x12348000, -1); + test_divw(0x12343, 0x12345678, 0x81238567); + + test_divl(0, 0x12345678, 12347); + test_divl(0, -233223, -45); + test_divl(0, 0x80000000, -1); + test_divl(0x12343, 0x12345678, 0x81234567); + +#if defined(__x86_64__) + test_imulq(0, 0x1234001d1234001d, 45); + test_imulq(0, 23, -45); + test_imulq(0, 0x8000000000000000, 0x8000000000000000); + test_imulq(0, 0x100000000, 0x100000000); + + test_mulq(0, 0x1234001d1234001d, 45); + test_mulq(0, 23, -45); + test_mulq(0, 0x8000000000000000, 0x8000000000000000); + test_mulq(0, 0x100000000, 0x100000000); + + test_imulq2(0x1234001d1234001d, 45); + test_imulq2(23, -45); + test_imulq2(0x8000000000000000, 0x8000000000000000); + test_imulq2(0x100000000, 0x100000000); + + TEST_IMUL_IM("q", "", 45, 0x12341234); + TEST_IMUL_IM("q", "", -45, 23); + TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000); + TEST_IMUL_IM("q", "", 0x7fff, 0x10000000); + + test_idivq(0, 0x12345678abcdef, 12347); + test_idivq(0, -233223, -45); + test_idivq(0, 0x8000000000000000, -1); + test_idivq(0x12343, 0x12345678, 0x81234567); + + test_divq(0, 0x12345678abcdef, 12347); + test_divq(0, -233223, -45); + test_divq(0, 0x8000000000000000, -1); + test_divq(0x12343, 0x12345678, 0x81234567); +#endif +} + +#define TEST_BSX(op, size, op0)\ +{\ + long res, val, resz;\ + val = op0;\ + asm("xor %1, %1\n"\ + "mov $0x12345678, %0\n"\ + #op " %" size "2, %" size "0 ; setz %b1" \ + : "=&r" (res), "=&q" (resz)\ + : "r" (val));\ + printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\ +} + +void test_bsx(void) +{ + TEST_BSX(bsrw, "w", 0); + TEST_BSX(bsrw, "w", 0x12340128); + TEST_BSX(bsfw, "w", 0); + TEST_BSX(bsfw, "w", 0x12340128); + TEST_BSX(bsrl, "k", 0); + TEST_BSX(bsrl, "k", 0x00340128); + TEST_BSX(bsfl, "k", 0); + TEST_BSX(bsfl, "k", 0x00340128); +#if defined(__x86_64__) + TEST_BSX(bsrq, "", 0); + TEST_BSX(bsrq, "", 0x003401281234); + TEST_BSX(bsfq, "", 0); + TEST_BSX(bsfq, "", 0x003401281234); +#endif +} + +/**********************************************/ + +union float64u { + double d; + uint64_t l; +}; + +union float64u q_nan = { .l = 0xFFF8000000000000LL }; +union float64u s_nan = { .l = 0xFFF0000000000000LL }; + +void test_fops(double a, double b) +{ + printf("a=%f b=%f a+b=%f\n", a, b, a + b); + printf("a=%f b=%f a-b=%f\n", a, b, a - b); + printf("a=%f b=%f a*b=%f\n", a, b, a * b); + printf("a=%f b=%f a/b=%f\n", a, b, a / b); + printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b)); + printf("a=%f sqrt(a)=%f\n", a, sqrt(a)); + printf("a=%f sin(a)=%f\n", a, sin(a)); + printf("a=%f cos(a)=%f\n", a, cos(a)); + printf("a=%f tan(a)=%f\n", a, tan(a)); + printf("a=%f log(a)=%f\n", a, log(a)); + printf("a=%f exp(a)=%f\n", a, exp(a)); + printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b)); + /* just to test some op combining */ + printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a))); + printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a))); + printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a))); + +} + +void fpu_clear_exceptions(void) +{ + struct __attribute__((packed)) { + uint16_t fpuc; + uint16_t dummy1; + uint16_t 
fpus; + uint16_t dummy2; + uint16_t fptag; + uint16_t dummy3; + uint32_t ignored[4]; + long double fpregs[8]; + } float_env32; + + asm volatile ("fnstenv %0\n" : : "m" (float_env32)); + float_env32.fpus &= ~0x7f; + asm volatile ("fldenv %0\n" : : "m" (float_env32)); +} + +/* XXX: display exception bits when supported */ +#define FPUS_EMASK 0x0000 +//#define FPUS_EMASK 0x007f + +void test_fcmp(double a, double b) +{ + long eflags, fpus; + + fpu_clear_exceptions(); + asm("fcom %2\n" + "fstsw %%ax\n" + : "=a" (fpus) + : "t" (a), "u" (b)); + printf("fcom(%f %f)=%04lx \n", + a, b, fpus & (0x4500 | FPUS_EMASK)); + fpu_clear_exceptions(); + asm("fucom %2\n" + "fstsw %%ax\n" + : "=a" (fpus) + : "t" (a), "u" (b)); + printf("fucom(%f %f)=%04lx\n", + a, b, fpus & (0x4500 | FPUS_EMASK)); + if (TEST_FCOMI) { + /* test f(u)comi instruction */ + fpu_clear_exceptions(); + asm("fcomi %3, %2\n" + "fstsw %%ax\n" + "pushf\n" + "pop %0\n" + : "=r" (eflags), "=a" (fpus) + : "t" (a), "u" (b)); + printf("fcomi(%f %f)=%04lx %02lx\n", + a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C)); + fpu_clear_exceptions(); + asm("fucomi %3, %2\n" + "fstsw %%ax\n" + "pushf\n" + "pop %0\n" + : "=r" (eflags), "=a" (fpus) + : "t" (a), "u" (b)); + printf("fucomi(%f %f)=%04lx %02lx\n", + a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C)); + } + fpu_clear_exceptions(); + asm volatile("fxam\n" + "fstsw %%ax\n" + : "=a" (fpus) + : "t" (a)); + printf("fxam(%f)=%04lx\n", a, fpus & 0x4700); + fpu_clear_exceptions(); +} + +void test_fcvt(double a) +{ + float fa; + long double la; + int16_t fpuc; + int i; + int64_t lla; + int ia; + int16_t wa; + double ra; + + fa = a; + la = a; + printf("(float)%f = %f\n", a, fa); + printf("(long double)%f = %Lf\n", a, la); + printf("a=" FMT64X "\n", *(uint64_t *)&a); + printf("la=" FMT64X " %04x\n", *(uint64_t *)&la, + *(unsigned short *)((char *)(&la) + 8)); + + /* test all roundings */ + asm volatile ("fstcw %0" : "=m" (fpuc)); + for(i=0;i<4;i++) { + uint16_t val16; + val16 = (fpuc & ~0x0c00) | (i << 10); + asm volatile ("fldcw %0" : : "m" (val16)); + asm volatile ("fist %0" : "=m" (wa) : "t" (a)); + asm volatile ("fistl %0" : "=m" (ia) : "t" (a)); + asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st"); + asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a)); + asm volatile ("fldcw %0" : : "m" (fpuc)); + printf("(short)a = %d\n", wa); + printf("(int)a = %d\n", ia); + printf("(int64_t)a = " FMT64X "\n", lla); + printf("rint(a) = %f\n", ra); + } +} + +#define TEST(N) \ + asm("fld" #N : "=t" (a)); \ + printf("fld" #N "= %f\n", a); + +void test_fconst(void) +{ + double a; + TEST(1); + TEST(l2t); + TEST(l2e); + TEST(pi); + TEST(lg2); + TEST(ln2); + TEST(z); +} + +void test_fbcd(double a) +{ + unsigned short bcd[5]; + double b; + + asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st"); + asm("fbld %1" : "=t" (b) : "m" (bcd[0])); + printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n", + a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b); +} + +#define TEST_ENV(env, save, restore)\ +{\ + memset((env), 0xaa, sizeof(*(env)));\ + for(i=0;i<5;i++)\ + asm volatile ("fldl %0" : : "m" (dtab[i]));\ + asm volatile (save " %0\n" : : "m" (*(env)));\ + asm volatile (restore " %0\n": : "m" (*(env)));\ + for(i=0;i<5;i++)\ + asm volatile ("fstpl %0" : "=m" (rtab[i]));\ + for(i=0;i<5;i++)\ + printf("res[%d]=%f\n", i, rtab[i]);\ + printf("fpuc=%04x fpus=%04x fptag=%04x\n",\ + (env)->fpuc,\ + (env)->fpus & 0xff00,\ + (env)->fptag);\ +} + +void test_fenv(void) +{ + struct __attribute__((packed)) { + uint16_t fpuc; + uint16_t 
dummy1; + uint16_t fpus; + uint16_t dummy2; + uint16_t fptag; + uint16_t dummy3; + uint32_t ignored[4]; + long double fpregs[8]; + } float_env32; + struct __attribute__((packed)) { + uint16_t fpuc; + uint16_t fpus; + uint16_t fptag; + uint16_t ignored[4]; + long double fpregs[8]; + } float_env16; + double dtab[8]; + double rtab[8]; + int i; + + for(i=0;i<8;i++) + dtab[i] = i + 1; + + TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv"); + TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor"); + TEST_ENV(&float_env32, "fnstenv", "fldenv"); + TEST_ENV(&float_env32, "fnsave", "frstor"); + + /* test for ffree */ + for(i=0;i<5;i++) + asm volatile ("fldl %0" : : "m" (dtab[i])); + asm volatile("ffree %st(2)"); + asm volatile ("fnstenv %0\n" : : "m" (float_env32)); + asm volatile ("fninit"); + printf("fptag=%04x\n", float_env32.fptag); +} + + +#define TEST_FCMOV(a, b, eflags, CC)\ +{\ + double res;\ + asm("push %3\n"\ + "popf\n"\ + "fcmov" CC " %2, %0\n"\ + : "=t" (res)\ + : "0" (a), "u" (b), "g" (eflags));\ + printf("fcmov%s eflags=0x%04lx-> %f\n", \ + CC, (long)eflags, res);\ +} + +void test_fcmov(void) +{ + double a, b; + long eflags, i; + + a = 1.0; + b = 2.0; + for(i = 0; i < 4; i++) { + eflags = 0; + if (i & 1) + eflags |= CC_C; + if (i & 2) + eflags |= CC_Z; + TEST_FCMOV(a, b, eflags, "b"); + TEST_FCMOV(a, b, eflags, "e"); + TEST_FCMOV(a, b, eflags, "be"); + TEST_FCMOV(a, b, eflags, "nb"); + TEST_FCMOV(a, b, eflags, "ne"); + TEST_FCMOV(a, b, eflags, "nbe"); + } + TEST_FCMOV(a, b, 0, "u"); + TEST_FCMOV(a, b, CC_P, "u"); + TEST_FCMOV(a, b, 0, "nu"); + TEST_FCMOV(a, b, CC_P, "nu"); +} + +void test_floats(void) +{ + test_fops(2, 3); + test_fops(1.4, -5); + test_fcmp(2, -1); + test_fcmp(2, 2); + test_fcmp(2, 3); + test_fcmp(2, q_nan.d); + test_fcmp(q_nan.d, -1); + test_fcmp(-1.0/0.0, -1); + test_fcmp(1.0/0.0, -1); + test_fcvt(0.5); + test_fcvt(-0.5); + test_fcvt(1.0/7.0); + test_fcvt(-1.0/9.0); + test_fcvt(32768); + test_fcvt(-1e20); + test_fcvt(-1.0/0.0); + test_fcvt(1.0/0.0); + test_fcvt(q_nan.d); + test_fconst(); + test_fbcd(1234567890123456.0); + test_fbcd(-123451234567890.0); + test_fenv(); + if (TEST_CMOV) { + test_fcmov(); + } +} + +/**********************************************/ +#if !defined(__x86_64__) + +#define TEST_BCD(op, op0, cc_in, cc_mask)\ +{\ + int res, flags;\ + res = op0;\ + flags = cc_in;\ + asm ("push %3\n\t"\ + "popf\n\t"\ + #op "\n\t"\ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=a" (res), "=g" (flags)\ + : "0" (res), "1" (flags));\ + printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\ + #op, op0, res, cc_in, flags & cc_mask);\ +} + +void test_bcd(void) +{ + TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + 
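+    /* das: decimal adjust AL after subtraction */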
+ TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A)); + + TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A)); + TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A)); + TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A)); + TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A)); + TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A)); + TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A)); + TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A)); + TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A)); + + TEST_BCD(aas, 0x12340205, CC_A, (CC_C | CC_A)); + TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A)); + TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A)); + TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A)); + TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A)); + TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A)); + TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A)); + TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A)); + + TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)); + TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)); +} +#endif + +#define TEST_XCHG(op, size, opconst)\ +{\ + long op0, op1;\ + op0 = i2l(0x12345678);\ + op1 = i2l(0xfbca7654);\ + asm(#op " %" size "0, %" size "1" \ + : "=q" (op0), opconst (op1) \ + : "0" (op0));\ + printf("%-10s A=" FMTLX " B=" FMTLX "\n",\ + #op, op0, op1);\ +} + +#define TEST_CMPXCHG(op, size, opconst, eax)\ +{\ + long op0, op1, op2;\ + op0 = i2l(0x12345678);\ + op1 = i2l(0xfbca7654);\ + op2 = i2l(eax);\ + asm(#op " %" size "0, %" size "1" \ + : "=q" (op0), opconst (op1) \ + : "0" (op0), "a" (op2));\ + printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\ + #op, op2, op0, op1);\ +} + +void test_xchg(void) +{ +#if defined(__x86_64__) + TEST_XCHG(xchgq, "", "+q"); +#endif + TEST_XCHG(xchgl, "k", "+q"); + TEST_XCHG(xchgw, "w", "+q"); + TEST_XCHG(xchgb, "b", "+q"); + +#if defined(__x86_64__) + TEST_XCHG(xchgq, "", "=m"); +#endif + TEST_XCHG(xchgl, "k", "+m"); + TEST_XCHG(xchgw, "w", "+m"); + TEST_XCHG(xchgb, "b", "+m"); + +#if defined(__x86_64__) + TEST_XCHG(xaddq, "", "+q"); +#endif + TEST_XCHG(xaddl, "k", "+q"); + TEST_XCHG(xaddw, "w", "+q"); + TEST_XCHG(xaddb, "b", "+q"); + + { + int res; + res = 0x12345678; + asm("xaddl %1, %0" : "=r" (res) : "0" (res)); + printf("xaddl same res=%08x\n", res); + } + +#if defined(__x86_64__) + TEST_XCHG(xaddq, "", "+m"); +#endif + TEST_XCHG(xaddl, "k", "+m"); + TEST_XCHG(xaddw, "w", "+m"); + TEST_XCHG(xaddb, "b", "+m"); + +#if defined(__x86_64__) + TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfbca7654); +#endif + TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfbca7654); + TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfbca7654); + TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfbca7654); + +#if defined(__x86_64__) + TEST_CMPXCHG(cmpxchgq, "", 
"+q", 0xfffefdfc); +#endif + TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfffefdfc); + TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfffefdfc); + TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfffefdfc); + +#if defined(__x86_64__) + TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfbca7654); +#endif + TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfbca7654); + TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfbca7654); + TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfbca7654); + +#if defined(__x86_64__) + TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfffefdfc); +#endif + TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfffefdfc); + TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfffefdfc); + TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfffefdfc); + + { + uint64_t op0, op1, op2; + long eax, edx; + long i, eflags; + + for(i = 0; i < 2; i++) { + op0 = 0x123456789abcdLL; + eax = i2l(op0 & 0xffffffff); + edx = i2l(op0 >> 32); + if (i == 0) + op1 = 0xfbca765423456LL; + else + op1 = op0; + op2 = 0x6532432432434LL; + asm("cmpxchg8b %2\n" + "pushf\n" + "pop %3\n" + : "=a" (eax), "=d" (edx), "=m" (op1), "=g" (eflags) + : "0" (eax), "1" (edx), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32))); + printf("cmpxchg8b: eax=" FMTLX " edx=" FMTLX " op1=" FMT64X " CC=%02lx\n", + eax, edx, op1, eflags & CC_Z); + } + } +} + +#ifdef TEST_SEGS +/**********************************************/ +/* segmentation tests */ + +#include <sys/syscall.h> +#include <unistd.h> +#include <asm/ldt.h> +#include <linux/version.h> + +static inline int modify_ldt(int func, void * ptr, unsigned long bytecount) +{ + return syscall(__NR_modify_ldt, func, ptr, bytecount); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66) +#define modify_ldt_ldt_s user_desc +#endif + +#define MK_SEL(n) (((n) << 3) | 7) + +uint8_t seg_data1[4096]; +uint8_t seg_data2[4096]; + +#define TEST_LR(op, size, seg, mask)\ +{\ + int res, res2;\ + uint16_t mseg = seg;\ + res = 0x12345678;\ + asm (op " %" size "2, %" size "0\n" \ + "movl $0, %1\n"\ + "jnz 1f\n"\ + "movl $1, %1\n"\ + "1:\n"\ + : "=r" (res), "=r" (res2) : "m" (mseg), "0" (res));\ + printf(op ": Z=%d %08x\n", res2, res & ~(mask));\ +} + +#define TEST_ARPL(op, size, op1, op2)\ +{\ + long a, b, c; \ + a = (op1); \ + b = (op2); \ + asm volatile(op " %" size "3, %" size "0\n"\ + "movl $0,%1\n"\ + "jnz 1f\n"\ + "movl $1,%1\n"\ + "1:\n"\ + : "=r" (a), "=r" (c) : "0" (a), "r" (b)); \ + printf(op size " A=" FMTLX " B=" FMTLX " R=" FMTLX " z=%ld\n",\ + (long)(op1), (long)(op2), a, c);\ +} + +/* NOTE: we use Linux modify_ldt syscall */ +void test_segs(void) +{ + struct modify_ldt_ldt_s ldt; + long long ldt_table[3]; + int res, res2; + char tmp; + struct { + uint32_t offset; + uint16_t seg; + } __attribute__((packed)) segoff; + + ldt.entry_number = 1; + ldt.base_addr = (unsigned long)&seg_data1; + ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12; + ldt.seg_32bit = 1; + ldt.contents = MODIFY_LDT_CONTENTS_DATA; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 1; + ldt.seg_not_present = 0; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + ldt.entry_number = 2; + ldt.base_addr = (unsigned long)&seg_data2; + ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12; + ldt.seg_32bit = 1; + ldt.contents = MODIFY_LDT_CONTENTS_DATA; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 1; + ldt.seg_not_present = 0; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */ +#if 0 + { + int i; + for(i=0;i<3;i++) + printf("%d: %016Lx\n", i, ldt_table[i]); + } +#endif + /* do some tests with fs or gs */ + asm volatile ("movl %0, %%fs" 
: : "r" (MK_SEL(1))); + + seg_data1[1] = 0xaa; + seg_data2[1] = 0x55; + + asm volatile ("fs movzbl 0x1, %0" : "=r" (res)); + printf("FS[1] = %02x\n", res); + + asm volatile ("pushl %%gs\n" + "movl %1, %%gs\n" + "gs movzbl 0x1, %0\n" + "popl %%gs\n" + : "=r" (res) + : "r" (MK_SEL(2))); + printf("GS[1] = %02x\n", res); + + /* tests with ds/ss (implicit segment case) */ + tmp = 0xa5; + asm volatile ("pushl %%ebp\n\t" + "pushl %%ds\n\t" + "movl %2, %%ds\n\t" + "movl %3, %%ebp\n\t" + "movzbl 0x1, %0\n\t" + "movzbl (%%ebp), %1\n\t" + "popl %%ds\n\t" + "popl %%ebp\n\t" + : "=r" (res), "=r" (res2) + : "r" (MK_SEL(1)), "r" (&tmp)); + printf("DS[1] = %02x\n", res); + printf("SS[tmp] = %02x\n", res2); + + segoff.seg = MK_SEL(2); + segoff.offset = 0xabcdef12; + asm volatile("lfs %2, %0\n\t" + "movl %%fs, %1\n\t" + : "=r" (res), "=g" (res2) + : "m" (segoff)); + printf("FS:reg = %04x:%08x\n", res2, res); + + TEST_LR("larw", "w", MK_SEL(2), 0x0100); + TEST_LR("larl", "", MK_SEL(2), 0x0100); + TEST_LR("lslw", "w", MK_SEL(2), 0); + TEST_LR("lsll", "", MK_SEL(2), 0); + + TEST_LR("larw", "w", 0xfff8, 0); + TEST_LR("larl", "", 0xfff8, 0); + TEST_LR("lslw", "w", 0xfff8, 0); + TEST_LR("lsll", "", 0xfff8, 0); + + TEST_ARPL("arpl", "w", 0x12345678 | 3, 0x762123c | 1); + TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 3); + TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 1); +} + +/* 16 bit code test */ +extern char code16_start, code16_end; +extern char code16_func1; +extern char code16_func2; +extern char code16_func3; + +void test_code16(void) +{ + struct modify_ldt_ldt_s ldt; + int res, res2; + + /* build a code segment */ + ldt.entry_number = 1; + ldt.base_addr = (unsigned long)&code16_start; + ldt.limit = &code16_end - &code16_start; + ldt.seg_32bit = 0; + ldt.contents = MODIFY_LDT_CONTENTS_CODE; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 0; + ldt.seg_not_present = 0; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + /* call the first function */ + asm volatile ("lcall %1, %2" + : "=a" (res) + : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc"); + printf("func1() = 0x%08x\n", res); + asm volatile ("lcall %2, %3" + : "=a" (res), "=c" (res2) + : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc"); + printf("func2() = 0x%08x spdec=%d\n", res, res2); + asm volatile ("lcall %1, %2" + : "=a" (res) + : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc"); + printf("func3() = 0x%08x\n", res); +} +#endif + +#if defined(__x86_64__) +asm(".globl func_lret\n" + "func_lret:\n" + "movl $0x87654641, %eax\n" + "lretq\n"); +#else +asm(".globl func_lret\n" + "func_lret:\n" + "movl $0x87654321, %eax\n" + "lret\n" + + ".globl func_iret\n" + "func_iret:\n" + "movl $0xabcd4321, %eax\n" + "iret\n"); +#endif + +extern char func_lret; +extern char func_iret; + +void test_misc(void) +{ + char table[256]; + long res, i; + + for(i=0;i<256;i++) table[i] = 256 - i; + res = 0x12345678; + asm ("xlat" : "=a" (res) : "b" (table), "0" (res)); + printf("xlat: EAX=" FMTLX "\n", res); + +#if defined(__x86_64__) +#if 0 + { + /* XXX: see if Intel Core2 and AMD64 behavior really + differ. Here we implemented the Intel way which is not + compatible yet with QEMU. 
*/ + static struct __attribute__((packed)) { + uint64_t offset; + uint16_t seg; + } desc; + long cs_sel; + + asm volatile ("mov %%cs, %0" : "=r" (cs_sel)); + + asm volatile ("push %1\n" + "call func_lret\n" + : "=a" (res) + : "r" (cs_sel) : "memory", "cc"); + printf("func_lret=" FMTLX "\n", res); + + desc.offset = (long)&func_lret; + desc.seg = cs_sel; + + asm volatile ("xor %%rax, %%rax\n" + "rex64 lcall *(%%rcx)\n" + : "=a" (res) + : "c" (&desc) + : "memory", "cc"); + printf("func_lret2=" FMTLX "\n", res); + + asm volatile ("push %2\n" + "mov $ 1f, %%rax\n" + "push %%rax\n" + "rex64 ljmp *(%%rcx)\n" + "1:\n" + : "=a" (res) + : "c" (&desc), "b" (cs_sel) + : "memory", "cc"); + printf("func_lret3=" FMTLX "\n", res); + } +#endif +#else + asm volatile ("push %%cs ; call %1" + : "=a" (res) + : "m" (func_lret): "memory", "cc"); + printf("func_lret=" FMTLX "\n", res); + + asm volatile ("pushf ; push %%cs ; call %1" + : "=a" (res) + : "m" (func_iret): "memory", "cc"); + printf("func_iret=" FMTLX "\n", res); +#endif + +#if defined(__x86_64__) + /* specific popl test */ + asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0" + : "=g" (res)); + printf("popl esp=" FMTLX "\n", res); +#else + /* specific popl test */ + asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0" + : "=g" (res)); + printf("popl esp=" FMTLX "\n", res); + + /* specific popw test */ + asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0" + : "=g" (res)); + printf("popw esp=" FMTLX "\n", res); +#endif +} + +uint8_t str_buffer[4096]; + +#define TEST_STRING1(OP, size, DF, REP)\ +{\ + long esi, edi, eax, ecx, eflags;\ +\ + esi = (long)(str_buffer + sizeof(str_buffer) / 2);\ + edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\ + eax = i2l(0x12345678);\ + ecx = 17;\ +\ + asm volatile ("push $0\n\t"\ + "popf\n\t"\ + DF "\n\t"\ + REP #OP size "\n\t"\ + "cld\n\t"\ + "pushf\n\t"\ + "pop %4\n\t"\ + : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\ + : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\ + printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\ + REP #OP size, esi, edi, eax, ecx,\ + (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\ +} + +#define TEST_STRING(OP, REP)\ + TEST_STRING1(OP, "b", "", REP);\ + TEST_STRING1(OP, "w", "", REP);\ + TEST_STRING1(OP, "l", "", REP);\ + X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\ + TEST_STRING1(OP, "b", "std", REP);\ + TEST_STRING1(OP, "w", "std", REP);\ + TEST_STRING1(OP, "l", "std", REP);\ + X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP)) + +void test_string(void) +{ + int i; + for(i = 0;i < sizeof(str_buffer); i++) + str_buffer[i] = i + 0x56; + TEST_STRING(stos, ""); + TEST_STRING(stos, "rep "); + TEST_STRING(lods, ""); /* to verify stos */ + TEST_STRING(lods, "rep "); + TEST_STRING(movs, ""); + TEST_STRING(movs, "rep "); + TEST_STRING(lods, ""); /* to verify stos */ + + /* XXX: better tests */ + TEST_STRING(scas, ""); + TEST_STRING(scas, "repz "); + TEST_STRING(scas, "repnz "); + TEST_STRING(cmps, ""); + TEST_STRING(cmps, "repz "); + TEST_STRING(cmps, "repnz "); +} + +#ifdef TEST_VM86 +/* VM86 test */ + +static inline void set_bit(uint8_t *a, unsigned int bit) +{ + a[bit / 8] |= (1 << (bit % 8)); +} + +static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg) +{ + return (uint8_t *)((seg << 4) + (reg & 0xffff)); +} + +static inline void pushw(struct vm86_regs *r, int val) +{ + r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff); + 
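+    /* Only the low 16 bits of ESP wrap around, as on a real 16-bit stack; the
+       store below then goes through SS:SP, e.g. ss=0x100 and sp=0xfffc give
+       the linear address (0x100 << 4) + 0xfffc = 0x10ffc. */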
*(uint16_t *)seg_to_linear(r->ss, r->esp) = val; +} + +static inline int vm86(int func, struct vm86plus_struct *v86) +{ + return syscall(__NR_vm86, func, v86); +} + +extern char vm86_code_start; +extern char vm86_code_end; + +#define VM86_CODE_CS 0x100 +#define VM86_CODE_IP 0x100 + +void test_vm86(void) +{ + struct vm86plus_struct ctx; + struct vm86_regs *r; + uint8_t *vm86_mem; + int seg, ret; + + vm86_mem = mmap((void *)0x00000000, 0x110000, + PROT_WRITE | PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + if (vm86_mem == MAP_FAILED) { + printf("ERROR: could not map vm86 memory"); + return; + } + memset(&ctx, 0, sizeof(ctx)); + + /* init basic registers */ + r = &ctx.regs; + r->eip = VM86_CODE_IP; + r->esp = 0xfffe; + seg = VM86_CODE_CS; + r->cs = seg; + r->ss = seg; + r->ds = seg; + r->es = seg; + r->fs = seg; + r->gs = seg; + r->eflags = VIF_MASK; + + /* move code to proper address. We use the same layout as a .com + dos program. */ + memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP, + &vm86_code_start, &vm86_code_end - &vm86_code_start); + + /* mark int 0x21 as being emulated */ + set_bit((uint8_t *)&ctx.int_revectored, 0x21); + + for(;;) { + ret = vm86(VM86_ENTER, &ctx); + switch(VM86_TYPE(ret)) { + case VM86_INTx: + { + int int_num, ah, v; + + int_num = VM86_ARG(ret); + if (int_num != 0x21) + goto unknown_int; + ah = (r->eax >> 8) & 0xff; + switch(ah) { + case 0x00: /* exit */ + goto the_end; + case 0x02: /* write char */ + { + uint8_t c = r->edx; + putchar(c); + } + break; + case 0x09: /* write string */ + { + uint8_t c, *ptr; + ptr = seg_to_linear(r->ds, r->edx); + for(;;) { + c = *ptr++; + if (c == '$') + break; + putchar(c); + } + r->eax = (r->eax & ~0xff) | '$'; + } + break; + case 0xff: /* extension: write eflags number in edx */ + v = (int)r->edx; +#ifndef LINUX_VM86_IOPL_FIX + v &= ~0x3000; +#endif + printf("%08x\n", v); + break; + default: + unknown_int: + printf("unsupported int 0x%02x\n", int_num); + goto the_end; + } + } + break; + case VM86_SIGNAL: + /* a signal came, we just ignore that */ + break; + case VM86_STI: + break; + default: + printf("ERROR: unhandled vm86 return code (0x%x)\n", ret); + goto the_end; + } + } + the_end: + printf("VM86 end\n"); + munmap(vm86_mem, 0x110000); +} +#endif + +/* exception tests */ +#if defined(__i386__) && !defined(REG_EAX) +#define REG_EAX EAX +#define REG_EBX EBX +#define REG_ECX ECX +#define REG_EDX EDX +#define REG_ESI ESI +#define REG_EDI EDI +#define REG_EBP EBP +#define REG_ESP ESP +#define REG_EIP EIP +#define REG_EFL EFL +#define REG_TRAPNO TRAPNO +#define REG_ERR ERR +#endif + +#if defined(__x86_64__) +#define REG_EIP REG_RIP +#endif + +jmp_buf jmp_env; +int v1; +int tab[2]; + +void sig_handler(int sig, siginfo_t *info, void *puc) +{ + struct ucontext *uc = puc; + + printf("si_signo=%d si_errno=%d si_code=%d", + info->si_signo, info->si_errno, info->si_code); + printf(" si_addr=0x%08lx", + (unsigned long)info->si_addr); + printf("\n"); + + printf("trapno=" FMTLX " err=" FMTLX, + (long)uc->uc_mcontext.gregs[REG_TRAPNO], + (long)uc->uc_mcontext.gregs[REG_ERR]); + printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]); + printf("\n"); + longjmp(jmp_env, 1); +} + +void test_exceptions(void) +{ + struct sigaction act; + volatile int val; + + act.sa_sigaction = sig_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO | SA_NODEFER; + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGTRAP, &act, 
NULL); + + /* test division by zero reporting */ + printf("DIVZ exception:\n"); + if (setjmp(jmp_env) == 0) { + /* now divide by zero */ + v1 = 0; + v1 = 2 / v1; + } + +#if !defined(__x86_64__) + printf("BOUND exception:\n"); + if (setjmp(jmp_env) == 0) { + /* bound exception */ + tab[0] = 1; + tab[1] = 10; + asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0])); + } +#endif + +#ifdef TEST_SEGS + printf("segment exceptions:\n"); + if (setjmp(jmp_env) == 0) { + /* load an invalid segment */ + asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1)); + } + if (setjmp(jmp_env) == 0) { + /* null data segment is valid */ + asm volatile ("movl %0, %%fs" : : "r" (3)); + /* null stack segment */ + asm volatile ("movl %0, %%ss" : : "r" (3)); + } + + { + struct modify_ldt_ldt_s ldt; + ldt.entry_number = 1; + ldt.base_addr = (unsigned long)&seg_data1; + ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12; + ldt.seg_32bit = 1; + ldt.contents = MODIFY_LDT_CONTENTS_DATA; + ldt.read_exec_only = 0; + ldt.limit_in_pages = 1; + ldt.seg_not_present = 1; + ldt.useable = 1; + modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */ + + if (setjmp(jmp_env) == 0) { + /* segment not present */ + asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1))); + } + } +#endif + + /* test SEGV reporting */ + printf("PF exception:\n"); + if (setjmp(jmp_env) == 0) { + val = 1; + /* we add a nop to test a weird PC retrieval case */ + asm volatile ("nop"); + /* now store in an invalid address */ + *(char *)0x1234 = 1; + } + + /* test SEGV reporting */ + printf("PF exception:\n"); + if (setjmp(jmp_env) == 0) { + val = 1; + /* read from an invalid address */ + v1 = *(char *)0x1234; + } + + /* test illegal instruction reporting */ + printf("UD2 exception:\n"); + if (setjmp(jmp_env) == 0) { + /* now execute an invalid instruction */ + asm volatile("ud2"); + } + printf("lock nop exception:\n"); + if (setjmp(jmp_env) == 0) { + /* now execute an invalid instruction */ + asm volatile("lock nop"); + } + + printf("INT exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("int $0xfd"); + } + if (setjmp(jmp_env) == 0) { + asm volatile ("int $0x01"); + } + if (setjmp(jmp_env) == 0) { + asm volatile (".byte 0xcd, 0x03"); + } + if (setjmp(jmp_env) == 0) { + asm volatile ("int $0x04"); + } + if (setjmp(jmp_env) == 0) { + asm volatile ("int $0x05"); + } + + printf("INT3 exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("int3"); + } + + printf("CLI exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("cli"); + } + + printf("STI exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("cli"); + } + +#if !defined(__x86_64__) + printf("INTO exception:\n"); + if (setjmp(jmp_env) == 0) { + /* overflow exception */ + asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff)); + } +#endif + + printf("OUTB exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0)); + } + + printf("INB exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321)); + } + + printf("REP OUTSB exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1)); + } + + printf("REP INSB exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1)); + } + + printf("HLT exception:\n"); + if (setjmp(jmp_env) == 0) { + asm volatile ("hlt"); + } + + printf("single step exception:\n"); + val = 0; + if (setjmp(jmp_env) == 0) { + asm volatile ("pushf\n" + "orl $0x00100, 
(%%esp)\n" + "popf\n" + "movl $0xabcd, %0\n" + "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory"); + } + printf("val=0x%x\n", val); +} + +#if !defined(__x86_64__) +/* specific precise single step test */ +void sig_trap_handler(int sig, siginfo_t *info, void *puc) +{ + struct ucontext *uc = puc; + printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]); +} + +const uint8_t sstep_buf1[4] = { 1, 2, 3, 4}; +uint8_t sstep_buf2[4]; + +void test_single_step(void) +{ + struct sigaction act; + volatile int val; + int i; + + val = 0; + act.sa_sigaction = sig_trap_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGTRAP, &act, NULL); + asm volatile ("pushf\n" + "orl $0x00100, (%%esp)\n" + "popf\n" + "movl $0xabcd, %0\n" + + /* jmp test */ + "movl $3, %%ecx\n" + "1:\n" + "addl $1, %0\n" + "decl %%ecx\n" + "jnz 1b\n" + + /* movsb: the single step should stop at each movsb iteration */ + "movl $sstep_buf1, %%esi\n" + "movl $sstep_buf2, %%edi\n" + "movl $0, %%ecx\n" + "rep movsb\n" + "movl $3, %%ecx\n" + "rep movsb\n" + "movl $1, %%ecx\n" + "rep movsb\n" + + /* cmpsb: the single step should stop at each cmpsb iteration */ + "movl $sstep_buf1, %%esi\n" + "movl $sstep_buf2, %%edi\n" + "movl $0, %%ecx\n" + "rep cmpsb\n" + "movl $4, %%ecx\n" + "rep cmpsb\n" + + /* getpid() syscall: single step should skip one + instruction */ + "movl $20, %%eax\n" + "int $0x80\n" + "movl $0, %%eax\n" + + /* when modifying SS, trace is not done on the next + instruction */ + "movl %%ss, %%ecx\n" + "movl %%ecx, %%ss\n" + "addl $1, %0\n" + "movl $1, %%eax\n" + "movl %%ecx, %%ss\n" + "jmp 1f\n" + "addl $1, %0\n" + "1:\n" + "movl $1, %%eax\n" + "pushl %%ecx\n" + "popl %%ss\n" + "addl $1, %0\n" + "movl $1, %%eax\n" + + "pushf\n" + "andl $~0x00100, (%%esp)\n" + "popf\n" + : "=m" (val) + : + : "cc", "memory", "eax", "ecx", "esi", "edi"); + printf("val=%d\n", val); + for(i = 0; i < 4; i++) + printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]); +} + +/* self modifying code test */ +uint8_t code[] = { + 0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */ + 0xc3, /* ret */ +}; + +asm(".section \".data\"\n" + "smc_code2:\n" + "movl 4(%esp), %eax\n" + "movl %eax, smc_patch_addr2 + 1\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "smc_patch_addr2:\n" + "movl $1, %eax\n" + "ret\n" + ".previous\n" + ); + +typedef int FuncType(void); +extern int smc_code2(int); +void test_self_modifying_code(void) +{ + int i; + printf("self modifying code:\n"); + printf("func1 = 0x%x\n", ((FuncType *)code)()); + for(i = 2; i <= 4; i++) { + code[1] = i; + printf("func%d = 0x%x\n", i, ((FuncType *)code)()); + } + + /* more difficult test : the modified code is just after the + modifying instruction. 
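+       For the emulator this means the write must invalidate the translation
+       block that is currently executing and force a retranslation before the
+       patched instructions are reached.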
It is forbidden in Intel specs, but it + is used by old DOS programs */ + for(i = 2; i <= 4; i++) { + printf("smc_code2(%d) = %d\n", i, smc_code2(i)); + } +} +#endif + +long enter_stack[4096]; + +#if defined(__x86_64__) +#define RSP "%%rsp" +#define RBP "%%rbp" +#else +#define RSP "%%esp" +#define RBP "%%ebp" +#endif + +#define TEST_ENTER(size, stack_type, level)\ +{\ + long esp_save, esp_val, ebp_val, ebp_save, i;\ + stack_type *ptr, *stack_end, *stack_ptr;\ + memset(enter_stack, 0, sizeof(enter_stack));\ + stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\ + ebp_val = (long)stack_ptr;\ + for(i=1;i<=32;i++)\ + *--stack_ptr = i;\ + esp_val = (long)stack_ptr;\ + asm("mov " RSP ", %[esp_save]\n"\ + "mov " RBP ", %[ebp_save]\n"\ + "mov %[esp_val], " RSP "\n"\ + "mov %[ebp_val], " RBP "\n"\ + "enter" size " $8, $" #level "\n"\ + "mov " RSP ", %[esp_val]\n"\ + "mov " RBP ", %[ebp_val]\n"\ + "mov %[esp_save], " RSP "\n"\ + "mov %[ebp_save], " RBP "\n"\ + : [esp_save] "=r" (esp_save),\ + [ebp_save] "=r" (ebp_save),\ + [esp_val] "=r" (esp_val),\ + [ebp_val] "=r" (ebp_val)\ + : "[esp_val]" (esp_val),\ + "[ebp_val]" (ebp_val));\ + printf("level=%d:\n", level);\ + printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\ + printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\ + for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\ + printf(FMTLX "\n", (long)ptr[0]);\ +} + +static void test_enter(void) +{ +#if defined(__x86_64__) + TEST_ENTER("q", uint64_t, 0); + TEST_ENTER("q", uint64_t, 1); + TEST_ENTER("q", uint64_t, 2); + TEST_ENTER("q", uint64_t, 31); +#else + TEST_ENTER("l", uint32_t, 0); + TEST_ENTER("l", uint32_t, 1); + TEST_ENTER("l", uint32_t, 2); + TEST_ENTER("l", uint32_t, 31); +#endif + + TEST_ENTER("w", uint16_t, 0); + TEST_ENTER("w", uint16_t, 1); + TEST_ENTER("w", uint16_t, 2); + TEST_ENTER("w", uint16_t, 31); +} + +#ifdef TEST_SSE + +typedef int __m64 __attribute__ ((__mode__ (__V2SI__))); +typedef float __m128 __attribute__ ((__mode__(__V4SF__))); + +typedef union { + double d[2]; + float s[4]; + uint32_t l[4]; + uint64_t q[2]; + __m128 dq; +} XMMReg; + +static uint64_t __attribute__((aligned(16))) test_values[4][2] = { + { 0x456723c698694873, 0xdc515cff944a58ec }, + { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 }, + { 0x007c62c2085427f8, 0x231be9e8cde7438d }, + { 0x0f76255a085427f8, 0xc233e9e8c4c9439a }, +}; + +#define SSE_OP(op)\ +{\ + asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + b.q[1], b.q[0],\ + r.q[1], r.q[0]);\ +} + +#define SSE_OP2(op)\ +{\ + int i;\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + a.q[1] = test_values[2*i][1];\ + b.q[0] = test_values[2*i+1][0];\ + b.q[1] = test_values[2*i+1][1];\ + SSE_OP(op);\ + }\ +} + +#define MMX_OP2(op)\ +{\ + int i;\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + b.q[0] = test_values[2*i+1][0];\ + asm volatile (#op " %2, %0" : "=y" (r.q[0]) : "0" (a.q[0]), "y" (b.q[0]));\ + printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n",\ + #op,\ + a.q[0],\ + b.q[0],\ + r.q[0]);\ + }\ + SSE_OP2(op);\ +} + +#define SHUF_OP(op, ib)\ +{\ + a.q[0] = test_values[0][0];\ + a.q[1] = test_values[0][1];\ + b.q[0] = test_values[1][0];\ + b.q[1] = test_values[1][1];\ + asm volatile (#op " $" #ib ", %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + b.q[1], 
b.q[0],\ + ib,\ + r.q[1], r.q[0]);\ +} + +#define PSHUF_OP(op, ib)\ +{\ + int i;\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + a.q[1] = test_values[2*i][1];\ + asm volatile (#op " $" #ib ", %1, %0" : "=x" (r.dq) : "x" (a.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + ib,\ + r.q[1], r.q[0]);\ + }\ +} + +#define SHIFT_IM(op, ib)\ +{\ + int i;\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + a.q[1] = test_values[2*i][1];\ + asm volatile (#op " $" #ib ", %0" : "=x" (r.dq) : "0" (a.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " ib=%02x r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + ib,\ + r.q[1], r.q[0]);\ + }\ +} + +#define SHIFT_OP(op, ib)\ +{\ + int i;\ + SHIFT_IM(op, ib);\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + a.q[1] = test_values[2*i][1];\ + b.q[0] = ib;\ + b.q[1] = 0;\ + asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + b.q[1], b.q[0],\ + r.q[1], r.q[0]);\ + }\ +} + +#define MOVMSK(op)\ +{\ + int i, reg;\ + for(i=0;i<2;i++) {\ + a.q[0] = test_values[2*i][0];\ + a.q[1] = test_values[2*i][1];\ + asm volatile (#op " %1, %0" : "=r" (reg) : "x" (a.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\ + #op,\ + a.q[1], a.q[0],\ + reg);\ + }\ +} + +#define SSE_OPS(a) \ +SSE_OP(a ## ps);\ +SSE_OP(a ## ss); + +#define SSE_OPD(a) \ +SSE_OP(a ## pd);\ +SSE_OP(a ## sd); + +#define SSE_COMI(op, field)\ +{\ + unsigned int eflags;\ + XMMReg a, b;\ + a.field[0] = a1;\ + b.field[0] = b1;\ + asm volatile (#op " %2, %1\n"\ + "pushf\n"\ + "pop %0\n"\ + : "=m" (eflags)\ + : "x" (a.dq), "x" (b.dq));\ + printf("%-9s: a=%f b=%f cc=%04x\n",\ + #op, a1, b1,\ + eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\ +} + +void test_sse_comi(double a1, double b1) +{ + SSE_COMI(ucomiss, s); + SSE_COMI(ucomisd, d); + SSE_COMI(comiss, s); + SSE_COMI(comisd, d); +} + +#define CVT_OP_XMM(op)\ +{\ + asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + r.q[1], r.q[0]);\ +} + +/* Force %xmm0 usage to avoid the case where both register index are 0 + to test intruction decoding more extensively */ +#define CVT_OP_XMM2MMX(op)\ +{\ + asm volatile (#op " %1, %0" : "=y" (r.q[0]) : "x" (a.dq) \ + : "%xmm0"); \ + asm volatile("emms\n"); \ + printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n",\ + #op,\ + a.q[1], a.q[0],\ + r.q[0]);\ +} + +#define CVT_OP_MMX2XMM(op)\ +{\ + asm volatile (#op " %1, %0" : "=x" (r.dq) : "y" (a.q[0]));\ + asm volatile("emms\n"); \ + printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.q[0],\ + r.q[1], r.q[0]);\ +} + +#define CVT_OP_REG2XMM(op)\ +{\ + asm volatile (#op " %1, %0" : "=x" (r.dq) : "r" (a.l[0]));\ + printf("%-9s: a=%08x r=" FMT64X "" FMT64X "\n",\ + #op,\ + a.l[0],\ + r.q[1], r.q[0]);\ +} + +#define CVT_OP_XMM2REG(op)\ +{\ + asm volatile (#op " %1, %0" : "=r" (r.l[0]) : "x" (a.dq));\ + printf("%-9s: a=" FMT64X "" FMT64X " r=%08x\n",\ + #op,\ + a.q[1], a.q[0],\ + r.l[0]);\ +} + +struct fpxstate { + uint16_t fpuc; + uint16_t fpus; + uint16_t fptag; + uint16_t fop; + uint32_t fpuip; + uint16_t cs_sel; + uint16_t dummy0; + uint32_t fpudp; + uint16_t ds_sel; + uint16_t dummy1; + uint32_t mxcsr; + uint32_t mxcsr_mask; + uint8_t fpregs1[8 * 16]; + uint8_t xmm_regs[8 * 16]; + uint8_t dummy2[224]; +}; + +static struct fpxstate fpx_state 
__attribute__((aligned(16))); +static struct fpxstate fpx_state2 __attribute__((aligned(16))); + +void test_fxsave(void) +{ + struct fpxstate *fp = &fpx_state; + struct fpxstate *fp2 = &fpx_state2; + int i, nb_xmm; + XMMReg a, b; + a.q[0] = test_values[0][0]; + a.q[1] = test_values[0][1]; + b.q[0] = test_values[1][0]; + b.q[1] = test_values[1][1]; + + asm("movdqa %2, %%xmm0\n" + "movdqa %3, %%xmm7\n" +#if defined(__x86_64__) + "movdqa %2, %%xmm15\n" +#endif + " fld1\n" + " fldpi\n" + " fldln2\n" + " fxsave %0\n" + " fxrstor %0\n" + " fxsave %1\n" + " fninit\n" + : "=m" (*(uint32_t *)fp2), "=m" (*(uint32_t *)fp) + : "m" (a), "m" (b)); + printf("fpuc=%04x\n", fp->fpuc); + printf("fpus=%04x\n", fp->fpus); + printf("fptag=%04x\n", fp->fptag); + for(i = 0; i < 3; i++) { + printf("ST%d: " FMT64X " %04x\n", + i, + *(uint64_t *)&fp->fpregs1[i * 16], + *(uint16_t *)&fp->fpregs1[i * 16 + 8]); + } + printf("mxcsr=%08x\n", fp->mxcsr & 0x1f80); +#if defined(__x86_64__) + nb_xmm = 16; +#else + nb_xmm = 8; +#endif + for(i = 0; i < nb_xmm; i++) { + printf("xmm%d: " FMT64X "" FMT64X "\n", + i, + *(uint64_t *)&fp->xmm_regs[i * 16], + *(uint64_t *)&fp->xmm_regs[i * 16 + 8]); + } +} + +void test_sse(void) +{ + XMMReg r, a, b; + int i; + + MMX_OP2(punpcklbw); + MMX_OP2(punpcklwd); + MMX_OP2(punpckldq); + MMX_OP2(packsswb); + MMX_OP2(pcmpgtb); + MMX_OP2(pcmpgtw); + MMX_OP2(pcmpgtd); + MMX_OP2(packuswb); + MMX_OP2(punpckhbw); + MMX_OP2(punpckhwd); + MMX_OP2(punpckhdq); + MMX_OP2(packssdw); + MMX_OP2(pcmpeqb); + MMX_OP2(pcmpeqw); + MMX_OP2(pcmpeqd); + + MMX_OP2(paddq); + MMX_OP2(pmullw); + MMX_OP2(psubusb); + MMX_OP2(psubusw); + MMX_OP2(pminub); + MMX_OP2(pand); + MMX_OP2(paddusb); + MMX_OP2(paddusw); + MMX_OP2(pmaxub); + MMX_OP2(pandn); + + MMX_OP2(pmulhuw); + MMX_OP2(pmulhw); + + MMX_OP2(psubsb); + MMX_OP2(psubsw); + MMX_OP2(pminsw); + MMX_OP2(por); + MMX_OP2(paddsb); + MMX_OP2(paddsw); + MMX_OP2(pmaxsw); + MMX_OP2(pxor); + MMX_OP2(pmuludq); + MMX_OP2(pmaddwd); + MMX_OP2(psadbw); + MMX_OP2(psubb); + MMX_OP2(psubw); + MMX_OP2(psubd); + MMX_OP2(psubq); + MMX_OP2(paddb); + MMX_OP2(paddw); + MMX_OP2(paddd); + + MMX_OP2(pavgb); + MMX_OP2(pavgw); + + asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678)); + printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]); + + asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678)); + printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]); + + a.q[0] = test_values[0][0]; + a.q[1] = test_values[0][1]; + asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); + printf("%-9s: r=%08x\n", "pextrw", r.l[0]); + + asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); + printf("%-9s: r=%08x\n", "pextrw", r.l[0]); + + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0])); + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); + + asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq)); + printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]); + + { + r.q[0] = -1; + r.q[1] = -1; + + a.q[0] = test_values[0][0]; + a.q[1] = test_values[0][1]; + b.q[0] = test_values[1][0]; + b.q[1] = test_values[1][1]; + asm volatile("maskmovq %1, %0" : + : "y" (a.q[0]), "y" (b.q[0]), "D" (&r) + : "memory"); + printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n", + "maskmov", + r.q[0], + a.q[0], + b.q[0]); + asm volatile("maskmovdqu %1, %0" : + : "x" (a.dq), "x" (b.dq), "D" (&r) + : "memory"); + printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n", + "maskmov", + r.q[1], r.q[0], + a.q[1], a.q[0], + b.q[1], b.q[0]); + } + 
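+    /* Everything above ran on MMX registers; emms below resets the x87 tag
+       word so that the FPU/SSE code that follows starts from a clean state. */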
+ asm volatile ("emms"); + + SSE_OP2(punpcklqdq); + SSE_OP2(punpckhqdq); + SSE_OP2(andps); + SSE_OP2(andpd); + SSE_OP2(andnps); + SSE_OP2(andnpd); + SSE_OP2(orps); + SSE_OP2(orpd); + SSE_OP2(xorps); + SSE_OP2(xorpd); + + SSE_OP2(unpcklps); + SSE_OP2(unpcklpd); + SSE_OP2(unpckhps); + SSE_OP2(unpckhpd); + + SHUF_OP(shufps, 0x78); + SHUF_OP(shufpd, 0x02); + + PSHUF_OP(pshufd, 0x78); + PSHUF_OP(pshuflw, 0x78); + PSHUF_OP(pshufhw, 0x78); + + SHIFT_OP(psrlw, 7); + SHIFT_OP(psrlw, 16); + SHIFT_OP(psraw, 7); + SHIFT_OP(psraw, 16); + SHIFT_OP(psllw, 7); + SHIFT_OP(psllw, 16); + + SHIFT_OP(psrld, 7); + SHIFT_OP(psrld, 32); + SHIFT_OP(psrad, 7); + SHIFT_OP(psrad, 32); + SHIFT_OP(pslld, 7); + SHIFT_OP(pslld, 32); + + SHIFT_OP(psrlq, 7); + SHIFT_OP(psrlq, 32); + SHIFT_OP(psllq, 7); + SHIFT_OP(psllq, 32); + + SHIFT_IM(psrldq, 16); + SHIFT_IM(psrldq, 7); + SHIFT_IM(pslldq, 16); + SHIFT_IM(pslldq, 7); + + MOVMSK(movmskps); + MOVMSK(movmskpd); + + /* FPU specific ops */ + + { + uint32_t mxcsr; + asm volatile("stmxcsr %0" : "=m" (mxcsr)); + printf("mxcsr=%08x\n", mxcsr & 0x1f80); + asm volatile("ldmxcsr %0" : : "m" (mxcsr)); + } + + test_sse_comi(2, -1); + test_sse_comi(2, 2); + test_sse_comi(2, 3); + test_sse_comi(2, q_nan.d); + test_sse_comi(q_nan.d, -1); + + for(i = 0; i < 2; i++) { + a.s[0] = 2.7; + a.s[1] = 3.4; + a.s[2] = 4; + a.s[3] = -6.3; + b.s[0] = 45.7; + b.s[1] = 353.4; + b.s[2] = 4; + b.s[3] = 56.3; + if (i == 1) { + a.s[0] = q_nan.d; + b.s[3] = q_nan.d; + } + + SSE_OPS(add); + SSE_OPS(mul); + SSE_OPS(sub); + SSE_OPS(min); + SSE_OPS(div); + SSE_OPS(max); + SSE_OPS(sqrt); + SSE_OPS(cmpeq); + SSE_OPS(cmplt); + SSE_OPS(cmple); + SSE_OPS(cmpunord); + SSE_OPS(cmpneq); + SSE_OPS(cmpnlt); + SSE_OPS(cmpnle); + SSE_OPS(cmpord); + + + a.d[0] = 2.7; + a.d[1] = -3.4; + b.d[0] = 45.7; + b.d[1] = -53.4; + if (i == 1) { + a.d[0] = q_nan.d; + b.d[1] = q_nan.d; + } + SSE_OPD(add); + SSE_OPD(mul); + SSE_OPD(sub); + SSE_OPD(min); + SSE_OPD(div); + SSE_OPD(max); + SSE_OPD(sqrt); + SSE_OPD(cmpeq); + SSE_OPD(cmplt); + SSE_OPD(cmple); + SSE_OPD(cmpunord); + SSE_OPD(cmpneq); + SSE_OPD(cmpnlt); + SSE_OPD(cmpnle); + SSE_OPD(cmpord); + } + + /* float to float/int */ + a.s[0] = 2.7; + a.s[1] = 3.4; + a.s[2] = 4; + a.s[3] = -6.3; + CVT_OP_XMM(cvtps2pd); + CVT_OP_XMM(cvtss2sd); + CVT_OP_XMM2MMX(cvtps2pi); + CVT_OP_XMM2MMX(cvttps2pi); + CVT_OP_XMM2REG(cvtss2si); + CVT_OP_XMM2REG(cvttss2si); + CVT_OP_XMM(cvtps2dq); + CVT_OP_XMM(cvttps2dq); + + a.d[0] = 2.6; + a.d[1] = -3.4; + CVT_OP_XMM(cvtpd2ps); + CVT_OP_XMM(cvtsd2ss); + CVT_OP_XMM2MMX(cvtpd2pi); + CVT_OP_XMM2MMX(cvttpd2pi); + CVT_OP_XMM2REG(cvtsd2si); + CVT_OP_XMM2REG(cvttsd2si); + CVT_OP_XMM(cvtpd2dq); + CVT_OP_XMM(cvttpd2dq); + + /* sse/mmx moves */ + CVT_OP_XMM2MMX(movdq2q); + CVT_OP_MMX2XMM(movq2dq); + + /* int to float */ + a.l[0] = -6; + a.l[1] = 2; + a.l[2] = 100; + a.l[3] = -60000; + CVT_OP_MMX2XMM(cvtpi2ps); + CVT_OP_MMX2XMM(cvtpi2pd); + CVT_OP_REG2XMM(cvtsi2ss); + CVT_OP_REG2XMM(cvtsi2sd); + CVT_OP_XMM(cvtdq2ps); + CVT_OP_XMM(cvtdq2pd); + + /* XXX: test PNI insns */ +#if 0 + SSE_OP2(movshdup); +#endif + asm volatile ("emms"); +} + +#endif + +#define TEST_CONV_RAX(op)\ +{\ + unsigned long a, r;\ + a = i2l(0x8234a6f8);\ + r = a;\ + asm volatile(#op : "=a" (r) : "0" (r));\ + printf("%-10s A=" FMTLX " R=" FMTLX "\n", #op, a, r);\ +} + +#define TEST_CONV_RAX_RDX(op)\ +{\ + unsigned long a, d, r, rh; \ + a = i2l(0x8234a6f8);\ + d = i2l(0x8345a1f2);\ + r = a;\ + rh = d;\ + asm volatile(#op : "=a" (r), "=d" (rh) : "0" (r), "1" (rh)); \ + printf("%-10s A=" FMTLX " R=" 
FMTLX ":" FMTLX "\n", #op, a, r, rh); \ +} + +void test_conv(void) +{ + TEST_CONV_RAX(cbw); + TEST_CONV_RAX(cwde); +#if defined(__x86_64__) + TEST_CONV_RAX(cdqe); +#endif + + TEST_CONV_RAX_RDX(cwd); + TEST_CONV_RAX_RDX(cdq); +#if defined(__x86_64__) + TEST_CONV_RAX_RDX(cqo); +#endif + + { + unsigned long a, r; + a = i2l(0x12345678); + asm volatile("bswapl %k0" : "=r" (r) : "0" (a)); + printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapl", a, r); + } +#if defined(__x86_64__) + { + unsigned long a, r; + a = i2l(0x12345678); + asm volatile("bswapq %0" : "=r" (r) : "0" (a)); + printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapq", a, r); + } +#endif +} + +extern void *__start_initcall; +extern void *__stop_initcall; + + +int main(int argc, char **argv) +{ + void **ptr; + void (*func)(void); + + ptr = &__start_initcall; + while (ptr != &__stop_initcall) { + func = *ptr++; + func(); + } + test_bsx(); + test_mul(); + test_jcc(); + test_loop(); + test_floats(); +#if !defined(__x86_64__) + test_bcd(); +#endif + test_xchg(); + test_string(); + test_misc(); + test_lea(); +#ifdef TEST_SEGS + test_segs(); + test_code16(); +#endif +#ifdef TEST_VM86 + test_vm86(); +#endif +#if !defined(__x86_64__) + test_exceptions(); + test_self_modifying_code(); + test_single_step(); +#endif + test_enter(); + test_conv(); +#ifdef TEST_SSE + test_sse(); + test_fxsave(); +#endif + return 0; +} diff --git a/src/recompiler/tests/test-i386.h b/src/recompiler/tests/test-i386.h new file mode 100644 index 00000000..75106b8c --- /dev/null +++ b/src/recompiler/tests/test-i386.h @@ -0,0 +1,152 @@ + +#define exec_op glue(exec_, OP) +#define exec_opq glue(glue(exec_, OP), q) +#define exec_opl glue(glue(exec_, OP), l) +#define exec_opw glue(glue(exec_, OP), w) +#define exec_opb glue(glue(exec_, OP), b) + +#define EXECOP2(size, rsize, res, s1, flags) \ + asm ("push %4\n\t"\ + "popf\n\t"\ + stringify(OP) size " %" rsize "2, %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=q" (res), "=g" (flags)\ + : "q" (s1), "0" (res), "1" (flags)); \ + printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \ + stringify(OP) size, s0, s1, res, iflags, flags & CC_MASK); + +#define EXECOP1(size, rsize, res, flags) \ + asm ("push %3\n\t"\ + "popf\n\t"\ + stringify(OP) size " %" rsize "0\n\t" \ + "pushf\n\t"\ + "pop %1\n\t"\ + : "=q" (res), "=g" (flags)\ + : "0" (res), "1" (flags)); \ + printf("%-10s A=" FMTLX " R=" FMTLX " CCIN=%04lx CC=%04lx\n", \ + stringify(OP) size, s0, res, iflags, flags & CC_MASK); + +#ifdef OP1 +#if defined(__x86_64__) +void exec_opq(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP1("q", "", res, flags); +} +#endif + +void exec_opl(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP1("l", "k", res, flags); +} + +void exec_opw(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP1("w", "w", res, flags); +} + +void exec_opb(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP1("b", "b", res, flags); +} +#else +#if defined(__x86_64__) +void exec_opq(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP2("q", "", res, s1, flags); +} +#endif + +void exec_opl(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP2("l", "k", res, s1, flags); +} + +void exec_opw(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP2("w", "w", res, s1, flags); +} + 
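+/* This header is meant to be instantiated once per ALU operation by the
+   including .c file, which defines OP (and optionally OP1 or OP_CC) before
+   each include, roughly like:
+
+       #define OP add
+       #include "test-i386.h"
+
+   Each inclusion then expands into the size-specific exec_ helpers and a
+   test_ function that registers itself in the __init_call section walked by
+   main(). */
+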
+void exec_opb(long s0, long s1, long iflags) +{ + long res, flags; + res = s0; + flags = iflags; + EXECOP2("b", "b", res, s1, flags); +} +#endif + +void exec_op(long s0, long s1) +{ + s0 = i2l(s0); + s1 = i2l(s1); +#if defined(__x86_64__) + exec_opq(s0, s1, 0); +#endif + exec_opl(s0, s1, 0); + exec_opw(s0, s1, 0); + exec_opb(s0, s1, 0); +#ifdef OP_CC +#if defined(__x86_64__) + exec_opq(s0, s1, CC_C); +#endif + exec_opl(s0, s1, CC_C); + exec_opw(s0, s1, CC_C); + exec_opb(s0, s1, CC_C); +#endif +} + +void glue(test_, OP)(void) +{ + exec_op(0x12345678, 0x812FADA); + exec_op(0x12341, 0x12341); + exec_op(0x12341, -0x12341); + exec_op(0xffffffff, 0); + exec_op(0xffffffff, -1); + exec_op(0xffffffff, 1); + exec_op(0xffffffff, 2); + exec_op(0x7fffffff, 0); + exec_op(0x7fffffff, 1); + exec_op(0x7fffffff, -1); + exec_op(0x80000000, -1); + exec_op(0x80000000, 1); + exec_op(0x80000000, -2); + exec_op(0x12347fff, 0); + exec_op(0x12347fff, 1); + exec_op(0x12347fff, -1); + exec_op(0x12348000, -1); + exec_op(0x12348000, 1); + exec_op(0x12348000, -2); + exec_op(0x12347f7f, 0); + exec_op(0x12347f7f, 1); + exec_op(0x12347f7f, -1); + exec_op(0x12348080, -1); + exec_op(0x12348080, 1); + exec_op(0x12348080, -2); +} + +void *glue(_test_, OP) __init_call = glue(test_, OP); + +#undef OP +#undef OP_CC diff --git a/src/recompiler/tests/test-mmap.c b/src/recompiler/tests/test-mmap.c new file mode 100644 index 00000000..1d444b63 --- /dev/null +++ b/src/recompiler/tests/test-mmap.c @@ -0,0 +1,485 @@ +/* + * Small test program to verify simulated mmap behaviour. + * + * When running qemu-linux-user with the -p flag, you may need to tell + * this test program about the pagesize because getpagesize() will not reflect + * the -p choice. Simply pass one argument beeing the pagesize. + * + * Copyright (c) 2007 AXIS Communications AB + * Written by Edgar E. Iglesias. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle GPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the General Public License version 2 (GPLv2) at this time for any software where + * a choice of GPL license versions is made available with the language indicating + * that GPLv2 or any later version may be used, or where a choice of which version + * of the GPL is applied is otherwise unspecified. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include <sys/mman.h> + +#define D(x) + +#define fail_unless(x) \ +do \ +{ \ + if (!(x)) { \ + fprintf (stderr, "FAILED at %s:%d\n", __FILE__, __LINE__); \ + exit (EXIT_FAILURE); \ + } \ +} while (0); + +unsigned char *dummybuf; +static unsigned int pagesize; +static unsigned int pagemask; +int test_fd; +size_t test_fsize; + +void check_aligned_anonymous_unfixed_mmaps(void) +{ + void *p1; + void *p2; + void *p3; + void *p4; + void *p5; + uintptr_t p; + int i; + + fprintf (stderr, "%s", __func__); + for (i = 0; i < 0x1fff; i++) + { + size_t len; + + len = pagesize + (pagesize * i & 7); + p1 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + p2 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + p3 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + p4 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + p5 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + /* Make sure we get pages aligned with the pagesize. The + target expects this. */ + fail_unless (p1 != MAP_FAILED); + fail_unless (p2 != MAP_FAILED); + fail_unless (p3 != MAP_FAILED); + fail_unless (p4 != MAP_FAILED); + fail_unless (p5 != MAP_FAILED); + p = (uintptr_t) p1; + D(printf ("p=%x\n", p)); + fail_unless ((p & pagemask) == 0); + p = (uintptr_t) p2; + fail_unless ((p & pagemask) == 0); + p = (uintptr_t) p3; + fail_unless ((p & pagemask) == 0); + p = (uintptr_t) p4; + fail_unless ((p & pagemask) == 0); + p = (uintptr_t) p5; + fail_unless ((p & pagemask) == 0); + + /* Make sure we can read from the entire area. */ + memcpy (dummybuf, p1, pagesize); + memcpy (dummybuf, p2, pagesize); + memcpy (dummybuf, p3, pagesize); + memcpy (dummybuf, p4, pagesize); + memcpy (dummybuf, p5, pagesize); + + munmap (p1, len); + munmap (p2, len); + munmap (p3, len); + munmap (p4, len); + munmap (p5, len); + } + fprintf (stderr, " passed\n"); +} + +void check_large_anonymous_unfixed_mmap(void) +{ + void *p1; + uintptr_t p; + size_t len; + + fprintf (stderr, "%s", __func__); + + len = 0x02000000; + p1 = mmap(NULL, len, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + /* Make sure we get pages aligned with the pagesize. The + target expects this. */ + fail_unless (p1 != MAP_FAILED); + p = (uintptr_t) p1; + fail_unless ((p & pagemask) == 0); + + /* Make sure we can read from the entire area. */ + memcpy (dummybuf, p1, pagesize); + munmap (p1, len); + fprintf (stderr, " passed\n"); +} + +void check_aligned_anonymous_unfixed_colliding_mmaps(void) +{ + char *p1; + char *p2; + char *p3; + uintptr_t p; + int i; + + fprintf (stderr, "%s", __func__); + for (i = 0; i < 0x2fff; i++) + { + int nlen; + p1 = mmap(NULL, pagesize, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + fail_unless (p1 != MAP_FAILED); + p = (uintptr_t) p1; + fail_unless ((p & pagemask) == 0); + memcpy (dummybuf, p1, pagesize); + + p2 = mmap(NULL, pagesize, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + fail_unless (p2 != MAP_FAILED); + p = (uintptr_t) p2; + fail_unless ((p & pagemask) == 0); + memcpy (dummybuf, p2, pagesize); + + + munmap (p1, pagesize); + nlen = pagesize * 8; + p3 = mmap(NULL, nlen, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + /* Check if the mmaped areas collide. */ + if (p3 < p2 + && (p3 + nlen) > p2) + fail_unless (0); + + memcpy (dummybuf, p3, pagesize); + + /* Make sure we get pages aligned with the pagesize. The + target expects this. 
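+       (qemu-user may emulate a target page size larger than the host's, for
+       instance via the -p option mentioned in the header comment, so the
+       addresses returned by mmap must be aligned to the target page size,
+       not just the host's.)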
*/
+ fail_unless (p3 != MAP_FAILED);
+ p = (uintptr_t) p3;
+ fail_unless ((p & pagemask) == 0);
+ munmap (p2, pagesize);
+ munmap (p3, nlen);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_aligned_anonymous_fixed_mmaps(void)
+{
+ char *addr;
+ void *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 40, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ fprintf (stderr, "%s addr=%p", __func__, addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 40; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ -1, 0);
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless (p1 == addr);
+ fail_unless ((p & pagemask) == 0);
+ memcpy (dummybuf, p1, pagesize);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_aligned_anonymous_fixed_mmaps_collide_with_host(void)
+{
+ char *addr;
+ void *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. Right where the x86 host's
+ stack is. */
+ addr = ((void *)0x80000000);
+ fprintf (stderr, "%s addr=%p", __func__, addr);
+ fprintf (stderr, "FIXME: QEMU fails to track pages used by the host.");
+
+ for (i = 0; i < 20; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ -1, 0);
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless (p1 == addr);
+ fail_unless ((p & pagemask) == 0);
+ memcpy (p1, dummybuf, pagesize);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_unfixed_mmaps(void)
+{
+ unsigned int *p1, *p2, *p3;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x10; i++)
+ {
+ size_t len;
+
+ len = pagesize;
+ p1 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, 0);
+ p2 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, pagesize);
+ p3 = mmap(NULL, len, PROT_READ,
+ MAP_PRIVATE,
+ test_fd, pagesize * 2);
+
+ fail_unless (p1 != MAP_FAILED);
+ fail_unless (p2 != MAP_FAILED);
+ fail_unless (p3 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p2;
+ fail_unless ((p & pagemask) == 0);
+ p = (uintptr_t) p3;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Verify that the file maps were made correctly. */
+ D(printf ("p1=%d p2=%d p3=%d\n", *p1, *p2, *p3));
+ fail_unless (*p1 == 0);
+ fail_unless (*p2 == (pagesize / sizeof *p2));
+ fail_unless (*p3 == ((pagesize * 2) / sizeof *p3));
+
+ memcpy (dummybuf, p1, pagesize);
+ memcpy (dummybuf, p2, pagesize);
+ memcpy (dummybuf, p3, pagesize);
+ munmap (p1, len);
+ munmap (p2, len);
+ munmap (p3, len);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_unfixed_eof_mmaps(void)
+{
+ char *cp;
+ unsigned int *p1;
+ uintptr_t p;
+ int i;
+
+ fprintf (stderr, "%s", __func__);
+ for (i = 0; i < 0x10; i++)
+ {
+ p1 = mmap(NULL, pagesize, PROT_READ,
+ MAP_PRIVATE,
+ test_fd,
+ (test_fsize - sizeof *p1) & ~pagemask);
+
+ fail_unless (p1 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this.
*/
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+ /* Verify that the file maps were made correctly. */
+ fail_unless (p1[(test_fsize & pagemask) / sizeof *p1 - 1]
+ == ((test_fsize - sizeof *p1) / sizeof *p1));
+
+ /* Verify that the end of the page is accessible and zeroed. */
+ cp = (void *) p1;
+ fail_unless (cp[pagesize - 4] == 0);
+ munmap (p1, pagesize);
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_fixed_eof_mmaps(void)
+{
+ char *addr;
+ char *cp;
+ unsigned int *p1;
+ uintptr_t p;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 44, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+
+ fprintf (stderr, "%s addr=%p", __func__, (void *)addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 0x10; i++)
+ {
+ /* Create submaps within our unfixed map. */
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd,
+ (test_fsize - sizeof *p1) & ~pagemask);
+
+ fail_unless (p1 != MAP_FAILED);
+
+ /* Make sure we get pages aligned with the pagesize. The
+ target expects this. */
+ p = (uintptr_t) p1;
+ fail_unless ((p & pagemask) == 0);
+
+ /* Verify that the file maps were made correctly. */
+ fail_unless (p1[(test_fsize & pagemask) / sizeof *p1 - 1]
+ == ((test_fsize - sizeof *p1) / sizeof *p1));
+
+ /* Verify that the end of the page is accessible and zeroed. */
+ cp = (void *)p1;
+ fail_unless (cp[pagesize - 4] == 0);
+ munmap (p1, pagesize);
+ addr += pagesize;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+void check_file_fixed_mmaps(void)
+{
+ unsigned char *addr;
+ unsigned int *p1, *p2, *p3, *p4;
+ int i;
+
+ /* Find a suitable address to start with. */
+ addr = mmap(NULL, pagesize * 40 * 4, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ fprintf (stderr, "%s addr=%p", __func__, (void *)addr);
+ fail_unless (addr != MAP_FAILED);
+
+ for (i = 0; i < 40; i++)
+ {
+ p1 = mmap(addr, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, 0);
+ p2 = mmap(addr + pagesize, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize);
+ p3 = mmap(addr + pagesize * 2, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize * 2);
+ p4 = mmap(addr + pagesize * 3, pagesize, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED,
+ test_fd, pagesize * 3);
+
+ /* Make sure we get pages aligned with the pagesize.
+ The target expects this. */
+ fail_unless (p1 == (void *)addr);
+ fail_unless (p2 == (void *)(addr + pagesize));
+ fail_unless (p3 == (void *)(addr + pagesize * 2));
+ fail_unless (p4 == (void *)(addr + pagesize * 3));
+
+ /* Verify that the file maps were made correctly. */
+ fail_unless (*p1 == 0);
+ fail_unless (*p2 == (pagesize / sizeof *p2));
+ fail_unless (*p3 == ((pagesize * 2) / sizeof *p3));
+ fail_unless (*p4 == ((pagesize * 3) / sizeof *p4));
+
+ memcpy (dummybuf, p1, pagesize);
+ memcpy (dummybuf, p2, pagesize);
+ memcpy (dummybuf, p3, pagesize);
+ memcpy (dummybuf, p4, pagesize);
+
+ munmap (p1, pagesize);
+ munmap (p2, pagesize);
+ munmap (p3, pagesize);
+ munmap (p4, pagesize);
+ addr += pagesize * 4;
+ }
+ fprintf (stderr, " passed\n");
+}
+
+int main(int argc, char **argv)
+{
+ char tempname[] = "/tmp/.cmmapXXXXXX";
+ unsigned int i;
+
+ /* Trust the first argument, otherwise probe the system for our
+ pagesize. */
+ if (argc > 1)
+ pagesize = strtoul(argv[1], NULL, 0);
+ else
+ pagesize = sysconf(_SC_PAGESIZE);
+
+ /* Assume pagesize is a power of two.
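+       pagesize - 1 then yields a mask of the in-page offset bits (e.g. 4096
+       gives 0xfff), so (p & pagemask) == 0 tests page alignment.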
*/ + pagemask = pagesize - 1; + dummybuf = malloc (pagesize); + printf ("pagesize=%u pagemask=%x\n", pagesize, pagemask); + + test_fd = mkstemp(tempname); + unlink(tempname); + + /* Fill the file with int's counting from zero and up. */ + for (i = 0; i < (pagesize * 4) / sizeof i; i++) + write (test_fd, &i, sizeof i); + /* Append a few extra writes to make the file end at non + page boundary. */ + write (test_fd, &i, sizeof i); i++; + write (test_fd, &i, sizeof i); i++; + write (test_fd, &i, sizeof i); i++; + + test_fsize = lseek(test_fd, 0, SEEK_CUR); + + /* Run the tests. */ + check_aligned_anonymous_unfixed_mmaps(); + check_aligned_anonymous_unfixed_colliding_mmaps(); + check_aligned_anonymous_fixed_mmaps(); + check_file_unfixed_mmaps(); + check_file_fixed_mmaps(); + check_file_fixed_eof_mmaps(); + check_file_unfixed_eof_mmaps(); + + /* Fails at the moment. */ + /* check_aligned_anonymous_fixed_mmaps_collide_with_host(); */ + + return EXIT_SUCCESS; +} diff --git a/src/recompiler/tests/test_path.c b/src/recompiler/tests/test_path.c new file mode 100644 index 00000000..def7441c --- /dev/null +++ b/src/recompiler/tests/test_path.c @@ -0,0 +1,151 @@ +/* Test path override code */ +#define _GNU_SOURCE +#include "../path.c" +#include <stdarg.h> +#include <sys/stat.h> +#include <fcntl.h> + +/* Any log message kills the test. */ +void gemu_log(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "FATAL: "); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +#define NO_CHANGE(_path) \ + do { \ + if (strcmp(path(_path), _path) != 0) return __LINE__; \ + } while(0) + +#define CHANGE_TO(_path, _newpath) \ + do { \ + if (strcmp(path(_path), _newpath) != 0) return __LINE__; \ + } while(0) + +static void cleanup(void) +{ + unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE"); + unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE2"); + unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE3"); + unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE4"); + unlink("/tmp/qemu-test_path/DIR1/DIR2/FILE5"); + rmdir("/tmp/qemu-test_path/DIR1/DIR2"); + rmdir("/tmp/qemu-test_path/DIR1/DIR3"); + rmdir("/tmp/qemu-test_path/DIR1"); + rmdir("/tmp/qemu-test_path"); +} + +static unsigned int do_test(void) +{ + if (mkdir("/tmp/qemu-test_path", 0700) != 0) + return __LINE__; + + if (mkdir("/tmp/qemu-test_path/DIR1", 0700) != 0) + return __LINE__; + + if (mkdir("/tmp/qemu-test_path/DIR1/DIR2", 0700) != 0) + return __LINE__; + + if (mkdir("/tmp/qemu-test_path/DIR1/DIR3", 0700) != 0) + return __LINE__; + + if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE", 0600)) != 0) + return __LINE__; + + if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE2", 0600)) != 0) + return __LINE__; + + if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE3", 0600)) != 0) + return __LINE__; + + if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE4", 0600)) != 0) + return __LINE__; + + if (close(creat("/tmp/qemu-test_path/DIR1/DIR2/FILE5", 0600)) != 0) + return __LINE__; + + init_paths("/tmp/qemu-test_path"); + + NO_CHANGE("/tmp"); + NO_CHANGE("/tmp/"); + NO_CHANGE("/tmp/qemu-test_path"); + NO_CHANGE("/tmp/qemu-test_path/"); + NO_CHANGE("/tmp/qemu-test_path/D"); + NO_CHANGE("/tmp/qemu-test_path/DI"); + NO_CHANGE("/tmp/qemu-test_path/DIR"); + NO_CHANGE("/tmp/qemu-test_path/DIR1"); + NO_CHANGE("/tmp/qemu-test_path/DIR1/"); + + NO_CHANGE("/D"); + NO_CHANGE("/DI"); + NO_CHANGE("/DIR"); + NO_CHANGE("/DIR2"); + NO_CHANGE("/DIR1."); + + CHANGE_TO("/DIR1", "/tmp/qemu-test_path/DIR1"); + CHANGE_TO("/DIR1/", "/tmp/qemu-test_path/DIR1"); + + 
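+    /* path() only rewrites a name when the corresponding entry actually
+       exists under the prefix passed to init_paths(); mere prefixes of a
+       valid name and entries not present in the tree created above stay
+       unchanged. */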
NO_CHANGE("/DIR1/D"); + NO_CHANGE("/DIR1/DI"); + NO_CHANGE("/DIR1/DIR"); + NO_CHANGE("/DIR1/DIR1"); + + CHANGE_TO("/DIR1/DIR2", "/tmp/qemu-test_path/DIR1/DIR2"); + CHANGE_TO("/DIR1/DIR2/", "/tmp/qemu-test_path/DIR1/DIR2"); + + CHANGE_TO("/DIR1/DIR3", "/tmp/qemu-test_path/DIR1/DIR3"); + CHANGE_TO("/DIR1/DIR3/", "/tmp/qemu-test_path/DIR1/DIR3"); + + NO_CHANGE("/DIR1/DIR2/F"); + NO_CHANGE("/DIR1/DIR2/FI"); + NO_CHANGE("/DIR1/DIR2/FIL"); + NO_CHANGE("/DIR1/DIR2/FIL."); + + CHANGE_TO("/DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/DIR2/FILE2", "/tmp/qemu-test_path/DIR1/DIR2/FILE2"); + CHANGE_TO("/DIR1/DIR2/FILE3", "/tmp/qemu-test_path/DIR1/DIR2/FILE3"); + CHANGE_TO("/DIR1/DIR2/FILE4", "/tmp/qemu-test_path/DIR1/DIR2/FILE4"); + CHANGE_TO("/DIR1/DIR2/FILE5", "/tmp/qemu-test_path/DIR1/DIR2/FILE5"); + + NO_CHANGE("/DIR1/DIR2/FILE6"); + NO_CHANGE("/DIR1/DIR2/FILE/X"); + + CHANGE_TO("/DIR1/../DIR1", "/tmp/qemu-test_path/DIR1"); + CHANGE_TO("/DIR1/../DIR1/", "/tmp/qemu-test_path/DIR1"); + CHANGE_TO("/../DIR1", "/tmp/qemu-test_path/DIR1"); + CHANGE_TO("/../DIR1/", "/tmp/qemu-test_path/DIR1"); + CHANGE_TO("/DIR1/DIR2/../DIR2", "/tmp/qemu-test_path/DIR1/DIR2"); + CHANGE_TO("/DIR1/DIR2/../DIR2/../../DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/DIR2/../DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + + NO_CHANGE("/DIR1/DIR2/../DIR1"); + NO_CHANGE("/DIR1/DIR2/../FILE"); + + CHANGE_TO("/./DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/././DIR1/DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/./DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/././DIR2/FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/DIR1/DIR2/././FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + CHANGE_TO("/./DIR1/./DIR2/./FILE", "/tmp/qemu-test_path/DIR1/DIR2/FILE"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret; + + ret = do_test(); + cleanup(); + if (ret) { + fprintf(stderr, "test_path: failed on line %i\n", ret); + return 1; + } + return 0; +} diff --git a/src/recompiler/tests/testthread.c b/src/recompiler/tests/testthread.c new file mode 100644 index 00000000..27e4825b --- /dev/null +++ b/src/recompiler/tests/testthread.c @@ -0,0 +1,51 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <inttypes.h> +#include <pthread.h> +#include <sys/wait.h> +#include <sched.h> + +void *thread1_func(void *arg) +{ + int i; + char buf[512]; + + for(i=0;i<10;i++) { + snprintf(buf, sizeof(buf), "thread1: %d %s\n", i, (char *)arg); + write(1, buf, strlen(buf)); + usleep(100 * 1000); + } + return NULL; +} + +void *thread2_func(void *arg) +{ + int i; + char buf[512]; + for(i=0;i<20;i++) { + snprintf(buf, sizeof(buf), "thread2: %d %s\n", i, (char *)arg); + write(1, buf, strlen(buf)); + usleep(150 * 1000); + } + return NULL; +} + +void test_pthread(void) +{ + pthread_t tid1, tid2; + + pthread_create(&tid1, NULL, thread1_func, "hello1"); + pthread_create(&tid2, NULL, thread2_func, "hello2"); + pthread_join(tid1, NULL); + pthread_join(tid2, NULL); + printf("End of pthread test.\n"); +} + +int main(int argc, char **argv) +{ + test_pthread(); + return 0; +} diff --git a/src/recompiler/translate-all.c b/src/recompiler/translate-all.c new file mode 100644 index 00000000..b6882559 --- /dev/null +++ b/src/recompiler/translate-all.c @@ -0,0 +1,186 @@ +/* + * Host code generation + * 
+ * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice + * other than GPL or LGPL is available it will apply instead, Oracle elects to use only + * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where + * a choice of LGPL license versions is made available with the language indicating + * that LGPLv2 or any later version may be used, or where a choice of which version + * of the LGPL is applied is otherwise unspecified. + */ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +#include "config.h" + +#define NO_CPU_IO_DEFS +#include "cpu.h" +#include "exec-all.h" +#include "disas.h" +#include "tcg.h" +#include "qemu-timer.h" + +/* code generation context */ +TCGContext tcg_ctx; + +uint16_t gen_opc_buf[OPC_BUF_SIZE]; +TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + +target_ulong gen_opc_pc[OPC_BUF_SIZE]; +uint16_t gen_opc_icount[OPC_BUF_SIZE]; +uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; + +void cpu_gen_init(void) +{ + tcg_context_init(&tcg_ctx); + tcg_set_frame(&tcg_ctx, TCG_AREG0, offsetof(CPUState, temp_buf), + sizeof(((CPUState *)0)->temp_buf)); +} + +/* return non zero if the very first instruction is invalid so that + the virtual CPU can trigger an exception. + + '*gen_code_size_ptr' contains the size of the generated code (host + code). 
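+
+   The guest code described by 'tb' is first turned into TCG ops by
+   gen_intermediate_code() and then into host code at tb->tc_ptr by
+   tcg_gen_code(), as done in the body below.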
+*/ +int cpu_gen_code(CPUState *env, TranslationBlock *tb, int *gen_code_size_ptr) +{ + TCGContext *s = &tcg_ctx; + uint8_t *gen_code_buf; + int gen_code_size; +#ifdef CONFIG_PROFILER + int64_t ti; +#endif + +#ifdef CONFIG_PROFILER + s->tb_count1++; /* includes aborted translations because of + exceptions */ + ti = profile_getclock(); +#endif + +#ifdef VBOX + RAWEx_ProfileStart(env, STATS_QEMU_COMPILATION); +#endif + + tcg_func_start(s); + + gen_intermediate_code(env, tb); + + /* generate machine code */ + gen_code_buf = tb->tc_ptr; + tb->tb_next_offset[0] = 0xffff; + tb->tb_next_offset[1] = 0xffff; + s->tb_next_offset = tb->tb_next_offset; +#ifdef USE_DIRECT_JUMP + s->tb_jmp_offset = tb->tb_jmp_offset; + s->tb_next = NULL; +#else + s->tb_jmp_offset = NULL; + s->tb_next = tb->tb_next; +#endif + +#ifdef CONFIG_PROFILER + s->tb_count++; + s->interm_time += profile_getclock() - ti; + s->code_time -= profile_getclock(); +#endif + gen_code_size = tcg_gen_code(s, gen_code_buf); + *gen_code_size_ptr = gen_code_size; +#ifdef CONFIG_PROFILER + s->code_time += profile_getclock(); + s->code_in_len += tb->size; + s->code_out_len += gen_code_size; +#endif + +#ifdef VBOX + RAWEx_ProfileStop(env, STATS_QEMU_COMPILATION); +#endif + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { + qemu_log("OUT: [size=%d]\n", *gen_code_size_ptr); + log_disas(tb->tc_ptr, *gen_code_size_ptr); + qemu_log("\n"); + qemu_log_flush(); + } +#endif + return 0; +} + +/* The cpu state corresponding to 'searched_pc' is restored. + */ +int cpu_restore_state(TranslationBlock *tb, + CPUState *env, uintptr_t searched_pc, + void *puc) +{ + TCGContext *s = &tcg_ctx; + int j; + uintptr_t tc_ptr; +#ifdef CONFIG_PROFILER + int64_t ti; +#endif + +#ifdef CONFIG_PROFILER + ti = profile_getclock(); +#endif + tcg_func_start(s); + + gen_intermediate_code_pc(env, tb); + + if (use_icount) { + /* Reset the cycle counter to the start of the block. */ + env->icount_decr.u16.low += tb->icount; + /* Clear the IO flag. */ + env->can_do_io = 0; + } + + /* find opc index corresponding to search_pc */ + tc_ptr = (uintptr_t)tb->tc_ptr; + if (searched_pc < tc_ptr) + return -1; + + s->tb_next_offset = tb->tb_next_offset; +#ifdef USE_DIRECT_JUMP + s->tb_jmp_offset = tb->tb_jmp_offset; + s->tb_next = NULL; +#else + s->tb_jmp_offset = NULL; + s->tb_next = tb->tb_next; +#endif + j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr); + if (j < 0) + return -1; + /* now find start of instruction before */ + while (gen_opc_instr_start[j] == 0) + j--; + env->icount_decr.u16.low -= gen_opc_icount[j]; + + gen_pc_load(env, tb, searched_pc, j, puc); + +#ifdef CONFIG_PROFILER + s->restore_time += profile_getclock() - ti; + s->restore_count++; +#endif + return 0; +} |