==== //depot/vendor/valgrind/ACKNOWLEDGEMENTS#1 - === Index: AUTHORS =========================================================================== --- AUTHORS 2012/05/26 10:11:27 #1 +++ AUTHORS 2012/05/26 10:11:27 @@ -1,41 +1,60 @@ -Cerion Armour-Brown worked on PowerPC instruction set support using -the Vex dynamic-translation framework. +Julian Seward was the original founder, designer and author of Valgrind, +created the dynamic translation frameworks, wrote Memcheck and 3.3.X +Helgrind, and did lots of other things. + +Nicholas Nethercote did the core/tool generalisation, wrote +Cachegrind and Massif, and tons of other stuff. -Jeremy Fitzhardinge wrote Helgrind and totally overhauled low-level -syscall/signal and address space layout stuff, among many other things. +Tom Hughes did a vast number of bug fixes, helped out with support for +more recent Linux/glibc versions, set up the present build system, and has +helped out with test and build machines. -Tom Hughes did a vast number of bug fixes, and helped out with support -for more recent Linux/glibc versions. +Jeremy Fitzhardinge wrote Helgrind (in the 2.X line) and totally +overhauled low-level syscall/signal and address space layout stuff, +among many other things. -Nicholas Nethercote did the core/tool generalisation, wrote -Cachegrind and Massif, and tons of other stuff. +Josef Weidendorfer wrote and maintains Callgrind and the associated +KCachegrind GUI. Paul Mackerras did a lot of the initial per-architecture factoring -that forms the basis of the 3.0 line and is also to be seen in 2.4.0. +that forms the basis of the 3.0 line and was also seen in 2.4.0. He also did UCode-based dynamic translation support for PowerPC, and created a set of ppc-linux derivatives of the 2.X release line. -Dirk Mueller contributed the malloc-free mismatch checking stuff -and other bits and pieces, and acted as our KDE liaison. +Greg Parker wrote the Mac OS X port. 
-Julian Seward was the original founder, designer and author, created -the dynamic translation frameworks, wrote Memcheck and Addrcheck, and -did lots of other things. +Dirk Mueller contributed the malloc/free mismatch checking +and other bits and pieces, and acts as our KDE liaison. Robert Walsh added file descriptor leakage checking, new library interception machinery, support for client allocation pools, and minor other tweakage. -Josef Weidendorfer wrote Callgrind and the associated KCachegrind GUI. +Bart Van Assche wrote and maintains DRD. + +Cerion Armour-Brown worked on PowerPC instruction set support in +the Vex dynamic-translation framework. + +Donna Robinson created and maintains the very excellent +http://www.valgrind.org. + +Vince Weaver wrote and maintains BBV. Frederic Gobry helped with autoconf and automake. Daniel Berlin modified readelf's dwarf2 source line reader, written by Nick -Clifton, for use in Valgrind. +Clifton, for use in Valgrind. + +Michael Matz and Simon Hausmann modified the GNU binutils demangler(s) for +use in Valgrind. + +David Woodhouse has helped out with test and build machines over the course +of many releases. -Michael Matz and Simon Hausmann modified the GNU binutils -demangler(s) for use in Valgrind. +Many, many people sent bug reports, patches, and helpful feedback. -And lots and lots of other people sent bug reports, patches, and very -helpful feedback. Thank you all. +Development of Valgrind was supported in part by the Tri-Lab Partners +(Lawrence Livermore National Laboratory, Los Alamos National +Laboratory, and Sandia National Laboratories) of the U.S. Department +of Energy's Advanced Simulation & Computing (ASC) Program. 
==== //depot/vendor/valgrind/INSTALL#1 - === Index: Makefile.all.am =========================================================================== --- Makefile.all.am 2012/05/26 10:11:27 #1 +++ Makefile.all.am 2012/05/26 10:11:27 @@ -1,13 +1,217 @@ -## This file should be included by *every* Makefile.am, except those for docs/ -## and tests/ subdirectories. +# This file should be included (directly or indirectly) by every +# Makefile.am that builds programs. And also the top-level Makefile.am. + +#---------------------------------------------------------------------------- +# Global stuff +#---------------------------------------------------------------------------- -valdir = $(libdir)/valgrind inplacedir = $(top_builddir)/.in_place +# This used to be required when Vex had a handwritten Makefile. It +# shouldn't be needed any more, though. +##.NOTPARALLEL: + +#---------------------------------------------------------------------------- +# noinst_PROGRAMS and noinst_DSYMS targets +#---------------------------------------------------------------------------- + +# On Darwin, for a program 'p', the DWARF debug info is stored in the +# directory 'p.dSYM'. This must be generated after the executable is +# created, with 'dsymutil p'. We could redefine LINK with a script that +# executes 'dsymutil' after linking, but that's a pain. Instead we use this +# hook so that every time "make check" is run, we subsequently invoke +# 'dsymutil' on all the executables that lack a .dSYM directory, or that are +# newer than their corresponding .dSYM directory. +build-noinst_DSYMS: $(noinst_DSYMS) + for f in $(noinst_DSYMS); do \ + if [ ! -e $$f.dSYM -o $$f -nt $$f.dSYM ] ; then \ + echo "dsymutil $$f"; \ + dsymutil $$f; \ + fi; \ + done + +# This is used by coregrind/Makefile.am and Makefile.tool.am for doing +# "in-place" installs. It copies $(noinst_PROGRAMS) into $inplacedir. +# It needs to be depended on by an 'all-local' rule. 
+inplace-noinst_PROGRAMS: $(noinst_PROGRAMS) + mkdir -p $(inplacedir); \ + for f in $(noinst_PROGRAMS) ; do \ + rm -f $(inplacedir)/$$f; \ + ln -f -s ../$(subdir)/$$f $(inplacedir); \ + done + +# Similar to inplace-noinst_PROGRAMS +inplace-noinst_DSYMS: build-noinst_DSYMS + mkdir -p $(inplacedir); \ + for f in $(noinst_DSYMS); do \ + rm -f $(inplacedir)/$$f.dSYM; \ + ln -f -s ../$(subdir)/$$f.dSYM $(inplacedir); \ + done + +# This is used by coregrind/Makefile.am and by /Makefile.am for doing +# "make install". It copies $(noinst_PROGRAMS) into $prefix/lib/valgrind/. +# It needs to be depended on by an 'install-exec-local' rule. +install-noinst_PROGRAMS: $(noinst_PROGRAMS) + $(mkinstalldirs) $(DESTDIR)$(pkglibdir); \ + for f in $(noinst_PROGRAMS); do \ + $(INSTALL_PROGRAM) $$f $(DESTDIR)$(pkglibdir); \ + done + +# Similar to install-noinst_PROGRAMS. +# Nb: we don't use $(INSTALL_PROGRAM) here because it doesn't work with +# directories. XXX: not sure whether the resulting permissions will be +# correct when using 'cp -R'... +install-noinst_DSYMS: build-noinst_DSYMS + $(mkinstalldirs) $(DESTDIR)$(pkglibdir); \ + for f in $(noinst_DSYMS); do \ + cp -R $$f.dSYM $(DESTDIR)$(pkglibdir); \ + done + +# This needs to be depended on by a 'clean-local' rule. +clean-noinst_DSYMS: + for f in $(noinst_DSYMS); do \ + rm -rf $$f.dSYM; \ + done + +#---------------------------------------------------------------------------- +# Flags +#---------------------------------------------------------------------------- + +# Baseline flags for all compilations. Aim here is to maximise +# performance and get whatever useful warnings we can out of gcc. +AM_CFLAGS_BASE = \ + -O2 -g \ + -Wall \ + -Wmissing-prototypes \ + -Wshadow \ + -Wpointer-arith \ + -Wstrict-prototypes \ + -Wmissing-declarations \ + @FLAG_W_NO_FORMAT_ZERO_LENGTH@ \ + -fno-strict-aliasing + +# These flags are used for building the preload shared objects. 
+# The aim is to give reasonable performance but also to have good +# stack traces, since users often see stack traces extending +# into (and through) the preloads. +if VGCONF_OS_IS_DARWIN +AM_CFLAGS_PIC = -dynamic -O -g -fno-omit-frame-pointer -fno-strict-aliasing -mno-dynamic-no-pic +else +AM_CFLAGS_PIC = -fpic -O -g -fno-omit-frame-pointer -fno-strict-aliasing +endif + + +# Flags for specific targets. +# +# Nb: the AM_CPPFLAGS_* values are suitable for building tools and auxprogs. +# For building the core, coregrind/Makefile.am files add some extra things. +# +# Also: in newer versions of automake (1.10 onwards?) asm files ending with +# '.S' are considered "pre-processed" (as opposed to those ending in '.s') +# and so the CPPFLAGS are passed to the assembler. But this is not true for +# older automakes (e.g. 1.8.5, 1.9.6), sigh. So we include +# AM_CPPFLAGS_ in each AM_CCASFLAGS_ variable. This +# means some of the flags are duplicated on systems with newer versions of +# automake, but this does not really matter and seems hard to avoid. + +AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@ = \ + -I$(top_srcdir) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/VEX/pub \ + -DVGA_@VGCONF_ARCH_PRI@=1 \ + -DVGO_@VGCONF_OS@=1 \ + -DVGP_@VGCONF_ARCH_PRI@_@VGCONF_OS@=1 +if VGCONF_HAVE_PLATFORM_SEC +AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@ = \ + -I$(top_srcdir) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/VEX/pub \ + -DVGA_@VGCONF_ARCH_SEC@=1 \ + -DVGO_@VGCONF_OS@=1 \ + -DVGP_@VGCONF_ARCH_SEC@_@VGCONF_OS@=1 +endif + +AM_FLAG_M3264_X86_LINUX = @FLAG_M32@ +AM_CFLAGS_X86_LINUX = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \ + $(AM_CFLAGS_BASE) +AM_CCASFLAGS_X86_LINUX = $(AM_CPPFLAGS_X86_LINUX) @FLAG_M32@ -g -# The kludge that passes for vex's build system can't handle parallel -# builds. So, for the time being, serialise all Valgrind building. -# (this is equivalent to enforcing "make -j 1". 
-.NOTPARALLEL: +AM_FLAG_M3264_AMD64_LINUX = @FLAG_M64@ +AM_CFLAGS_AMD64_LINUX = @FLAG_M64@ -fomit-frame-pointer \ + @PREFERRED_STACK_BOUNDARY@ $(AM_CFLAGS_BASE) +AM_CCASFLAGS_AMD64_LINUX = $(AM_CPPFLAGS_AMD64_LINUX) @FLAG_M64@ -g + +AM_FLAG_M3264_PPC32_LINUX = @FLAG_M32@ +AM_CFLAGS_PPC32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) +AM_CCASFLAGS_PPC32_LINUX = $(AM_CPPFLAGS_PPC32_LINUX) @FLAG_M32@ -g + +AM_FLAG_M3264_PPC64_LINUX = @FLAG_M64@ +AM_CFLAGS_PPC64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) +AM_CCASFLAGS_PPC64_LINUX = $(AM_CPPFLAGS_PPC64_LINUX) @FLAG_M64@ -g + +AM_FLAG_M3264_X86_FREEBSD = @FLAG_M32@ +AM_CFLAGS_X86_FREEBSD = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \ + $(AM_CFLAGS_BASE) +AM_CCASFLAGS_X86_FREEBSD = $(AM_CPPFLAGS_X86_FREEBSD) @FLAG_M32@ -g + +AM_FLAG_M3264_AMD64_FREEBSD = @FLAG_M64@ +AM_CFLAGS_AMD64_FREEBSD = @FLAG_M64@ -fomit-frame-pointer \ + @PREFERRED_STACK_BOUNDARY@ $(AM_CFLAGS_BASE) +AM_CCASFLAGS_AMD64_FREEBSD = $(AM_CPPFLAGS_AMD64_FREEBSD) @FLAG_M64@ -g + +AM_FLAG_M3264_PPC32_AIX5 = @FLAG_MAIX32@ +AM_CFLAGS_PPC32_AIX5 = @FLAG_MAIX32@ -mcpu=powerpc $(AM_CFLAGS_BASE) +AM_CCASFLAGS_PPC32_AIX5 = $(AM_CPPFLAGS_PPC32_AIX5) \ + @FLAG_MAIX32@ -mcpu=powerpc -g + +AM_FLAG_M3264_PPC64_AIX5 = @FLAG_MAIX64@ +AM_CFLAGS_PPC64_AIX5 = @FLAG_MAIX64@ -mcpu=powerpc64 $(AM_CFLAGS_BASE) +AM_CCASFLAGS_PPC64_AIX5 = $(AM_CPPFLAGS_PPC64_AIX5) \ + @FLAG_MAIX64@ -mcpu=powerpc64 -g + +AM_FLAG_M3264_X86_DARWIN = -arch i386 +AM_CFLAGS_X86_DARWIN = $(WERROR) -arch i386 $(AM_CFLAGS_BASE) \ + -mmacosx-version-min=10.5 -fno-stack-protector \ + -mdynamic-no-pic +AM_CCASFLAGS_X86_DARWIN = $(AM_CPPFLAGS_X86_DARWIN) -arch i386 -g + +AM_FLAG_M3264_AMD64_DARWIN = -arch x86_64 +AM_CFLAGS_AMD64_DARWIN = $(WERROR) -arch x86_64 $(AM_CFLAGS_BASE) \ + -mmacosx-version-min=10.5 -fno-stack-protector +AM_CCASFLAGS_AMD64_DARWIN = $(AM_CPPFLAGS_AMD64_DARWIN) -arch x86_64 -g + +# Flags for the primary target. These must be used to build the +# regtests and performance tests. 
In fact, these must be used to +# build anything which is built only once on a dual-arch build. +# +AM_FLAG_M3264_PRI = $(AM_FLAG_M3264_@VGCONF_PLATFORM_PRI_CAPS@) +AM_CPPFLAGS_PRI = $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) +AM_CFLAGS_PRI = $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) +AM_CCASFLAGS_PRI = $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) + +if VGCONF_HAVE_PLATFORM_SEC + AM_FLAG_M3264_SEC = $(AM_FLAG_M3264_@VGCONF_PLATFORM_SEC_CAPS@) +else + AM_FLAG_M3264_SEC = +endif + + +# Baseline link flags for making vgpreload shared objects. +# +PRELOAD_LDFLAGS_COMMON_LINUX = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst +PRELOAD_LDFLAGS_COMMON_FREEBSD = -nodefaultlibs -shared -Wl,-z,interpose,-z,initfirst +PRELOAD_LDFLAGS_COMMON_AIX5 = -nodefaultlibs -shared -Wl,-G -Wl,-bnogc +PRELOAD_LDFLAGS_COMMON_DARWIN = -dynamic -dynamiclib -all_load + +PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ +PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ +PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ +PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ +PRELOAD_LDFLAGS_X86_FREEBSD = $(PRELOAD_LDFLAGS_COMMON_FREEBSD) @FLAG_M32@ +PRELOAD_LDFLAGS_AMD64_FREEBSD= $(PRELOAD_LDFLAGS_COMMON_FREEBSD) @FLAG_M64@ +PRELOAD_LDFLAGS_PPC32_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5) @FLAG_MAIX32@ +PRELOAD_LDFLAGS_PPC64_AIX5 = $(PRELOAD_LDFLAGS_COMMON_AIX5) @FLAG_MAIX64@ +PRELOAD_LDFLAGS_X86_DARWIN = $(PRELOAD_LDFLAGS_COMMON_DARWIN) -arch i386 +PRELOAD_LDFLAGS_AMD64_DARWIN = $(PRELOAD_LDFLAGS_COMMON_DARWIN) -arch x86_64 Index: Makefile.am =========================================================================== --- Makefile.am 2012/05/26 10:11:27 #2 +++ Makefile.am 2012/05/26 10:11:27 @@ -8,204 +8,102 @@ callgrind \ massif \ lackey \ - none + none \ + helgrind \ + drd + +EXP_TOOLS = exp-ptrcheck \ + exp-bbv + +# DDD: once all tools work on Darwin, TEST_TOOLS and TEST_EXP_TOOLS can be +# replaced with TOOLS 
and EXP_TOOLS. +TEST_TOOLS = $(TOOLS) +if !VGCONF_OS_IS_DARWIN + TEST_EXP_TOOLS = $(EXP_TOOLS) +else + TEST_EXP_TOOLS = exp-bbv +endif -# Temporary: we want to compile Helgrind, but not regtest it. # Put docs last because building the HTML is slow and we want to get # everything else working before we try it. -SUBDIRS = include coregrind . tests perf auxprogs $(TOOLS) helgrind docs +SUBDIRS = \ + include \ + VEX \ + coregrind \ + . \ + $(TOOLS) \ + $(EXP_TOOLS) \ + tests \ + perf \ + auxprogs \ + mpi \ + docs DIST_SUBDIRS = $(SUBDIRS) SUPP_FILES = \ glibc-2.2.supp glibc-2.3.supp glibc-2.4.supp glibc-2.5.supp \ - glibc-2.6.supp glibc-2.7.supp \ - xfree-3.supp xfree-4.supp + glibc-2.6.supp glibc-2.7.supp glibc-2.X.supp.in \ + aix5libc.supp xfree-3.supp xfree-4.supp \ + glibc-2.34567-NPTL-helgrind.supp \ + glibc-2.2-LinuxThreads-helgrind.supp \ + glibc-2.X-drd.supp \ + exp-ptrcheck.supp \ + darwin9.supp darwin9-drd.supp \ + freebsd.supp +DEFAULT_SUPP_FILES = @DEFAULT_SUPP@ + +# We include all the base .supp files in the distribution, but not +# default.supp, as it is built from the base .supp files at compile-time. 
+dist_noinst_DATA = $(SUPP_FILES) -dist_val_DATA = $(SUPP_FILES) default.supp +pkglib_DATA = default.supp pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = valgrind.pc -incincdir = $(includedir)/valgrind -nodist_incinc_HEADERS = $(VEX_PUBLIC_HDRS) - BUILT_SOURCES = default.supp valgrind.pc -CLEANFILES = -DISTCLEANFILES = default.supp +CLEANFILES = default.supp -if VG_X86_LINUX -BUILT_SOURCES += valt_load_address_x86_linux.lds -CLEANFILES += valt_load_address_x86_linux.lds -endif -if VG_AMD64_LINUX -BUILT_SOURCES += valt_load_address_amd64_linux.lds -CLEANFILES += valt_load_address_amd64_linux.lds -endif -if VG_PPC32_LINUX -BUILT_SOURCES += valt_load_address_ppc32_linux.lds -CLEANFILES += valt_load_address_ppc32_linux.lds -endif -if VG_PPC64_LINUX -BUILT_SOURCES += valt_load_address_ppc64_linux.lds -CLEANFILES += valt_load_address_ppc64_linux.lds -endif +default.supp: $(DEFAULT_SUPP_FILES) + echo "# This is a generated file, composed of the following suppression rules:" > default.supp + echo "# " $(DEFAULT_SUPP_FILES) >> default.supp + cat $(DEFAULT_SUPP_FILES) >> default.supp -default.supp: $(SUPP_FILES) - ## Preprend @PERL@ because tests/vg_regtest isn't executable regtest: check - @PERL@ tests/vg_regtest $(TOOLS) + @PERL@ tests/vg_regtest $(TEST_TOOLS) $(TEST_EXP_TOOLS) +nonexp-regtest: check + @PERL@ tests/vg_regtest $(TEST_TOOLS) +exp-regtest: check + @PERL@ tests/vg_regtest $(TEST_EXP_TOOLS) -## Preprend @PERL@ because tests/vg_per isn't executable +## Preprend @PERL@ because tests/vg_perf isn't executable perf: check @PERL@ perf/vg_perf perf +# Nb: no need to include any Makefile.am files here, or files included from +# them, as automake includes them automatically. Also not COPYING, README +# or NEWS. +# We include valgrind.spec as well as valgrind.spec.in to save packagers +# from having to run configure (bug 188560). 
EXTRA_DIST = \ - ACKNOWLEDGEMENTS \ + COPYING.DOCS \ README_DEVELOPERS \ README_PACKAGERS \ README_MISSING_SYSCALL_OR_IOCTL \ - valgrind.spec.in valgrind.pc.in \ - Makefile.all.am Makefile.tool.am Makefile.core.am \ - Makefile.tool-inplace.am \ - $(vex_primary_sources) + valgrind.pc.in \ + valgrind.spec.in \ + valgrind.spec -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) +dist_noinst_SCRIPTS = \ + vg-in-place -all-local: +all-local: default.supp mkdir -p $(inplacedir) - rm -f $(addprefix $(inplacedir)/,default.supp $(SUPP_FILES)) + rm -f $(inplacedir)/default.supp ln -s ../default.supp $(inplacedir) - ln -s $(addprefix ../$(top_srcdir)/,$(SUPP_FILES)) $(inplacedir) -distclean-local: +clean-local: rm -rf $(inplacedir) -# These list the bits of vex we need to install - -VEX_PUBLIC_HDRS = \ - @VEX_DIR@/pub/libvex_guest_amd64.h \ - @VEX_DIR@/pub/libvex_ir.h \ - @VEX_DIR@/pub/libvex_guest_ppc32.h \ - @VEX_DIR@/pub/libvex_guest_ppc64.h \ - @VEX_DIR@/pub/libvex_basictypes.h \ - @VEX_DIR@/pub/libvex_guest_offsets.h \ - @VEX_DIR@/pub/libvex_emwarn.h \ - @VEX_DIR@/pub/libvex.h \ - @VEX_DIR@/pub/libvex_trc_values.h \ - @VEX_DIR@/pub/libvex_guest_arm.h \ - @VEX_DIR@/pub/libvex_guest_x86.h - -# These list the bits of vex we need to copy into the tarball -vex_primary_sources = \ - VEX/HACKING.README \ - VEX/LICENSE.README \ - VEX/LICENSE.GPL \ - VEX/Makefile \ - VEX/auxprogs/genoffsets.c \ - VEX/pub/libvex_guest_amd64.h \ - VEX/pub/libvex_ir.h \ - VEX/pub/libvex_guest_ppc32.h \ - VEX/pub/libvex_guest_ppc64.h \ - VEX/pub/libvex_basictypes.h \ - VEX/pub/libvex_guest_offsets.h \ - VEX/pub/libvex_emwarn.h \ - VEX/pub/libvex.h \ - VEX/pub/libvex_trc_values.h \ - VEX/pub/libvex_guest_arm.h \ - VEX/pub/libvex_guest_x86.h \ - VEX/priv/ir/irmatch.c \ - VEX/priv/ir/irmatch.h \ - VEX/priv/ir/irdefs.c \ - VEX/priv/ir/iropt.c \ - VEX/priv/ir/iropt.h \ - VEX/priv/host-ppc/isel.c \ - VEX/priv/host-ppc/hdefs.c \ - VEX/priv/host-ppc/hdefs.h \ - VEX/priv/main/vex_svnversion.h \ - 
VEX/priv/main/vex_globals.c \ - VEX/priv/main/vex_globals.h \ - VEX/priv/main/vex_main.c \ - VEX/priv/main/vex_util.c \ - VEX/priv/main/vex_util.h \ - VEX/priv/guest-arm/ghelpers.c \ - VEX/priv/guest-arm/gdefs.h \ - VEX/priv/guest-arm/toIR.c \ - VEX/priv/guest-x86/ghelpers.c \ - VEX/priv/guest-x86/gdefs.h \ - VEX/priv/guest-x86/toIR.c \ - VEX/priv/guest-generic/g_generic_x87.c \ - VEX/priv/guest-generic/g_generic_x87.h \ - VEX/priv/guest-generic/bb_to_IR.c \ - VEX/priv/guest-generic/bb_to_IR.h \ - VEX/priv/host-arm/isel.c \ - VEX/priv/host-arm/hdefs.c \ - VEX/priv/host-arm/hdefs.h \ - VEX/priv/host-x86/isel.c \ - VEX/priv/host-x86/hdefs.c \ - VEX/priv/host-x86/hdefs.h \ - VEX/priv/guest-amd64/ghelpers.c \ - VEX/priv/guest-amd64/gdefs.h \ - VEX/priv/guest-amd64/toIR.c \ - VEX/priv/guest-ppc/ghelpers.c \ - VEX/priv/guest-ppc/gdefs.h \ - VEX/priv/guest-ppc/toIR.c \ - VEX/priv/host-generic/reg_alloc2.c \ - VEX/priv/host-generic/h_generic_regs.c \ - VEX/priv/host-generic/h_generic_regs.h \ - VEX/priv/host-generic/h_generic_simd64.c \ - VEX/priv/host-generic/h_generic_simd64.h \ - VEX/priv/host-amd64/isel.c \ - VEX/priv/host-amd64/hdefs.c \ - VEX/priv/host-amd64/hdefs.h - - -# Generate a linker script for linking the binaries. This is the -# standard gcc linker script, except hacked so that an alternative -# load address can be specified by (1) asking gcc to use this script -# (-Wl,-T,valt_load_address.lds) and (2) setting the symbol -# valt_load_address to the required value -# (-Wl,-defsym,valt_load_address=0x70000000). -# -# Extract ld's default linker script and hack it to our needs. -# First we cut everything above and below the "=====..." lines at the top -# and bottom. -# Then we have to replace the load address with "valt_load_address". -# The line to replace in has one of the following two forms: -# -# . = 0x08048000 + SIZEOF_HEADERS; -# -# or -# PROVIDE (__executable_start = 0x08048000); . 
= 0x08048000 + SIZEOF_HEADERS; -# -# So we search for the line with a hex value "+ SIZEOF_HEADERS", and replace -# all the hex values in that line with "valt_load_address". -valt_load_address_x86_linux.lds: Makefile - $(CC) @FLAG_M32@ -Wl,--verbose -nostdlib 2>&1 | sed \ - -e '1,/^=====\+$$/d' \ - -e '/^=====\+$$/d' \ - -e '/\. = 0x[0-9A-Fa-f]\+ + SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ - || rm -f $@ - -valt_load_address_amd64_linux.lds: Makefile - $(CC) -m64 -Wl,--verbose -nostdlib 2>&1 | sed \ - -e '1,/^=====\+$$/d' \ - -e '/^=====\+$$/d' \ - -e '/\. = 0x[0-9A-Fa-f]\+ + SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ - || rm -f $@ - -valt_load_address_ppc32_linux.lds: Makefile - $(CC) @FLAG_M32@ -Wl,--verbose -nostdlib 2>&1 | sed \ - -e '1,/^=====\+$$/d' \ - -e '/^=====\+$$/d' \ - -e '/\. = 0x[0-9A-Fa-f]\+ + SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ - || rm -f $@ - -valt_load_address_ppc64_linux.lds: Makefile - $(CC) -m64 -Wl,--verbose -nostdlib 2>&1 | sed \ - -e '1,/^=====\+$$/d' \ - -e '/^=====\+$$/d' \ - -e '/\. = 0x[0-9A-Fa-f]\+ + SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ - || rm -f $@ ==== //depot/vendor/valgrind/Makefile.core.am#1 - === ==== //depot/vendor/valgrind/Makefile.flags.am#1 - === ==== //depot/vendor/valgrind/Makefile.install.am#1 - === ==== //depot/vendor/valgrind/Makefile.tool-flags.am#1 - === ==== //depot/vendor/valgrind/Makefile.tool-inplace.am#1 - === Index: Makefile.tool-tests.am =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- Makefile.tool-tests.am Sat May 26 10:11:27 2012 *************** *** 0 **** --- 1,25 ---- + # This file is used for tool tests, and also in perf/Makefile.am. 
+ + include $(top_srcdir)/Makefile.all.am + + AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \ + -I$(top_srcdir)/coregrind -I$(top_builddir)/include \ + -I$(top_srcdir)/VEX/pub \ + -DVGA_$(VGCONF_ARCH_PRI)=1 \ + -DVGO_$(VGCONF_OS)=1 \ + -DVGP_$(VGCONF_ARCH_PRI)_$(VGCONF_OS)=1 + # Nb: Tools need to augment these flags with an arch-selection option, such + # as $(AM_FLAG_M3264_PRI). + AM_CFLAGS = -Winline -Wall -Wshadow -g + AM_CXXFLAGS = -Winline -Wall -Wshadow -g + # Include AM_CPPFLAGS in AM_CCASFLAGS to allow for older versions of + # automake; see comments in Makefile.all.am for more detail. + AM_CCASFLAGS = $(AM_CPPFLAGS) + + if VGCONF_OS_IS_DARWIN + noinst_DSYMS = $(check_PROGRAMS) + endif + + check-local: build-noinst_DSYMS + + clean-local: clean-noinst_DSYMS Index: Makefile.tool.am =========================================================================== --- Makefile.tool.am 2012/05/26 10:11:27 #1 +++ Makefile.tool.am 2012/05/26 10:11:27 @@ -1,108 +1,271 @@ -SUBDIRS = . tests docs +SUBDIRS = . 
tests include $(top_srcdir)/Makefile.all.am -include $(top_srcdir)/Makefile.install.am -include $(top_srcdir)/Makefile.tool-flags.am -include $(top_srcdir)/Makefile.tool-inplace.am -LIBREPLACEMALLOC_X86_LINUX = \ - $(top_builddir)/coregrind/libreplacemalloc_toolpreload_x86_linux.a +#---------------------------------------------------------------------------- +# - stuff +#---------------------------------------------------------------------------- -LIBREPLACEMALLOC_AMD64_LINUX = \ - $(top_builddir)/coregrind/libreplacemalloc_toolpreload_amd64_linux.a +TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@ = \ + $(top_builddir)/coregrind/libcoregrind-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a \ + $(top_builddir)/VEX/libvex-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a +if VGCONF_HAVE_PLATFORM_SEC +TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@ = \ + $(top_builddir)/coregrind/libcoregrind-@VGCONF_ARCH_SEC@-@VGCONF_OS@.a \ + $(top_builddir)/VEX/libvex-@VGCONF_ARCH_SEC@-@VGCONF_OS@.a +endif -LIBREPLACEMALLOC_PPC32_LINUX = \ - $(top_builddir)/coregrind/libreplacemalloc_toolpreload_ppc32_linux.a -LIBREPLACEMALLOC_PPC64_LINUX = \ - $(top_builddir)/coregrind/libreplacemalloc_toolpreload_ppc64_linux.a +TOOL_LDADD_COMMON = -lgcc +TOOL_LDADD_@VGCONF_PLATFORM_PRI_CAPS@ = \ + $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) $(TOOL_LDADD_COMMON) +if VGCONF_HAVE_PLATFORM_SEC +TOOL_LDADD_@VGCONF_PLATFORM_SEC_CAPS@ = \ + $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) $(TOOL_LDADD_COMMON) +endif -COREGRIND_LIBS_X86_LINUX = \ - $(top_builddir)/coregrind/libcoregrind_x86_linux.a \ - @VEX_DIR@/libvex_x86_linux.a -COREGRIND_LIBS_AMD64_LINUX = \ - $(top_builddir)/coregrind/libcoregrind_amd64_linux.a \ - @VEX_DIR@/libvex_amd64_linux.a - -COREGRIND_LIBS_PPC32_LINUX = \ - $(top_builddir)/coregrind/libcoregrind_ppc32_linux.a \ - @VEX_DIR@/libvex_ppc32_linux.a - -COREGRIND_LIBS_PPC64_LINUX = \ - $(top_builddir)/coregrind/libcoregrind_ppc64_linux.a \ - @VEX_DIR@/libvex_ppc64_linux.a - -##.PHONY: @VEX_DIR@/libvex.a - 
-@VEX_DIR@/libvex_x86_linux.a: @VEX_DIR@/priv/main/vex_svnversion.h - $(MAKE) -C @VEX_DIR@ CC="$(CC)" libvex_x86_linux.a \ - EXTRA_CFLAGS="$(AM_CFLAGS_X86_LINUX) @FLAG_WDECL_AFTER_STMT@ \ - @FLAG_FNO_STACK_PROTECTOR@" - -@VEX_DIR@/libvex_amd64_linux.a: @VEX_DIR@/priv/main/vex_svnversion.h - $(MAKE) -C @VEX_DIR@ CC="$(CC)" libvex_amd64_linux.a \ - EXTRA_CFLAGS="$(AM_CFLAGS_AMD64_LINUX) @FLAG_WDECL_AFTER_STMT@ \ - @FLAG_FNO_STACK_PROTECTOR@" - -@VEX_DIR@/libvex_ppc32_linux.a: @VEX_DIR@/priv/main/vex_svnversion.h - $(MAKE) -C @VEX_DIR@ CC="$(CC)" libvex_ppc32_linux.a \ - EXTRA_CFLAGS="$(AM_CFLAGS_PPC32_LINUX) @FLAG_WDECL_AFTER_STMT@ \ - @FLAG_FNO_STACK_PROTECTOR@" - -@VEX_DIR@/libvex_ppc64_linux.a: @VEX_DIR@/priv/main/vex_svnversion.h - $(MAKE) -C @VEX_DIR@ CC="$(CC)" libvex_ppc64_linux.a \ - EXTRA_CFLAGS="$(AM_CFLAGS_PPC64_LINUX) @FLAG_WDECL_AFTER_STMT@ \ - @FLAG_FNO_STACK_PROTECTOR@" - -@VEX_DIR@/priv/main/vex_svnversion.h: - $(MAKE) -C @VEX_DIR@ CC="$(CC)" version - -TOOL_LDADD_COMMON = -lgcc -TOOL_LDFLAGS_COMMON = -static \ +TOOL_LDFLAGS_COMMON_LINUX = -static \ + -Wl,-defsym,valt_load_address=@VALT_LOAD_ADDRESS@ \ + -nodefaultlibs -nostartfiles -u _start +TOOL_LDFLAGS_COMMON_FREEBSD = -static \ -Wl,-defsym,valt_load_address=@VALT_LOAD_ADDRESS@ \ -nodefaultlibs -nostartfiles -u _start +TOOL_LDFLAGS_COMMON_AIX5 = -static -Wl,-e_start_valgrind +TOOL_LDFLAGS_COMMON_DARWIN = -nodefaultlibs -nostartfiles \ + -Wl,-u,__start -Wl,-e,__start -Wl,-bind_at_load /usr/lib/dyld -TOOL_LDADD_X86_LINUX = $(COREGRIND_LIBS_X86_LINUX) $(TOOL_LDADD_COMMON) TOOL_LDFLAGS_X86_LINUX = \ - $(TOOL_LDFLAGS_COMMON) @FLAG_M32@ \ + $(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M32@ \ -Wl,-T,$(top_builddir)/valt_load_address_x86_linux.lds -TOOL_LDADD_AMD64_LINUX = $(COREGRIND_LIBS_AMD64_LINUX) $(TOOL_LDADD_COMMON) +TOOL_LDFLAGS_X86_FREEBSD = \ + $(TOOL_LDFLAGS_COMMON_FREEBSD) @FLAG_M32@ \ + -Wl,-T,$(top_builddir)/valt_load_address_x86_freebsd.lds + TOOL_LDFLAGS_AMD64_LINUX = \ - $(TOOL_LDFLAGS_COMMON) -m64 
\ + $(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@ \ -Wl,-T,$(top_builddir)/valt_load_address_amd64_linux.lds -TOOL_LDADD_PPC32_LINUX = $(COREGRIND_LIBS_PPC32_LINUX) $(TOOL_LDADD_COMMON) +TOOL_LDFLAGS_AMD64_FREEBSD = \ + $(TOOL_LDFLAGS_COMMON_FREEBSD) @FLAG_M64@ \ + -Wl,-T,$(top_builddir)/valt_load_address_amd64_freebsd.lds + TOOL_LDFLAGS_PPC32_LINUX = \ - $(TOOL_LDFLAGS_COMMON) @FLAG_M32@ \ + $(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M32@ \ -Wl,-T,$(top_builddir)/valt_load_address_ppc32_linux.lds -TOOL_LDADD_PPC64_LINUX = $(COREGRIND_LIBS_PPC64_LINUX) $(TOOL_LDADD_COMMON) TOOL_LDFLAGS_PPC64_LINUX = \ - $(TOOL_LDFLAGS_COMMON) -m64 \ + $(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@ \ -Wl,-T,$(top_builddir)/valt_load_address_ppc64_linux.lds -PRELOAD_LDFLAGS_COMMON = -nostdlib -shared -Wl,-z,interpose,-z,initfirst -PRELOAD_LDFLAGS_X86_LINUX = $(PRELOAD_LDFLAGS_COMMON) @FLAG_M32@ -PRELOAD_LDFLAGS_AMD64_LINUX = $(PRELOAD_LDFLAGS_COMMON) -m64 -PRELOAD_LDFLAGS_PPC32_LINUX = $(PRELOAD_LDFLAGS_COMMON) @FLAG_M32@ -PRELOAD_LDFLAGS_PPC64_LINUX = $(PRELOAD_LDFLAGS_COMMON) -m64 +TOOL_LDFLAGS_PPC32_AIX5 = \ + $(TOOL_LDFLAGS_COMMON_AIX5) @FLAG_MAIX32@ + +TOOL_LDFLAGS_PPC64_AIX5 = \ + $(TOOL_LDFLAGS_COMMON_AIX5) @FLAG_MAIX64@ -Wl,-bbigtoc + +TOOL_LDFLAGS_X86_DARWIN = \ + $(TOOL_LDFLAGS_COMMON_DARWIN) -arch i386 \ + -Wl,-seg1addr,0xf0080000 \ + -Wl,-stack_addr,0xf0080000 -Wl,-stack_size,0x80000 \ + -Wl,-pagezero_size,0xf0000000 + +# pagezero can't be unmapped and remapped. Use stack instead. 
+# GrP fixme no stack guard +TOOL_LDFLAGS_AMD64_DARWIN = \ + $(TOOL_LDFLAGS_COMMON_DARWIN) -arch x86_64 \ + -Wl,-seg1addr,0x7fff55000000 \ + -Wl,-stack_addr,0x7fff50080000 -Wl,-stack_size,0x7ffe50080000 \ + -Wl,-pagezero_size,0x100000000 + + +BUILT_SOURCES = +CLEANFILES = +if VGCONF_PLATFORMS_INCLUDE_X86_LINUX +BUILT_SOURCES += $(top_builddir)/valt_load_address_x86_linux.lds +CLEANFILES += $(top_builddir)/valt_load_address_x86_linux.lds +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_LINUX +BUILT_SOURCES += $(top_builddir)/valt_load_address_amd64_linux.lds +CLEANFILES += $(top_builddir)/valt_load_address_amd64_linux.lds +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_LINUX +BUILT_SOURCES += $(top_builddir)/valt_load_address_ppc32_linux.lds +CLEANFILES += $(top_builddir)/valt_load_address_ppc32_linux.lds +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX +BUILT_SOURCES += $(top_builddir)/valt_load_address_ppc64_linux.lds +CLEANFILES += $(top_builddir)/valt_load_address_ppc64_linux.lds +endif +if VGCONF_PLATFORMS_INCLUDE_X86_FREEBSD +BUILT_SOURCES += $(top_builddir)/valt_load_address_x86_freebsd.lds +CLEANFILES += $(top_builddir)/valt_load_address_x86_freebsd.lds +endif +if VGCONF_PLATFORMS_INCLUDE_AMD64_FREEBSD +BUILT_SOURCES += $(top_builddir)/valt_load_address_amd64_freebsd.lds +CLEANFILES += $(top_builddir)/valt_load_address_amd64_freebsd.lds +endif +if VGCONF_PLATFORMS_INCLUDE_PPC32_AIX5 +# No need to generate $(top_builddir)/valt_load_address*.lds; the final +# executables can be linked to be at any address. They will be relocated by +# AIX kernel when they are loaded. +endif +if VGCONF_PLATFORMS_INCLUDE_PPC64_AIX5 +# Ditto +endif +if VGCONF_OS_IS_DARWIN +# GrP untested, possibly hopeless +endif + + +# Generate a linker script for linking the binaries. 
This is the +# standard gcc linker script, except hacked so that an alternative +# load address can be specified by (1) asking gcc to use this script +# (-Wl,-T,valt_load_address.lds) and (2) setting the symbol +# valt_load_address to the required value +# (-Wl,-defsym,valt_load_address=0x70000000). +# +# Extract ld's default linker script and hack it to our needs. +# First we cut everything above and below the "=====..." lines at the top +# and bottom. +# Then we have to replace the load address with "valt_load_address". +# The line to replace in has one of the following two forms: +# +# . = 0x08048000 + SIZEOF_HEADERS; +# +# or +# PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS; +# +# So we search for the line with a hex value "+ SIZEOF_HEADERS", and replace +# all the hex values in that line with "valt_load_address". +$(top_builddir)/valt_load_address_x86_linux.lds: Makefile + $(CC) @FLAG_M32@ -Wl,--verbose -nostdlib 2>&1 | sed \ + -e '1,/^=====\+$$/d' \ + -e '/^=====\+$$/,/.\*/d' \ + -e '/\. = \(0x[0-9A-Fa-f]\+\|SEGMENT_START("[^"]\+", 0x[0-9A-Fa-f]\+)\) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ + || rm -f $@ + +$(top_builddir)/valt_load_address_amd64_linux.lds: Makefile + $(CC) -m64 -Wl,--verbose -nostdlib 2>&1 | sed \ + -e '1,/^=====\+$$/d' \ + -e '/^=====\+$$/,/.\*/d' \ + -e '/\. = \(0x[0-9A-Fa-f]\+\|SEGMENT_START("[^"]\+", 0x[0-9A-Fa-f]\+)\) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ + || rm -f $@ + +$(top_builddir)/valt_load_address_x86_freebsd.lds: Makefile + $(CC) @FLAG_M32@ -Wl,--verbose -nostdlib 2>&1 | sed -E \ + -e '1,/^=====+$$/d' \ + -e '/^=====+$$/,/.*/d' \ + -e '/\. = (0x[0-9A-Fa-f]+|SEGMENT_START\("[^"]+", 0x[0-9A-Fa-f]+\)) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]+/valt_load_address/g' > $@ \ + || rm -f $@ + +$(top_builddir)/valt_load_address_amd64_freebsd.lds: Makefile + $(CC) -m64 -Wl,--verbose -nostdlib 2>&1 | sed -E \ + -e '1,/^=====+$$/d' \ + -e '/^=====+$$/,/.*/d' \ + -e '/\. 
= (0x[0-9A-Fa-f]+|SEGMENT_START\("[^"]+", 0x[0-9A-Fa-f]+\)) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]+/valt_load_address/g' > $@ \ + || rm -f $@ + +$(top_builddir)/valt_load_address_ppc32_linux.lds: Makefile + $(CC) @FLAG_M32@ -Wl,--verbose -nostdlib 2>&1 | sed \ + -e '1,/^=====\+$$/d' \ + -e '/^=====\+$$/,/.\*/d' \ + -e '/\. = \(0x[0-9A-Fa-f]\+\|SEGMENT_START("[^"]\+", 0x[0-9A-Fa-f]\+)\) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ + || rm -f $@ + +$(top_builddir)/valt_load_address_ppc64_linux.lds: Makefile + $(CC) -m64 -Wl,--verbose -nostdlib 2>&1 | sed \ + -e '1,/^=====\+$$/d' \ + -e '/^=====\+$$/,/.\*/d' \ + -e '/\. = \(0x[0-9A-Fa-f]\+\|SEGMENT_START("[^"]\+", 0x[0-9A-Fa-f]\+)\) \+ SIZEOF_HEADERS/s/0x[0-9A-Fa-f]\+/valt_load_address/g' > $@ \ + || rm -f $@ + +#---------------------------------------------------------------------------- +# vgpreload_-.a stuff +#---------------------------------------------------------------------------- + +LIBREPLACEMALLOC_X86_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-linux.a + +LIBREPLACEMALLOC_AMD64_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-amd64-linux.a + +LIBREPLACEMALLOC_PPC32_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-linux.a + +LIBREPLACEMALLOC_PPC64_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-linux.a + +LIBREPLACEMALLOC_X86_FREEBSD = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-freebsd.a + +LIBREPLACEMALLOC_AMD64_FREEBSD = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-amd64-freebsd.a + +LIBREPLACEMALLOC_PPC32_AIX5 = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc32-aix5.a + +LIBREPLACEMALLOC_PPC64_AIX5 = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-ppc64-aix5.a + +LIBREPLACEMALLOC_X86_DARWIN = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-darwin.a + +LIBREPLACEMALLOC_AMD64_DARWIN = \ + 
$(top_builddir)/coregrind/libreplacemalloc_toolpreload-amd64-darwin.a + LIBREPLACEMALLOC_LDFLAGS_X86_LINUX = \ -Wl,--whole-archive \ $(LIBREPLACEMALLOC_X86_LINUX) \ -Wl,--no-whole-archive + LIBREPLACEMALLOC_LDFLAGS_AMD64_LINUX = \ -Wl,--whole-archive \ $(LIBREPLACEMALLOC_AMD64_LINUX) \ -Wl,--no-whole-archive + LIBREPLACEMALLOC_LDFLAGS_PPC32_LINUX = \ -Wl,--whole-archive \ $(LIBREPLACEMALLOC_PPC32_LINUX) \ -Wl,--no-whole-archive + LIBREPLACEMALLOC_LDFLAGS_PPC64_LINUX = \ -Wl,--whole-archive \ $(LIBREPLACEMALLOC_PPC64_LINUX) \ -Wl,--no-whole-archive + +LIBREPLACEMALLOC_LDFLAGS_X86_FREEBSD = \ + -Wl,--whole-archive \ + $(LIBREPLACEMALLOC_X86_FREEBSD) \ + -Wl,--no-whole-archive + +LIBREPLACEMALLOC_LDFLAGS_AMD64_FREEBSD = \ + -Wl,--whole-archive \ + $(LIBREPLACEMALLOC_AMD64_FREEBSD) \ + -Wl,--no-whole-archive + +LIBREPLACEMALLOC_LDFLAGS_PPC32_AIX5 = \ + $(LIBREPLACEMALLOC_PPC32_AIX5) + +LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5 = \ + $(LIBREPLACEMALLOC_PPC64_AIX5) + +LIBREPLACEMALLOC_LDFLAGS_X86_DARWIN = \ + $(LIBREPLACEMALLOC_X86_DARWIN) + +LIBREPLACEMALLOC_LDFLAGS_AMD64_DARWIN = \ + $(LIBREPLACEMALLOC_AMD64_DARWIN) + +#---------------------------------------------------------------------------- +# General stuff +#---------------------------------------------------------------------------- + +all-local: inplace-noinst_PROGRAMS inplace-noinst_DSYMS + +clean-local: clean-noinst_DSYMS + +install-exec-local: install-noinst_PROGRAMS install-noinst_DSYMS + Index: Makefile.vex.am =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- Makefile.vex.am Sat May 26 10:11:27 2012 *************** *** 0 **** --- 1,119 ---- + # VEX/Makefile is generated from this file at configure-time. 
+ + include $(top_srcdir)/Makefile.all.am + + #---------------------------------------------------------------------------- + # Headers, etc + #---------------------------------------------------------------------------- + + EXTRA_DIST = \ + HACKING.README \ + LICENSE.README \ + LICENSE.GPL \ + quote.txt \ + newline.txt \ + auxprogs/genoffsets.c + + pkginclude_HEADERS = \ + pub/libvex.h \ + pub/libvex_basictypes.h \ + pub/libvex_emwarn.h \ + pub/libvex_guest_offsets.h \ + pub/libvex_guest_x86.h \ + pub/libvex_guest_amd64.h \ + pub/libvex_guest_ppc32.h \ + pub/libvex_guest_ppc64.h \ + pub/libvex_guest_arm.h \ + pub/libvex_ir.h \ + pub/libvex_trc_values.h + + noinst_HEADERS = \ + priv/main_globals.h \ + priv/main_util.h \ + priv/ir_match.h \ + priv/ir_opt.h \ + priv/guest_generic_bb_to_IR.h \ + priv/guest_generic_x87.h \ + priv/guest_x86_defs.h \ + priv/guest_amd64_defs.h \ + priv/guest_ppc_defs.h \ + priv/guest_arm_defs.h \ + priv/host_generic_regs.h \ + priv/host_generic_simd64.h \ + priv/host_x86_defs.h \ + priv/host_amd64_defs.h \ + priv/host_ppc_defs.h \ + priv/host_arm_defs.h + + BUILT_SOURCES = pub/libvex_guest_offsets.h + CLEANFILES = pub/libvex_guest_offsets.h + + # This is very uggerly. Need to sed out both "xyzzyN" and + # "xyzzy$N" since gcc on different targets emits the constants + # differently -- with a leading $ on x86/amd64 but none on ppc32/64. 
+ pub/libvex_guest_offsets.h: + rm -f auxprogs/genoffsets.s + $(CC) $(LIBVEX_CFLAGS) -O -S -o auxprogs/genoffsets.s \ + auxprogs/genoffsets.c + grep xyzzy auxprogs/genoffsets.s | grep define \ + | sed "s/xyzzy\\$$//g" | sed "s/xyzzy//g" \ + > pub/libvex_guest_offsets.h + rm -f auxprogs/genoffsets.s + + #---------------------------------------------------------------------------- + # libvex-.a + #---------------------------------------------------------------------------- + + pkglib_LIBRARIES = libvex-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a + if VGCONF_HAVE_PLATFORM_SEC + pkglib_LIBRARIES += libvex-@VGCONF_ARCH_SEC@-@VGCONF_OS@.a + endif + + LIBVEX_SOURCES_COMMON = \ + priv/main_globals.c \ + priv/main_main.c \ + priv/main_util.c \ + priv/ir_defs.c \ + priv/ir_match.c \ + priv/ir_opt.c \ + priv/guest_generic_bb_to_IR.c \ + priv/guest_generic_x87.c \ + priv/guest_x86_helpers.c \ + priv/guest_x86_toIR.c \ + priv/guest_amd64_helpers.c \ + priv/guest_amd64_toIR.c \ + priv/guest_ppc_helpers.c \ + priv/guest_ppc_toIR.c \ + priv/guest_arm_helpers.c \ + priv/guest_arm_toIR.c \ + priv/host_generic_regs.c \ + priv/host_generic_simd64.c \ + priv/host_generic_reg_alloc2.c \ + priv/host_x86_defs.c \ + priv/host_x86_isel.c \ + priv/host_amd64_defs.c \ + priv/host_amd64_isel.c \ + priv/host_ppc_defs.c \ + priv/host_ppc_isel.c \ + priv/host_arm_defs.c \ + priv/host_arm_isel.c + + LIBVEX_CFLAGS = \ + -Wbad-function-cast \ + -Wcast-qual \ + -Wcast-align \ + -fstrict-aliasing + + libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES = $(LIBVEX_SOURCES_COMMON) + libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CPPFLAGS = \ + $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -Ipriv + libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CFLAGS = \ + $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(LIBVEX_CFLAGS) + if VGCONF_HAVE_PLATFORM_SEC + libvex_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_SOURCES = $(LIBVEX_SOURCES_COMMON) + libvex_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CPPFLAGS = \ + $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -Ipriv + 
libvex_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CFLAGS = \ + $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(LIBVEX_CFLAGS) + endif + Index: NEWS =========================================================================== --- NEWS 2012/05/26 10:11:27 #1 +++ NEWS 2012/05/26 10:11:27 @@ -1,4 +1,1090 @@ +Release 3.5.0 (19 August 2009) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +3.5.0 is a feature release with many significant improvements and the +usual collection of bug fixes. The main improvement is that Valgrind +now works on Mac OS X. + +This release supports X86/Linux, AMD64/Linux, PPC32/Linux, PPC64/Linux +and X86/Darwin. Support for recent distros and toolchain components +(glibc 2.10, gcc 4.5) has been added. + + ------------------------- + +Here is a short summary of the changes. Details are shown further +down: + +* Support for Mac OS X (10.5.x). + +* Improvements and simplifications to Memcheck's leak checker. + +* Clarification and simplifications in various aspects of Valgrind's + text output. + +* XML output for Helgrind and Ptrcheck. + +* Performance and stability improvements for Helgrind and DRD. + +* Genuinely atomic support for x86/amd64/ppc atomic instructions. + +* A new experimental tool, BBV, useful for computer architecture + research. + +* Improved Wine support, including ability to read Windows PDB + debuginfo. + + ------------------------- + +Here are details of the above changes, followed by descriptions of +many other minor changes, and a list of fixed bugs. + + +* Valgrind now runs on Mac OS X. (Note that Mac OS X is sometimes + called "Darwin" because that is the name of the OS core, which is the + level that Valgrind works at.) + + Supported systems: + + - It requires OS 10.5.x (Leopard). Porting to 10.4.x is not planned + because it would require work and 10.4 is only becoming less common. + + - 32-bit programs on x86 and AMD64 (a.k.a x86-64) machines are supported + fairly well. 
For 10.5.x, 32-bit programs are the default even on + 64-bit machines, so it handles most current programs. + + - 64-bit programs on x86 and AMD64 (a.k.a x86-64) machines are not + officially supported, but simple programs at least will probably work. + However, start-up is slow. + + - PowerPC machines are not supported. + + Things that don't work: + + - The Ptrcheck tool. + + - Objective-C garbage collection. + + - --db-attach=yes. + + - If you have Rogue Amoeba's "Instant Hijack" program installed, + Valgrind will fail with a SIGTRAP at start-up. See + https://bugs.kde.org/show_bug.cgi?id=193917 for details and a + simple work-around. + + Usage notes: + + - You will likely find --dsymutil=yes a useful option, as error + messages may be imprecise without it. + + - Mac OS X support is new and therefore will be less robust than the + Linux support. Please report any bugs you find. + + - Threaded programs may run more slowly than on Linux. + + Many thanks to Greg Parker for developing this port over several years. + + +* Memcheck's leak checker has been improved. + + - The results for --leak-check=summary now match the summary results + for --leak-check=full. Previously they could differ because + --leak-check=summary counted "indirectly lost" blocks and + "suppressed" blocks as "definitely lost". + + - Blocks that are only reachable via at least one interior-pointer, + but are directly pointed to by a start-pointer, were previously + marked as "still reachable". They are now correctly marked as + "possibly lost". + + - The default value for the --leak-resolution option has been + changed from "low" to "high". In general, this means that more + leak reports will be produced, but each leak report will describe + fewer leaked blocks. + + - With --leak-check=full, "definitely lost" and "possibly lost" + leaks are now considered as proper errors, ie. they are counted + for the "ERROR SUMMARY" and affect the behaviour of + --error-exitcode. 
These leaks are not counted as errors if + --leak-check=summary is specified, however. + + - Documentation for the leak checker has been improved. + + +* Various aspects of Valgrind's text output have changed. + + - Valgrind's start-up message has changed. It is shorter but also + includes the command being run, which makes it easier to use + --trace-children=yes. An example: + + - Valgrind's shut-down messages have also changed. This is most + noticeable with Memcheck, where the leak summary now occurs before + the error summary. This change was necessary to allow leaks to be + counted as proper errors (see the description of the leak checker + changes above for more details). This was also necessary to fix a + longstanding bug in which uses of suppressions against leaks were + not "counted", leading to difficulties in maintaining suppression + files (XXXX bug number). + + - Behavior of -v has changed. In previous versions, -v printed out + a mixture of marginally-user-useful information, and tool/core + statistics. The statistics printing has now been moved to its own + flag, --stats=yes. This means -v is less verbose and more likely + to convey useful end-user information. + + - The format of some (non-XML) stack trace entries has changed a + little. Previously there were six possible forms: + + 0x80483BF: really (a.c:20) + 0x80483BF: really (in /foo/a.out) + 0x80483BF: really + 0x80483BF: (within /foo/a.out) + 0x80483BF: ??? (a.c:20) + 0x80483BF: ??? + + The third and fourth of these forms have been made more consistent + with the others. The six possible forms are now: + + 0x80483BF: really (a.c:20) + 0x80483BF: really (in /foo/a.out) + 0x80483BF: really (in ???) + 0x80483BF: ??? (in /foo/a.out) + 0x80483BF: ??? (a.c:20) + 0x80483BF: ??? + + Stack traces produced when --xml=yes is specified are different + and unchanged. + + +* Helgrind and Ptrcheck now support XML output, so they can be used + from GUI tools. 
Also, the XML output mechanism has been + overhauled. + + - The XML format has been overhauled and generalised, so it is more + suitable for error reporting tools in general. The Memcheck + specific aspects of it have been removed. The new format, which + is an evolution of the old format, is described in + docs/internals/xml-output-protocol4.txt. + + - Memcheck has been updated to use the new format. + + - Helgrind and Ptrcheck are now able to emit output in this format. + + - The XML output mechanism has been overhauled. XML is now output + to its own file descriptor, which means that: + + * Valgrind can output text and XML independently. + + * The longstanding problem of XML output being corrupted by + unexpected un-tagged text messages is solved. + + As before, the destination for text output is specified using + --log-file=, --log-fd= or --log-socket=. + + As before, XML output for a tool is enabled using --xml=yes. + + Because there's a new XML output channel, the XML output + destination is now specified by --xml-file=, --xml-fd= or + --xml-socket=. + + Initial feedback has shown this causes some confusion. To + clarify, the two envisaged usage scenarios are: + + (1) Normal text output. In this case, do not specify --xml=yes + nor any of --xml-file=, --xml-fd= or --xml-socket=. + + (2) XML output. In this case, specify --xml=yes, and one of + --xml-file=, --xml-fd= or --xml-socket= to select the XML + destination, one of --log-file=, --log-fd= or --log-socket= + to select the destination for any remaining text messages, + and, importantly, -q. + + -q makes Valgrind completely silent on the text channel, + except in the case of critical failures, such as Valgrind + itself segfaulting, or failing to read debugging information. + Hence, in this scenario, it suffices to check whether or not + any output appeared on the text channel. If yes, then it is + likely to be a critical error which should be brought to the + attention of the user. 
If no (the text channel produced no + output) then it can be assumed that the run was successful. + + This allows GUIs to make the critical distinction they need to + make (did the run fail or not?) without having to search or + filter the text output channel in any way. + + It is also recommended to use --child-silent-after-fork=yes in + scenario (2). + + +* Improvements and changes in Helgrind: + + - XML output, as described above + + - Checks for consistent association between pthread condition + variables and their associated mutexes are now performed. + + - pthread_spinlock functions are supported. + + - Modest performance improvements. + + - Initial (skeletal) support for describing the behaviour of + non-POSIX synchronisation objects through ThreadSanitizer + compatible ANNOTATE_* macros. + + - More controllable tradeoffs between performance and the level of + detail of "previous" accesses in a race. There are now three + settings: + + * --history-level=full. This is the default, and was also the + default in 3.4.x. It shows both stacks involved in a race, but + requires a lot of memory and can be very slow in programs that + do many inter-thread synchronisation events. + + * --history-level=none. This only shows the later stack involved + in a race. This can be much faster than --history-level=full, + but makes it much more difficult to find the other access + involved in the race. + + The new intermediate setting is + + * --history-level=approx + + For the earlier (other) access, two stacks are presented. The + earlier access is guaranteed to be somewhere in between the two + program points denoted by those stacks. This is not as useful + as showing the exact stack for the previous access (as per + --history-level=full), but it is better than nothing, and it's + almost as fast as --history-level=none. + + +* New features and improvements in DRD: + + - The error messages printed by DRD are now easier to interpret. 
+ Instead of using two different numbers to identify each thread + (Valgrind thread ID and DRD thread ID), DRD does now identify + threads via a single number (the DRD thread ID). Furthermore + "first observed at" information is now printed for all error + messages related to synchronization objects. + + - Added support for named semaphores (sem_open() and sem_close()). + + - Race conditions between pthread_barrier_wait() and + pthread_barrier_destroy() calls are now reported. + + - Added support for custom allocators through the macros + VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_FREELIKE_BLOCK() (defined in + <valgrind/valgrind.h>). An alternative for these two macros is + the new client request VG_USERREQ__DRD_CLEAN_MEMORY (defined in + <valgrind/drd.h>). + + - Added support for annotating non-POSIX synchronization objects + through several new ANNOTATE_*() macros. + + - OpenMP: added support for the OpenMP runtime (libgomp) included + with gcc versions 4.3.0 and 4.4.0. + + - Faster operation. + + - Added two new command-line options (--first-race-only and + --segment-merging-interval). + + +* Genuinely atomic support for x86/amd64/ppc atomic instructions + + Valgrind will now preserve (memory-access) atomicity of LOCK- + prefixed x86/amd64 instructions, and any others implying a global + bus lock. Ditto for PowerPC l{w,d}arx/st{w,d}cx. instructions. + + This means that Valgrinded processes will "play nicely" in + situations where communication with other processes, or the kernel, + is done through shared memory and coordinated with such atomic + instructions. Prior to this change, such arrangements usually + resulted in hangs, races or other synchronisation failures, because + Valgrind did not honour atomicity of such instructions. + + +* A new experimental tool, BBV, has been added. BBV generates basic + block vectors for use with the SimPoint analysis tool, which allows + a program's overall behaviour to be approximated by running only a + fraction of it.
This is useful for computer architecture + researchers. You can run BBV by specifying --tool=exp-bbv (the + "exp-" prefix is short for "experimental"). BBV was written by + Vince Weaver. + + +* Modestly improved support for running Windows applications under + Wine. In particular, initial support for reading Windows .PDB debug + information has been added. + + +* A new Memcheck client request VALGRIND_COUNT_LEAK_BLOCKS has been + added. It is similar to VALGRIND_COUNT_LEAKS but counts blocks + instead of bytes. + + +* The Valgrind client requests VALGRIND_PRINTF and + VALGRIND_PRINTF_BACKTRACE have been changed slightly. Previously, + the string was always printed immediately on its own line. Now, the + string will be added to a buffer but not printed until a newline is + encountered, or other Valgrind output is printed (note that for + VALGRIND_PRINTF_BACKTRACE, the back-trace itself is considered + "other Valgrind output"). This allows you to use multiple + VALGRIND_PRINTF calls to build up a single output line, and also to + print multiple output lines with a single request (by embedding + multiple newlines in the string). + + +* The graphs drawn by Massif's ms_print program have changed slightly: + + - The half-height chars '.' and ',' are no longer drawn, because + they are confusing. The --y option can be used if the default + y-resolution is not high enough. + + - Horizontal lines are now drawn after the top of a snapshot if + there is a gap until the next snapshot. This makes it clear that + the memory usage has not dropped to zero between snapshots. + + +* Something that happened in 3.4.0, but wasn't clearly announced: the + option --read-var-info=yes can be used by some tools (Memcheck, + Helgrind and DRD). When enabled, it causes Valgrind to read DWARF3 + variable type and location information. This makes those tools + start up more slowly and increases memory consumption, but + descriptions of data addresses in error messages become more + detailed. 
+ + +* exp-Omega, an experimental instantaneous leak-detecting tool, was + disabled in 3.4.0 due to a lack of interest and maintenance, + although the source code was still in the distribution. The source + code has now been removed from the distribution. For anyone + interested, the removal occurred in SVN revision r10247. + + +* Some changes have been made to the build system. + + - VEX/ is now integrated properly into the build system. This means + that dependency tracking within VEX/ now works properly, "make + install" will work without requiring "make" before it, and + parallel builds (ie. 'make -j') now work (previously a + .NOTPARALLEL directive was used to serialize builds, ie. 'make -j' + was effectively ignored). + + - The --with-vex configure option has been removed. It was of + little use and removing it simplified the build system. + + - The location of some install files has changed. This should not + affect most users. Those who might be affected: + + * For people who use Valgrind with MPI programs, the installed + libmpiwrap.so library has moved from + $(INSTALL)/<platform>/libmpiwrap.so to + $(INSTALL)/libmpiwrap-<platform>.so. + + * For people who distribute standalone Valgrind tools, the + installed libraries such as $(INSTALL)/<platform>/libcoregrind.a + have moved to $(INSTALL)/libcoregrind-<platform>.a. + + These changes simplify the build system. + + - Previously, all the distributed suppression (*.supp) files were + installed. Now, only default.supp is installed. This should not + affect users as the other installed suppression files were not + read; the fact that they were installed was a mistake. + + +* KNOWN LIMITATIONS: + + - Memcheck is unusable with the Intel compiler suite version 11.1, + when it generates code for SSE2-and-above capable targets. This + is because of icc's use of highly optimised inlined strlen + implementations. It causes Memcheck to report huge numbers of + false errors even in simple programs. Helgrind and DRD may also + have problems.
+ + Versions 11.0 and earlier may be OK, but this has not been + properly tested. + + +The following bugs have been fixed or resolved. Note that "n-i-bz" +stands for "not in bugzilla" -- that is, a bug that was reported to us +but never got a bugzilla entry. We encourage you to file bugs in +bugzilla (http://bugs.kde.org/enter_valgrind_bug.cgi) rather than +mailing the developers (or mailing lists) directly -- bugs that are +not entered into bugzilla tend to get forgotten about or ignored. + +To see details of a given bug, visit +https://bugs.kde.org/show_bug.cgi?id=XXXXXX +where XXXXXX is the bug number as listed below. + +84303 How about a LockCheck tool? +91633 dereference of null ptr in vgPlain_st_basetype +97452 Valgrind doesn't report any pthreads problems +100628 leak-check gets assertion failure when using + VALGRIND_MALLOCLIKE_BLOCK on malloc()ed memory +108528 NPTL pthread cleanup handlers not called +110126 Valgrind 2.4.1 configure.in tramples CFLAGS +110128 mallinfo is not implemented... +110770 VEX: Generated files not always updated when making valgrind +111102 Memcheck: problems with large (memory footprint) applications +115673 Vex's decoder should never assert +117564 False positive: Syscall param clone(child_tidptr) contains + uninitialised byte(s) +119404 executing ssh from inside valgrind fails +133679 Callgrind does not write path names to sources with dwarf debug + info +135847 configure.in problem with non gnu compilers (and possible fix) +136154 threads.c:273 (vgCallgrind_post_signal): Assertion + '*(vgCallgrind_current_fn_stack.top) == 0' failed. 
+136230 memcheck reports "possibly lost", should be "still reachable" +137073 NULL arg to MALLOCLIKE_BLOCK causes crash +137904 Valgrind reports a memory leak when using POSIX threads, + while it shouldn't +139076 valgrind VT_GETSTATE error +142228 complaint of elf_dynamic_do_rela in trivial usage +145347 spurious warning with USBDEVFS_REAPURB +148441 (wine) can't find memory leak in Wine, win32 binary + executable file. +148742 Leak-check fails assert on exit +149878 add (proper) check for calloc integer overflow +150606 Call graph is broken when using callgrind control +152393 leak errors produce an exit code of 0. I need some way to + cause leak errors to result in a nonzero exit code. +157154 documentation (leak-resolution doc speaks about num-callers + def=4) + what is a loss record +159501 incorrect handling of ALSA ioctls +162020 Valgrinding an empty/zero-byte file crashes valgrind +162482 ppc: Valgrind crashes while reading stabs information +162718 x86: avoid segment selector 0 in sys_set_thread_area() +163253 (wine) canonicaliseSymtab forgot some fields in DiSym +163560 VEX/test_main.c is missing from valgrind-3.3.1 +164353 malloc_usable_size() doesn't return a usable size +165468 Inconsistent formatting in memcheck manual -- please fix +169505 main.c:286 (endOfInstr): + Assertion 'ii->cost_offset == *cost_offset' failed +177206 Generate default.supp during compile instead of configure +177209 Configure valt_load_address based on arch+os +177305 eventfd / syscall 323 patch lost +179731 Tests fail to build because of inlining of non-local asm labels +181394 helgrind: libhb_core.c:3762 (msm_write): Assertion + 'ordxx == POrd_EQ || ordxx == POrd_LT' failed. +181594 Bogus warning for empty text segment +181707 dwarf doesn't require enumerations to have name +185038 exp-ptrcheck: "unhandled syscall: 285" (fallocate) on x86_64 +185050 exp-ptrcheck: sg_main.c:727 (add_block_to_GlobalTree): + Assertion '!already_present' failed. 
+185359 exp-ptrcheck: unhandled syscall getresuid() +185794 "WARNING: unhandled syscall: 285" (fallocate) on x86_64 +185816 Valgrind is unable to handle debug info for files with split + debug info that are prelinked afterwards +185980 [darwin] unhandled syscall: sem_open +186238 bbToIR_AMD64: disInstr miscalculated next %rip +186507 exp-ptrcheck unhandled syscalls prctl, etc. +186790 Suppression pattern used for leaks are not reported +186796 Symbols with length>200 in suppression files are ignored +187048 drd: mutex PTHREAD_PROCESS_SHARED attribute missinterpretation +187416 exp-ptrcheck: support for __NR_{setregid,setreuid,setresuid} +188038 helgrind: hg_main.c:926: mk_SHVAL_fail: the 'impossible' happened +188046 bashisms in the configure script +188127 amd64->IR: unhandled instruction bytes: 0xF0 0xF 0xB0 0xA +188161 memcheck: --track-origins=yes asserts "mc_machine.c:672 + (get_otrack_shadow_offset_wrk): the 'impossible' happened." +188248 helgrind: pthread_cleanup_push, pthread_rwlock_unlock, + assertion fail "!lock->heldBy" +188427 Add support for epoll_create1 (with patch) +188530 Support for SIOCGSTAMPNS +188560 Include valgrind.spec in the tarball +188572 Valgrind on Mac should suppress setenv() mem leak +189054 Valgrind fails to build because of duplicate non-local asm labels +189737 vex amd64->IR: unhandled instruction bytes: 0xAC +189762 epoll_create syscall not handled (--tool=exp-ptrcheck) +189763 drd assertion failure: s_threadinfo[tid].is_recording +190219 unhandled syscall: 328 (x86-linux) +190391 dup of 181394; see above +190429 Valgrind reports lots of errors in ld.so with x86_64 2.9.90 glibc +190820 No debug information on powerpc-linux +190820 No debug information on powerpc-linux +191095 PATCH: Improve usbdevfs ioctl handling +191182 memcheck: VALGRIND_LEAK_CHECK quadratic when big nr of chunks + or big nr of errors +191189 --xml=yes should obey --gen-suppressions=all +191192 syslog() needs a suppression on macosx +191271 DARWIN: WARNING: 
unhandled syscall: 33554697 a.k.a.: 265 +191761 getrlimit on MacOSX +191992 multiple --fn-skip only works sometimes; dependent on order +192634 V. reports "aspacem sync_check_mapping_callback: + segment mismatch" on Darwin +192954 __extension__ missing on 2 client requests +194429 Crash at start-up with glibc-2.10.1 and linux-2.6.29 +194474 "INSTALL" file has different build instructions than "README" +194671 Unhandled syscall (sem_wait?) from mac valgrind +195069 memcheck: reports leak (memory still reachable) for + printf("%d', x) +195169 drd: (vgDrd_barrier_post_wait): + Assertion 'r->sg[p->post_iteration]' failed. +195268 valgrind --log-file doesn't accept ~/... +195838 VEX abort: LibVEX_N_SPILL_BYTES too small for CPUID boilerplate +195860 WARNING: unhandled syscall: unix:223 +196528 need a error suppression for pthread_rwlock_init under os x? +197227 Support aio_* syscalls on Darwin +197456 valgrind should reject --suppressions=(directory) +197512 DWARF2 CFI reader: unhandled CFI instruction 0:10 +197591 unhandled syscall 27 (mincore) +197793 Merge DCAS branch to the trunk == 85756, 142103 +197794 Avoid duplicate filenames in Vex +197898 make check fails on current SVN +197901 make check fails also under exp-ptrcheck in current SVN +197929 Make --leak-resolution=high the default +197930 Reduce spacing between leak reports +197933 Print command line of client at start-up, and shorten preamble +197966 unhandled syscall 205 (x86-linux, --tool=exp-ptrcheck) +198395 add BBV to the distribution as an experimental tool +198624 Missing syscalls on Darwin: 82, 167, 281, 347 +198649 callgrind_annotate doesn't cumulate counters +199338 callgrind_annotate sorting/thresholds are broken for all but Ir +199977 Valgrind complains about an unrecognized instruction in the + atomic_incs test program +200029 valgrind isn't able to read Fedora 12 debuginfo +200760 darwin unhandled syscall: unix:284 +200827 DRD doesn't work on Mac OS X +200990 VG_(read_millisecond_timer)() does 
not work correctly +201016 Valgrind does not support pthread_kill() on Mac OS +201169 Document --read-var-info +201323 Pre-3.5.0 performance sanity checking +201384 Review user manual for the 3.5.0 release +201585 mfpvr not implemented on ppc +201708 tests failing because x86 direction flag is left set +201757 Valgrind doesn't handle any recent sys_futex additions +204377 64-bit valgrind can not start a shell script + (with #!/path/to/shell) if the shell is a 32-bit executable +n-i-bz drd: fixed assertion failure triggered by mutex reinitialization. +n-i-bz drd: fixed a bug that caused incorrect messages to be printed + about memory allocation events with memory access tracing enabled +n-i-bz drd: fixed a memory leak triggered by vector clock deallocation + +(3.5.0: 20 Aug 2009, vex r1913, valgrind r10846). + + + +Release 3.4.1 (28 February 2009) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +3.4.1 is a bug-fix release that fixes some regressions and assertion +failures in debug info reading in 3.4.0, most notably incorrect stack +traces on amd64-linux on older (glibc-2.3 based) systems. Various +other debug info problems are also fixed. A number of bugs in the +exp-ptrcheck tool introduced in 3.4.0 have been fixed. + +In view of the fact that 3.4.0 contains user-visible regressions +relative to 3.3.x, upgrading to 3.4.1 is recommended. Packagers are +encouraged to ship 3.4.1 in preference to 3.4.0. + +The fixed bugs are as follows. Note that "n-i-bz" stands for "not in +bugzilla" -- that is, a bug that was reported to us but never got a +bugzilla entry. We encourage you to file bugs in bugzilla +(http://bugs.kde.org/enter_valgrind_bug.cgi) rather than mailing the +developers (or mailing lists) directly -- bugs that are not entered +into bugzilla tend to get forgotten about or ignored. 
+ +n-i-bz Fix various bugs reading icc-11 generated debug info +n-i-bz Fix various bugs reading gcc-4.4 generated debug info +n-i-bz Preliminary support for glibc-2.10 / Fedora 11 +n-i-bz Cachegrind and Callgrind: handle non-power-of-two cache sizes, + so as to support (eg) 24k Atom D1 and Core2 with 3/6/12MB L2. +179618 exp-ptrcheck crashed / exit prematurely +179624 helgrind: false positive races with pthread_create and + recv/open/close/read +134207 pkg-config output contains @VG_PLATFORM@ +176926 floating point exception at valgrind startup with PPC 440EPX +181594 Bogus warning for empty text segment +173751 amd64->IR: 0x48 0xF 0x6F 0x45 (even more redundant rex prefixes) +181707 Dwarf3 doesn't require enumerations to have name +185038 exp-ptrcheck: "unhandled syscall: 285" (fallocate) on x86_64 +185050 exp-ptrcheck: sg_main.c:727 (add_block_to_GlobalTree): + Assertion '!already_present' failed. +185359 exp-ptrcheck unhandled syscall getresuid() + +(3.4.1.RC1: 24 Feb 2009, vex r1884, valgrind r9253). +(3.4.1: 28 Feb 2009, vex r1884, valgrind r9293). + + + +Release 3.4.0 (2 January 2009) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +3.4.0 is a feature release with many significant improvements and the +usual collection of bug fixes. This release supports X86/Linux, +AMD64/Linux, PPC32/Linux and PPC64/Linux. Support for recent distros +(using gcc 4.4, glibc 2.8 and 2.9) has been added. + +3.4.0 brings some significant tool improvements. Memcheck can now +report the origin of uninitialised values, the thread checkers +Helgrind and DRD are much improved, and we have a new experimental +tool, exp-Ptrcheck, which is able to detect overruns of stack and +global arrays. In detail: + +* Memcheck is now able to track the origin of uninitialised values. + When it reports an uninitialised value error, it will try to show + the origin of the value, as either a heap or stack allocation. + Origin tracking is expensive and so is not enabled by default. 
To + use it, specify --track-origins=yes. Memcheck's speed will be + essentially halved, and memory usage will be significantly + increased. Nevertheless it can drastically reduce the effort + required to identify the root cause of uninitialised value errors, + and so is often a programmer productivity win, despite running more + slowly. + +* A version (1.4.0) of the Valkyrie GUI, that works with Memcheck in + 3.4.0, will be released shortly. + +* Helgrind's race detection algorithm has been completely redesigned + and reimplemented, to address usability and scalability concerns: + + - The new algorithm has a lower false-error rate: it is much less + likely to report races that do not really exist. + + - Helgrind will display full call stacks for both accesses involved + in a race. This makes it easier to identify the root causes of + races. + + - Limitations on the size of program that can run have been removed. + + - Performance has been modestly improved, although that is very + workload-dependent. + + - Direct support for Qt4 threading has been added. + + - pthread_barriers are now directly supported. + + - Helgrind works well on all supported Linux targets. + +* The DRD thread debugging tool has seen major improvements: + + - Greatly improved performance and significantly reduced memory + usage. + + - Support for several major threading libraries (Boost.Thread, Qt4, + glib, OpenMP) has been added. + + - Support for atomic instructions, POSIX semaphores, barriers and + reader-writer locks has been added. + + - Works now on PowerPC CPUs too. + + - Added support for printing thread stack usage at thread exit time. + + - Added support for debugging lock contention. + + - Added a manual for Drd. + +* A new experimental tool, exp-Ptrcheck, has been added. Ptrcheck + checks for misuses of pointers. In that sense it is a bit like + Memcheck. 
However, Ptrcheck can do things Memcheck can't: it can + detect overruns of stack and global arrays, it can detect + arbitrarily far out-of-bounds accesses to heap blocks, and it can + detect accesses to heap blocks that have been freed a very long time + ago (millions of blocks in the past). + + Ptrcheck currently works only on x86-linux and amd64-linux. To use + it, use --tool=exp-ptrcheck. A simple manual is provided, as part + of the main Valgrind documentation. As this is an experimental + tool, we would be particularly interested in hearing about your + experiences with it. + +* exp-Omega, an experimental instantaneous leak-detecting tool, is no + longer built by default, although the code remains in the repository + and the tarball. This is due to three factors: a perceived lack of + users, a lack of maintenance, and concerns that it may not be + possible to achieve reliable operation using the existing design. + +* As usual, support for the latest Linux distros and toolchain + components has been added. It should work well on Fedora Core 10, + OpenSUSE 11.1 and Ubuntu 8.10. gcc-4.4 (in its current pre-release + state) is supported, as is glibc-2.9. The C++ demangler has been + updated so as to work well with C++ compiled by even the most recent + g++'s. + +* You can now use frame-level wildcards in suppressions. This was a + frequently-requested enhancement. A line "..." in a suppression now + matches zero or more frames. This makes it easier to write + suppressions which are precise yet insensitive to changes in + inlining behaviour. + +* 3.4.0 adds support on x86/amd64 for the SSSE3 instruction set. + +* Very basic support for IBM Power6 has been added (64-bit processes only). + +* Valgrind is now cross-compilable. For example, it is possible to + cross compile Valgrind on an x86/amd64-linux host, so that it runs + on a ppc32/64-linux target. 
+ +* You can set the main thread's stack size at startup using the + new --main-stacksize= flag (subject of course to ulimit settings). + This is useful for running apps that need a lot of stack space. + +* The limitation that you can't use --trace-children=yes together + with --db-attach=yes has been removed. + +* The following bugs have been fixed. Note that "n-i-bz" stands for + "not in bugzilla" -- that is, a bug that was reported to us but + never got a bugzilla entry. We encourage you to file bugs in + bugzilla (http://bugs.kde.org/enter_valgrind_bug.cgi) rather than + mailing the developers (or mailing lists) directly. + + n-i-bz Make return types for some client requests 64-bit clean + n-i-bz glibc 2.9 support + n-i-bz ignore unsafe .valgrindrc's (CVE-2008-4865) + n-i-bz MPI_Init(0,0) is valid but libmpiwrap.c segfaults + n-i-bz Building in an env without gdb gives bogus gdb attach + 92456 Tracing the origin of uninitialised memory + 106497 Valgrind does not demangle some C++ template symbols + 162222 ==106497 + 151612 Suppression with "..." 
(frame-level wildcards in .supp files) + 156404 Unable to start oocalc under memcheck on openSUSE 10.3 (64-bit) + 159285 unhandled syscall:25 (stime, on x86-linux) + 159452 unhandled ioctl 0x8B01 on "valgrind iwconfig" + 160954 ppc build of valgrind crashes with illegal instruction (isel) + 160956 mallinfo implementation, w/ patch + 162092 Valgrind fails to start gnome-system-monitor + 162819 malloc_free_fill test doesn't pass on glibc2.8 x86 + 163794 assertion failure with "--track-origins=yes" + 163933 sigcontext.err and .trapno must be set together + 163955 remove constraint !(--db-attach=yes && --trace-children=yes) + 164476 Missing kernel module loading system calls + 164669 SVN regression: mmap() drops posix file locks + 166581 Callgrind output corruption when program forks + 167288 Patch file for missing system calls on Cell BE + 168943 unsupported scas instruction pentium + 171645 Unrecognised instruction (MOVSD, non-binutils encoding) + 172417 x86->IR: 0x82 ... + 172563 amd64->IR: 0xD9 0xF5 - fprem1 + 173099 .lds linker script generation error + 173177 [x86_64] syscalls: 125/126/179 (capget/capset/quotactl) + 173751 amd64->IR: 0x48 0xF 0x6F 0x45 (even more redundant prefixes) + 174532 == 173751 + 174908 --log-file value not expanded correctly for core file + 175044 Add lookup_dcookie for amd64 + 175150 x86->IR: 0xF2 0xF 0x11 0xC1 (movss non-binutils encoding) + +Developer-visible changes: + +* Valgrind's debug-info reading machinery has been majorly overhauled. + It can now correctly establish the addresses for ELF data symbols, + which is something that has never worked properly before now. + + Also, Valgrind can now read DWARF3 type and location information for + stack and global variables. This makes it possible to use the + framework to build tools that rely on knowing the type and locations + of stack and global variables, for example exp-Ptrcheck. 
+ + Reading of such information is disabled by default, because most + tools don't need it, and because it is expensive in space and time. + However, you can force Valgrind to read it, using the + --read-var-info=yes flag. Memcheck, Helgrind and DRD are able to + make use of such information, if present, to provide source-level + descriptions of data addresses in the error messages they create. + +(3.4.0.RC1: 24 Dec 2008, vex r1878, valgrind r8882). +(3.4.0: 3 Jan 2009, vex r1878, valgrind r8899). + + + +Release 3.3.1 (4 June 2008) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +3.3.1 fixes a bunch of bugs in 3.3.0, adds support for glibc-2.8 based +systems (openSUSE 11, Fedora Core 9), improves the existing glibc-2.7 +support, and adds support for the SSSE3 (Core 2) instruction set. + +3.3.1 will likely be the last release that supports some very old +systems. In particular, the next major release, 3.4.0, will drop +support for the old LinuxThreads threading library, and for gcc +versions prior to 3.0. + +The fixed bugs are as follows. Note that "n-i-bz" stands for "not in +bugzilla" -- that is, a bug that was reported to us but never got a +bugzilla entry. We encourage you to file bugs in bugzilla +(http://bugs.kde.org/enter_valgrind_bug.cgi) rather than mailing the +developers (or mailing lists) directly -- bugs that are not entered +into bugzilla tend to get forgotten about or ignored. 
+ +n-i-bz Massif segfaults at exit +n-i-bz Memcheck asserts on Altivec code +n-i-bz fix sizeof bug in Helgrind +n-i-bz check fd on sys_llseek +n-i-bz update syscall lists to kernel 2.6.23.1 +n-i-bz support sys_sync_file_range +n-i-bz handle sys_sysinfo, sys_getresuid, sys_getresgid on ppc64-linux +n-i-bz intercept memcpy in 64-bit ld.so's +n-i-bz Fix wrappers for sys_{futimesat,utimensat} +n-i-bz Minor false-error avoidance fixes for Memcheck +n-i-bz libmpiwrap.c: add a wrapper for MPI_Waitany +n-i-bz helgrind support for glibc-2.8 +n-i-bz partial fix for mc_leakcheck.c:698 assert: + 'lc_shadows[i]->data + lc_shadows[i] ... +n-i-bz Massif/Cachegrind output corruption when programs fork +n-i-bz register allocator fix: handle spill stores correctly +n-i-bz add support for PA6T PowerPC CPUs +126389 vex x86->IR: 0xF 0xAE (FXRSTOR) +158525 ==126389 +152818 vex x86->IR: 0xF3 0xAC (repz lodsb) +153196 vex x86->IR: 0xF2 0xA6 (repnz cmpsb) +155011 vex x86->IR: 0xCF (iret) +155091 Warning [...] unhandled DW_OP_ opcode 0x23 +156960 ==155901 +155528 support Core2/SSSE3 insns on x86/amd64 +155929 ms_print fails on massif outputs containing long lines +157665 valgrind fails on shmdt(0) after shmat to 0 +157748 support x86 PUSHFW/POPFW +158212 helgrind: handle pthread_rwlock_try{rd,wr}lock. +158425 sys_poll incorrectly emulated when RES==0 +158744 vex amd64->IR: 0xF0 0x41 0xF 0xC0 (xaddb) +160907 Support for a couple of recent Linux syscalls +161285 Patch -- support for eventfd() syscall +161378 illegal opcode in debug libm (FUCOMPP) +160136 ==161378 +161487 number of suppressions files is limited to 10 +162386 ms_print typo in milliseconds time unit for massif +161036 exp-drd: client allocated memory was never freed +162663 signalfd_wrapper fails on 64bit linux + +(3.3.1.RC1: 2 June 2008, vex r1854, valgrind r8169). +(3.3.1: 4 June 2008, vex r1854, valgrind r8180). 
+ + + +Release 3.3.0 (7 December 2007) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +3.3.0 is a feature release with many significant improvements and the +usual collection of bug fixes. This release supports X86/Linux, +AMD64/Linux, PPC32/Linux and PPC64/Linux. Support for recent distros +(using gcc 4.3, glibc 2.6 and 2.7) has been added. + +The main excitement in 3.3.0 is new and improved tools. Helgrind +works again, Massif has been completely overhauled and much improved, +Cachegrind now does branch-misprediction profiling, and a new category +of experimental tools has been created, containing two new tools: +Omega and DRD. There are many other smaller improvements. In detail: + +- Helgrind has been completely overhauled and works for the first time + since Valgrind 2.2.0. Supported functionality is: detection of + misuses of the POSIX PThreads API, detection of potential deadlocks + resulting from cyclic lock dependencies, and detection of data + races. Compared to the 2.2.0 Helgrind, the race detection algorithm + has some significant improvements aimed at reducing the false error + rate. Handling of various kinds of corner cases has been improved. + Efforts have been made to make the error messages easier to + understand. Extensive documentation is provided. + +- Massif has been completely overhauled. Instead of measuring + space-time usage -- which wasn't always useful and many people found + confusing -- it now measures space usage at various points in the + execution, including the point of peak memory allocation. Its + output format has also changed: instead of producing PostScript + graphs and HTML text, it produces a single text output (via the new + 'ms_print' script) that contains both a graph and the old textual + information, but in a more compact and readable form. Finally, the + new version should be more reliable than the old one, as it has been + tested more thoroughly. + +- Cachegrind has been extended to do branch-misprediction profiling. 
+ Both conditional and indirect branches are profiled. The default + behaviour of Cachegrind is unchanged. To use the new functionality, + give the option --branch-sim=yes. + +- A new category of "experimental tools" has been created. Such tools + may not work as well as the standard tools, but are included because + some people will find them useful, and because exposure to a wider + user group provides tool authors with more end-user feedback. These + tools have a "exp-" prefix attached to their names to indicate their + experimental nature. Currently there are two experimental tools: + + * exp-Omega: an instantaneous leak detector. See + exp-omega/docs/omega_introduction.txt. + + * exp-DRD: a data race detector based on the happens-before + relation. See exp-drd/docs/README.txt. + +- Scalability improvements for very large programs, particularly those + which have a million or more malloc'd blocks in use at once. These + improvements mostly affect Memcheck. Memcheck is also up to 10% + faster for all programs, with x86-linux seeing the largest + improvement. + +- Works well on the latest Linux distros. Has been tested on Fedora + Core 8 (x86, amd64, ppc32, ppc64) and openSUSE 10.3. glibc 2.6 and + 2.7 are supported. gcc-4.3 (in its current pre-release state) is + supported. At the same time, 3.3.0 retains support for older + distros. + +- The documentation has been modestly reorganised with the aim of + making it easier to find information on common-usage scenarios. + Some advanced material has been moved into a new chapter in the main + manual, so as to unclutter the main flow, and other tidying up has + been done. + +- There is experimental support for AIX 5.3, both 32-bit and 64-bit + processes. You need to be running a 64-bit kernel to use Valgrind + on a 64-bit executable. + +- There have been some changes to command line options, which may + affect you: + + * --log-file-exactly and + --log-file-qualifier options have been removed. 
+ + To make up for this, the --log-file option has been made more powerful. + It now accepts a %p format specifier, which is replaced with the + process ID, and a %q{FOO} format specifier, which is replaced with + the contents of the environment variable FOO. + + * --child-silent-after-fork=yes|no [no] + + Causes Valgrind to not show any debugging or logging output for + the child process resulting from a fork() call. This can make the + output less confusing (although more misleading) when dealing with + processes that create children. + + * --cachegrind-out-file, --callgrind-out-file and --massif-out-file + + These control the names of the output files produced by + Cachegrind, Callgrind and Massif. They accept the same %p and %q + format specifiers that --log-file accepts. --callgrind-out-file + replaces Callgrind's old --base option. + + * Cachegrind's 'cg_annotate' script no longer uses the --<pid> + option to specify the output file. Instead, the first non-option + argument is taken to be the name of the output file, and any + subsequent non-option arguments are taken to be the names of + source files to be annotated. + + * Cachegrind and Callgrind now use directory names where possible in + their output files. This means that the -I option to + 'cg_annotate' and 'callgrind_annotate' should not be needed in + most cases. It also means they can correctly handle the case + where two source files in different directories have the same + name. + +- Memcheck offers a new suppression kind: "Jump". This is for + suppressing jump-to-invalid-address errors. Previously you had to + use an "Addr1" suppression, which didn't make much sense. + +- Memcheck has new flags --malloc-fill=<hexnum> and + --free-fill=<hexnum> which fill malloc'd / free'd areas with the + specified byte. This can help shake out obscure memory corruption + problems. The definedness and addressability of these areas is + unchanged -- only the contents are affected. 
+ +- The behaviour of Memcheck's client requests VALGRIND_GET_VBITS and + VALGRIND_SET_VBITS has changed slightly. They no longer issue + addressability errors -- if either array is partially unaddressable, + they just return 3 (as before). Also, SET_VBITS doesn't report + definedness errors if any of the V bits are undefined. + +- The following Memcheck client requests have been removed: + VALGRIND_MAKE_NOACCESS + VALGRIND_MAKE_WRITABLE + VALGRIND_MAKE_READABLE + VALGRIND_CHECK_WRITABLE + VALGRIND_CHECK_READABLE + VALGRIND_CHECK_DEFINED + They were deprecated in 3.2.0, when equivalent but better-named client + requests were added. See the 3.2.0 release notes for more details. + +- The behaviour of the tool Lackey has changed slightly. First, the output + from --trace-mem has been made more compact, to reduce the size of the + traces. Second, a new option --trace-superblocks has been added, which + shows the addresses of superblocks (code blocks) as they are executed. + +- The following bugs have been fixed. Note that "n-i-bz" stands for + "not in bugzilla" -- that is, a bug that was reported to us but + never got a bugzilla entry. We encourage you to file bugs in + bugzilla (http://bugs.kde.org/enter_valgrind_bug.cgi) rather than + mailing the developers (or mailing lists) directly. + + n-i-bz x86_linux_REDIR_FOR_index() broken + n-i-bz guest-amd64/toIR.c:2512 (dis_op2_E_G): Assertion `0' failed. + n-i-bz Support x86 INT insn (INT (0xCD) 0x40 - 0x43) + n-i-bz Add sys_utimensat system call for Linux x86 platform + 79844 Helgrind complains about race condition which does not exist + 82871 Massif output function names too short + 89061 Massif: ms_main.c:485 (get_XCon): Assertion `xpt->max_chi...' 
+ 92615 Write output from Massif at crash + 95483 massif feature request: include peak allocation in report + 112163 MASSIF crashed with signal 7 (SIGBUS) after running 2 days + 119404 problems running setuid executables (partial fix) + 121629 add instruction-counting mode for timing + 127371 java vm giving unhandled instruction bytes: 0x26 0x2E 0x64 0x65 + 129937 ==150380 + 129576 Massif loses track of memory, incorrect graphs + 132132 massif --format=html output does not do html entity escaping + 132950 Heap alloc/usage summary + 133962 unhandled instruction bytes: 0xF2 0x4C 0xF 0x10 + 134990 use -fno-stack-protector if possible + 136382 ==134990 + 137396 I would really like helgrind to work again... + 137714 x86/amd64->IR: 0x66 0xF 0xF7 0xC6 (maskmovq, maskmovdq) + 141631 Massif: percentages don't add up correctly + 142706 massif numbers don't seem to add up + 143062 massif crashes on app exit with signal 8 SIGFPE + 144453 (get_XCon): Assertion 'xpt->max_children != 0' failed. + 145559 valgrind aborts when malloc_stats is called + 145609 valgrind aborts all runs with 'repeated section!' + 145622 --db-attach broken again on x86-64 + 145837 ==149519 + 145887 PPC32: getitimer() system call is not supported + 146252 ==150678 + 146456 (update_XCon): Assertion 'xpt->curr_space >= -space_delta'... + 146701 ==134990 + 146781 Adding support for private futexes + 147325 valgrind internal error on syscall (SYS_io_destroy, 0) + 147498 amd64->IR: 0xF0 0xF 0xB0 0xF (lock cmpxchg %cl,(%rdi)) + 147545 Memcheck: mc_main.c:817 (get_sec_vbits8): Assertion 'n' failed. + 147628 SALC opcode 0xd6 unimplemented + 147825 crash on amd64-linux with gcc 4.2 and glibc 2.6 (CFI) + 148174 Incorrect type of freed_list_volume causes assertion [...] 
+ 148447 x86_64 : new NOP codes: 66 66 66 66 2e 0f 1f + 149182 PPC Trap instructions not implemented in valgrind + 149504 Assertion hit on alloc_xpt->curr_space >= -space_delta + 149519 ppc32: V aborts with SIGSEGV on execution of a signal handler + 149892 ==137714 + 150044 SEGV during stack deregister + 150380 dwarf/gcc interoperation (dwarf3 read problems) + 150408 ==148447 + 150678 guest-amd64/toIR.c:3741 (dis_Grp5): Assertion `sz == 4' failed + 151209 V unable to execute programs for users with UID > 2^16 + 151938 help on --db-command= misleading + 152022 subw $0x28, %%sp causes assertion failure in memcheck + 152357 inb and outb not recognized in 64-bit mode + 152501 vex x86->IR: 0x27 0x66 0x89 0x45 (daa) + 152818 vex x86->IR: 0xF3 0xAC 0xFC 0x9C (rep lodsb) + +Developer-visible changes: + +- The names of some functions and types within the Vex IR have + changed. Run 'svn log -r1689 VEX/pub/libvex_ir.h' for full details. + Any existing standalone tools will have to be updated to reflect + these changes. The new names should be clearer. The file + VEX/pub/libvex_ir.h is also much better commented. + +- A number of new debugging command line options have been added. + These are mostly of use for debugging the symbol table and line + number readers: + + --trace-symtab-patt= limit debuginfo tracing to obj name + --trace-cfi=no|yes show call-frame-info details? [no] + --debug-dump=syms mimic /usr/bin/readelf --syms + --debug-dump=line mimic /usr/bin/readelf --debug-dump=line + --debug-dump=frames mimic /usr/bin/readelf --debug-dump=frames + --sym-offsets=yes|no show syms in form 'name+offset' ? [no] + +- Internally, the code base has been further factorised and + abstractified, particularly with respect to support for non-Linux + OSs. + +(3.3.0.RC1: 2 Dec 2007, vex r1803, valgrind r7268). +(3.3.0.RC2: 5 Dec 2007, vex r1804, valgrind r7282). +(3.3.0.RC3: 9 Dec 2007, vex r1804, valgrind r7288). +(3.3.0: 10 Dec 2007, vex r1804, valgrind r7290). 
+ + + Release 3.2.3 (29 Jan 2007) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Unfortunately 3.2.2 introduced a regression which can cause an @@ -16,10 +1102,9 @@ Release 3.2.2 (22 Jan 2007) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3.2.2 fixes a bunch of bugs in 3.2.1, adds support for glibc-2.5 based -systems (openSUSE 10.2, Fedora Core 6), further reduces memcheck's -false error rate on x86/amd64, improves support for icc-9.X compiled -code, and brings modest performance improvements in some areas, -including amd64 floating point, powerpc support, and startup +systems (openSUSE 10.2, Fedora Core 6), improves support for icc-9.X +compiled code, and brings modest performance improvements in some +areas, including amd64 floating point, powerpc support, and startup responsiveness on all targets. The fixed bugs are as follows. Note that "n-i-bz" stands for "not in Index: README =========================================================================== --- README 2012/05/26 10:11:27 #1 +++ README 2012/05/26 10:11:27 @@ -9,50 +9,39 @@ For instructions on how to build/install, see the end of this file. -Valgrind works on most, reasonably recent Linux setups. If you have -problems, consult FAQ.txt to see if there are workarounds. +If you have problems, consult the FAQ to see if there are workarounds. + Executive Summary ~~~~~~~~~~~~~~~~~ -Valgrind is an award-winning suite of tools for debugging and profiling -Linux programs. With the tools that come with Valgrind, you can -automatically detect many memory management and threading bugs, avoiding -hours of frustrating bug-hunting, making your programs more stable. You can -also perform detailed profiling, to speed up and reduce memory use of your -programs. +Valgrind is an award-winning instrumentation framework for building +dynamic analysis tools. There are Valgrind tools that can automatically +detect many memory management and threading bugs, and profile your +programs in detail. You can also use Valgrind to build new tools. 
-The Valgrind distribution currently includes four tools: a memory error -detector, a thread error detector, a cache profiler and a heap profiler. +The Valgrind distribution currently includes six production-quality +tools: a memory error detector, two thread error detectors, a cache and +branch-prediction profiler, a call-graph generating cache profiler, and +a heap profiler. It also includes two experimental tools: a +heap/stack/global array overrun detector, and a SimPoint basic block vector +generator. -To give you an idea of what Valgrind tools do, when a program is run -under the supervision of Memcheck, the memory error detector tool, all -reads and writes of memory are checked, and calls to malloc/new/free/delete -are intercepted. As a result, Memcheck can detect if your program: +Valgrind is closely tied to details of the CPU, operating system and to +a lesser extent, compiler and basic C libraries. This makes it difficult +to make it portable. Nonetheless, it is available for the following +platforms: - - Accesses memory it shouldn't (areas not yet allocated, areas that have - been freed, areas past the end of heap blocks, inaccessible areas of - the stack). +- x86/Linux +- AMD64/Linux +- PPC32/Linux +- PPC64/Linux +- x86/MacOSX +- AMD64/MacOSX - - Uses uninitialised values in dangerous ways. - - - Leaks memory. - - - Does bad frees of heap blocks (double frees, mismatched frees). - - - Passes overlapping source and destination memory blocks to memcpy() and - related functions. - -Problems like these can be difficult to find by other means, often -lying undetected for long periods, then causing occasional, -difficult-to-diagnose crashes. When one of these errors occurs, you can -attach GDB to your program, so you can poke around and see what's going -on. +Note that AMD64 is just another name for x86-64, and Valgrind runs fine +on Intel processors. Also note that the core of MacOSX is called +"Darwin" and this name is used sometimes. 
-Valgrind is closely tied to details of the CPU, operating system and -to a less extent, compiler and basic C libraries. This makes it -difficult to make it portable. Nonetheless, it is available for -the following platforms: x86/Linux, AMD64/Linux and PPC32/Linux. - Valgrind is licensed under the GNU General Public License, version 2. Read the file COPYING in the source distribution for details. @@ -80,13 +69,12 @@ To install from a tar.bz2 distribution: - 4. Run ./configure, with some options if you wish. The standard - options are documented in the INSTALL file. The only interesting + 4. Run ./configure, with some options if you wish. The only interesting one is the usual --prefix=/where/you/want/it/installed. - 5. Do "make". + 5. Run "make". - 6. Do "make install", possibly as root if the destination permissions + 6. Run "make install", possibly as root if the destination permissions require that. 7. See if it works. Try "valgrind ls -l". Either this works, or it Index: README_DEVELOPERS =========================================================================== --- README_DEVELOPERS 2012/05/26 10:11:27 #1 +++ README_DEVELOPERS 2012/05/26 10:11:27 @@ -10,6 +10,8 @@ This allows you to compile and run with "make" instead of "make install", saving you time. +Or, you can use the 'vg-in-place' script which does that for you. + I recommend compiling with "make --quiet" to further reduce the amount of output spewed out during compilation, letting you actually see any errors, warnings, etc. @@ -65,13 +67,21 @@ a particular tool) requires a bit more trickery but can be achieved without too much problem by following these steps: -(1) Set VALGRIND_LAUNCHER to /bin/valgrind: +(1) Set VALGRIND_LAUNCHER to point to the valgrind executable. Eg: + + export VALGRIND_LAUNCHER=/usr/local/bin/valgrind + + or for an uninstalled version in a source directory $DIR: + + export VALGRIND_LAUNCHER=$DIR/coregrind/valgrind + +(2) Run gdb on the tool executable. 
Eg: - export VALGRIND_LAUNCHER=/usr/local/bin/valgrind + gdb /usr/local/lib/valgrind/ppc32-linux/lackey -(2) Run "gdb /lib/valgrind//": + or - gdb /usr/local/lib/valgrind/ppc32-linux/lackey + gdb $DIR/.in_place/x86-linux/memcheck (3) Do "handle SIGSEGV SIGILL nostop noprint" in GDB to prevent GDB from stopping on a SIGSEGV or SIGILL: @@ -88,30 +98,32 @@ (gdb) run pwd +Steps (1)--(3) can be put in a .gdbinit file, but any directory names must +be fully expanded (ie. not an environment variable). + Self-hosting ~~~~~~~~~~~~ To run Valgrind under Valgrind: -(1) Check out 2 trees, "inner" and "outer". "inner" runs the app - directly and is what you will be profiling. "outer" does the - profiling. +(1) Check out 2 trees, "Inner" and "Outer". Inner runs the app + directly. Outer runs Inner. (2) Configure inner with --enable-inner and build/install as usual. -(3) Configure outer normally and build/install as usual. +(3) Configure Outer normally and build/install as usual. (4) Choose a very simple program (date) and try outer/.../bin/valgrind --sim-hints=enable-outer --trace-children=yes \ --tool=cachegrind -v inner/.../bin/valgrind --tool=none -v prog -If you omit the --trace-children=yes, you'll only monitor inner's launcher +If you omit the --trace-children=yes, you'll only monitor Inner's launcher program, not its stage2. The whole thing is fragile, confusing and slow, but it does work well enough -for you to get some useful performance data. The inner Valgrind has most of +for you to get some useful performance data. Inner has most of its output (ie. those lines beginning with "====") prefixed with a '>', which helps a lot. @@ -119,8 +131,8 @@ so Memcheck is not as useful as it could be. It also has not been tested much, so don't be surprised if you hit problems. -When using self-hosting with an outer callgrind tool, use '--pop-on-jump' -(on the outer). Otherwise, callgrind has much higher memory requirements. 
+When using self-hosting with an outer Callgrind tool, use '--pop-on-jump' +(on the outer). Otherwise, Callgrind has much higher memory requirements. Printing out problematic blocks Index: README_FREEBSD =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- README_FREEBSD Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,14 ---- + So, install ports for autoconf, automake and gmake. + $ sh autogen.sh + $ ./configure --prefix=/where/ever + $ gmake + $ gmake install + + Sun Aug 19 20:26:48 UTC 2007 PS_STRINGS + Valgrind barfs all over the place on setproctitle. + + This also manifests itself in a corrupted environment in + children of child processes when --trace-children=yes is used. + + To cope correctly Valgrind must install the modified argv/envp + pointers in the ps-strings area, and mark it as accessible. Index: README_MISSING_SYSCALL_OR_IOCTL =========================================================================== --- README_MISSING_SYSCALL_OR_IOCTL 2012/05/26 10:11:27 #1 +++ README_MISSING_SYSCALL_OR_IOCTL 2012/05/26 10:11:27 @@ -44,38 +44,47 @@ The syscall wrapper for time() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Removing the debug printing clutter, it looks like this: +The wrapper for the time system call looks like this: - PRE(time) + PRE(sys_time) { /* time_t time(time_t *t); */ - PRINT("time ( %p )",arg1); - if (arg1 != (UWord)NULL) { - PRE_MEM_WRITE( "time", arg1, sizeof(time_t) ); + PRINT("sys_time ( %p )",ARG1); + PRE_REG_READ1(long, "time", int *, t); + if (ARG1 != 0) { + PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) ); } } - POST(time) + POST(sys_time) { - if (arg1 != (UWord)NULL) { - POST_MEM_WRITE( arg1, sizeof(vki_time_t) ); + if (ARG1 != 0) { + POST_MEM_WRITE( ARG1, sizeof(vki_time_t) ); } } -The first thing we do happens before the syscall occurs, in the PRE() function: -if a non-NULL buffer is passed in as the argument, tell the tool that the +The first thing we do 
happens before the syscall occurs, in the PRE() function. +The PRE() function typically starts by invoking the PRINT() macro. This +PRINT() macro implements support for the --trace-syscalls command line option. +Next, the tool is told the return type of the syscall, that the syscall has +one argument, the type of the syscall argument and that the argument is being +read from a register: + + PRE_REG_READ1(long, "time", int *, t); + +Next, if a non-NULL buffer is passed in as the argument, tell the tool that the buffer is about to be written to: - if (arg1 != (UWord)NULL) { - PRE_MEM_WRITE( "time", arg1, sizeof(vki_time_t) ); + if (ARG1 != 0) { + PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) ); } Finally, the really important bit, after the syscall occurs, in the POST() function: if, and only if, the system call was successful, tell the tool that the memory was written: - if (arg1 != (UInt)NULL) { - POST_MEM_WRITE( arg1, sizeof(vki_time_t) ); + if (ARG1 != 0) { + POST_MEM_WRITE( ARG1, sizeof(vki_time_t) ); } The POST() function won't be called if the syscall failed, so you @@ -101,7 +110,7 @@ grep NNN /usr/include/asm/unistd.h This should tell you something like __NR_mysyscallname. - Copy this entry to coregrind/vki_unistd-$(VG_PLATFORM).h. + Copy this entry to include/vki/vki-scnums-$(VG_PLATFORM).h. 2. Do 'man 2 mysyscallname' to get some idea of what the syscall @@ -134,8 +143,7 @@ dependant ones (in syswrap-$(PLATFORM)-linux.c). The *XY variant if it requires a PRE() and POST() function, and the *X_ variant if it only requires a PRE() - function. The 2nd arg of these macros indicate if the syscall - could possibly block. + function. If you find this difficult, read the wrappers for other syscalls for ideas.
A good tip is to look for the wrapper for a syscall Index: README_PACKAGERS =========================================================================== --- README_PACKAGERS 2012/05/26 10:11:27 #1 +++ README_PACKAGERS 2012/05/26 10:11:27 @@ -2,8 +2,30 @@ Greetings, packaging person! This information is aimed at people building binary distributions of Valgrind. -Thanks for taking the time and effort to make a binary distribution -of Valgrind. The following notes may save you some trouble. +Thanks for taking the time and effort to make a binary distribution of +Valgrind. The following notes may save you some trouble. + + +-- Do not ship your Linux distro with a completely stripped + /lib/ld.so. At least leave the debugging symbol names on -- line + number info isn't necessary. If you don't want to leave symbols on + ld.so, alternatively you can have your distro install ld.so's + debuginfo package by default, or make ld.so.debuginfo be a + requirement of your Valgrind RPM/DEB/whatever. + + Reason for this is that Valgrind's Memcheck tool needs to intercept + calls to, and provide replacements for, some symbols in ld.so at + startup (most importantly strlen). If it cannot do that, Memcheck + shows a large number of false positives due to the highly optimised + strlen (etc) routines in ld.so. This has caused some trouble in + the past. As of version 3.3.0, on some targets (ppc32-linux, + ppc64-linux), Memcheck will simply stop at startup (and print an + error message) if such symbols are not present, because it is + infeasible to continue. + + It's not like this is going to cost you much space. We only need + the symbols for ld.so (a few K at most). Not the debug info and + not any debuginfo or extra symbols for any other libraries. -- (Unfortunate but true) When you configure to build with the @@ -16,9 +38,10 @@ So you can't build a relocatable RPM / whatever from Valgrind. --- Don't strip the debug info off stage2 or libpthread.so. 
- Valgrind will still work if you do, but it will generate less - helpful error messages. Here's an example: +-- Don't strip the debug info off lib/valgrind/$platform/vgpreload*.so + in the installation tree. Either Valgrind won't work at all, or it + will still work if you do, but will generate less helpful error + messages. Here's an example: Mismatched free() / delete / delete [] at 0x40043249: free (vg_clientfuncs.c:171) @@ -32,15 +55,15 @@ by 0x4C21788F: OLEFilter::convert(QCString const &) (olefilter.cc:272) This tells you that some memory allocated with new[] was freed with - free(). If stage2 was stripped the message would look like this: + free(). Mismatched free() / delete / delete [] - at 0x40043249: (inside stage2) + at 0x40043249: (inside vgpreload_memcheck.so) by 0x4102BB4E: QGArray::~QGArray(void) (tools/qgarray.cpp:149) by 0x4C261C41: PptDoc::~PptDoc(void) (include/qmemarray.h:60) by 0x4C261F0E: PptXml::~PptXml(void) (pptxml.cc:44) Address 0x4BB292A8 is 0 bytes inside a block of size 64 alloc'd - at 0x4004318C: (inside stage2) + at 0x4004318C: (inside vgpreload_memcheck.so) by 0x4C21BC15: KLaola::readSBStream(int) const (klaola.cc:314) by 0x4C21C155: KLaola::stream(KLaola::OLENode const *) (klaola.cc:416) by 0x4C21788F: OLEFilter::convert(QCString const &) (olefilter.cc:272) @@ -51,15 +74,21 @@ from valgrind. --- Please test the final installation works by running it on - something huge. I suggest checking that it can start and - exit successfully both Mozilla-1.0 and OpenOffice.org 1.0. - I use these as test programs, and I know they fairly thoroughly - exercise Valgrind. The command lines to use are: +-- Don't strip symbols from lib/valgrind/* in the installation tree. + Doing so will likely cause problems. Removing the line number info is + probably OK (at least for some of the files in that directory), although + that has not been tested by the Valgrind developers. 
+ + +-- Please test the final installation works by running it on something + huge. I suggest checking that it can start and exit successfully + both Firefox and OpenOffice.org. I use these as test programs, and I + know they fairly thoroughly exercise Valgrind. The command lines to use + are: - valgrind -v --trace-children=yes --workaround-gcc296-bugs=yes mozilla + valgrind -v --trace-children=yes firefox - valgrind -v --trace-children=yes --workaround-gcc296-bugs=yes soffice + valgrind -v --trace-children=yes soffice If you find any more hints/tips for packaging, please report Index: VEX/LICENSE.README =========================================================================== --- VEX/LICENSE.README 2012/05/26 10:11:27 #1 +++ VEX/LICENSE.README 2012/05/26 10:11:27 @@ -2,7 +2,7 @@ This directory and its children contain LibVEX, a library for dynamic binary instrumentation and translation. -Copyright (C) 2004-2007 OpenWorks LLP. All rights reserved. +Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. This library is made available under a dual licensing scheme. 
==== //depot/vendor/valgrind/VEX/Makefile#1 - === Index: VEX/Makefile-gcc =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/Makefile-gcc Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,321 ---- + + PUB_HEADERS = pub/libvex_basictypes.h \ + pub/libvex_ir.h \ + pub/libvex.h \ + pub/libvex_trc_values.h \ + pub/libvex_emwarn.h \ + pub/libvex_guest_x86.h \ + pub/libvex_guest_amd64.h \ + pub/libvex_guest_arm.h \ + pub/libvex_guest_ppc32.h \ + pub/libvex_guest_ppc64.h \ + pub/libvex_guest_offsets.h + + PRIV_HEADERS = priv/host_x86_defs.h \ + priv/host_amd64_defs.h \ + priv/host_arm_defs.h \ + priv/host_ppc_defs.h \ + priv/host_generic_regs.h \ + priv/host_generic_simd64.h \ + priv/main_globals.h \ + priv/main_util.h \ + priv/guest_generic_x87.h \ + priv/guest_generic_bb_to_IR.h \ + priv/guest_x86_defs.h \ + priv/guest_amd64_defs.h \ + priv/guest_arm_defs.h \ + priv/guest_ppc_defs.h \ + priv/ir_match.h \ + priv/ir_opt.h + + LIB_OBJS = priv/ir_defs.o \ + priv/ir_match.o \ + priv/ir_opt.o \ + priv/main_main.o \ + priv/main_globals.o \ + priv/main_util.o \ + priv/host_x86_defs.o \ + priv/host_amd64_defs.o \ + priv/host_arm_defs.o \ + priv/host_ppc_defs.o \ + priv/host_x86_isel.o \ + priv/host_amd64_isel.o \ + priv/host_arm_isel.o \ + priv/host_ppc_isel.o \ + priv/host_generic_regs.o \ + priv/host_generic_simd64.o \ + priv/host_generic_reg_alloc2.o \ + priv/guest_generic_x87.o \ + priv/guest_generic_bb_to_IR.o \ + priv/guest_x86_helpers.o \ + priv/guest_amd64_helpers.o \ + priv/guest_arm_helpers.o \ + priv/guest_ppc_helpers.o \ + priv/guest_x86_toIR.o \ + priv/guest_amd64_toIR.o \ + priv/guest_arm_toIR.o \ + priv/guest_ppc_toIR.o + + PUB_INCLUDES = -Ipub + + # Do not add any priv/host-ARCH or priv/guest-ARCH directories to this + # list, as they contain duplicate file names (each host has a hdefs.h, + # for example). 
+ PRIV_INCLUDES = -Ipriv + + + ifndef CC + CC = gcc + endif + ifndef AR + AR = ar + endif + + # Put -g -O2 after any flags we inherit from V. -O2 vs -O + # makes a significant difference, at least with gcc4. + CCFLAGS = -Wall -Wmissing-prototypes -Wshadow \ + -Wpointer-arith -Wbad-function-cast -Wcast-qual \ + -Wcast-align -Wmissing-declarations \ + $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing + + #CC = icc + #CCFLAGS = -g -Wall -wd981 -wd279 -wd1287 -wd869 -wd111 -wd188 -wd186 + # 981: operands are evaluated in unspecified order + # 279: controlling expression is constant + # 1287: invalid attribute for parameter + # 869: parameter "..." was never referenced + # 111: statement is unreachable + # 188: enumerated type mixed with another type + # (the above are for icc 8.0 -- 8.0.0.55 I think) + # 186: pointless comparison of unsigned integer with zero + + # kludge: stops V biarch builds screwing up at -j 2 or above + # The Right fix is to autoconf/automake-ise vex. + .NOTPARALLEL: + + all: vex + + # Empty, needed for Valgrind + install: + + scratch: clean version all + + vex: libvex.a test_main.o + $(CC) $(CCFLAGS) -o vex test_main.o libvex.a + + libvex.a: $(LIB_OBJS) + rm -f libvex.a + $(AR) crus libvex.a $(LIB_OBJS) + + + # The idea with these TAG-s is to mark the flavour of libvex.a + # most recently built, so if the same target is re-requested, we + # don't rebuild everything, but if a different one is requested + # then we scrub everything and start over. + + libvex-x86-linux.a: TAG-x86-linux libvex.a + mv -f libvex.a libvex-x86-linux.a + TAG-x86-linux: + if [ ! -f TAG-x86-linux ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-x86-linux + + libvex-amd64-linux.a: TAG-amd64-linux libvex.a + mv -f libvex.a libvex-amd64-linux.a + TAG-amd64-linux: + if [ ! -f TAG-amd64-linux ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-amd64-linux + + libvex-ppc32-linux.a: TAG-ppc32-linux libvex.a + mv -f libvex.a libvex-ppc32-linux.a + TAG-ppc32-linux: + if [ ! 
-f TAG-ppc32-linux ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-ppc32-linux + + libvex-ppc64-linux.a: TAG-ppc64-linux libvex.a + mv -f libvex.a libvex-ppc64-linux.a + TAG-ppc64-linux: + if [ ! -f TAG-ppc64-linux ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-ppc64-linux + + libvex-ppc32-aix5.a: TAG-ppc32-aix5 libvex.a + mv -f libvex.a libvex-ppc32-aix5.a + TAG-ppc32-aix5: + if [ ! -f TAG-ppc32-aix5 ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-ppc32-aix5 + + libvex-ppc64-aix5.a: TAG-ppc64-aix5 libvex.a + mv -f libvex.a libvex-ppc64-aix5.a + TAG-ppc64-aix5: + if [ ! -f TAG-ppc64-aix5 ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-ppc64-aix5 + + libvex-x86-darwin.a: TAG-x86-darwin libvex.a + mv -f libvex.a libvex-x86-darwin.a + TAG-x86-darwin: + if [ ! -f TAG-x86-darwin ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-x86-darwin + + libvex-amd64-darwin.a: TAG-amd64-darwin libvex.a + mv -f libvex.a libvex-amd64-darwin.a + TAG-amd64-darwin: + if [ ! -f TAG-amd64-darwin ] ; then rm -f $(LIB_OBJS) TAG-* libvex.a ; fi + touch TAG-amd64-darwin + + + # This doesn't get rid of priv/main/vex_svnversion.h, because + # that can't be regenerated in the final Valgrind tarball, and + # so if 'make clean' did get rid of it, then in the tarball, + # doing 'make ; make clean ; make' (or distclean) would fail. + clean: + rm -f $(LIB_OBJS) *.a vex test_main.o TAG-* \ + pub/libvex_guest_offsets.h \ + auxprogs/genoffsets.s + + version: + rm -f priv/main/vex_svnversion.h + cat quote.txt >> priv/main/vex_svnversion.h + svnversion -n . 
>> priv/main/vex_svnversion.h + cat quote.txt >> priv/main/vex_svnversion.h + cat newline.txt >> priv/main/vex_svnversion.h + + minidist: version + rm -f vex--minidist-2005MMDD.tar + tar cf vex--minidist-2005MMDD.tar $(PUB_HEADERS) $(PRIV_HEADERS) \ + priv/main/vex_svnversion.h \ + test_main.c test_main.h \ + Makefile \ + `echo $(LIB_OBJS) | sed "s/\.o/\.c/g"` + @echo + @echo minidist done, size and svnversion follow: + @ls -l vex--minidist-2005MMDD.tar + @cat priv/main/vex_svnversion.h + @echo + + # This is very uggerly. Need to sed out both "xyzzyN" and + # "xyzzy$N" since gcc on different targets emits the constants + # differently -- with a leading $ on x86/amd64 but none on ppc32/64. + pub/libvex_guest_offsets.h: + rm -f auxprogs/genoffsets.s + $(CC) $(CCFLAGS) -O -S -o auxprogs/genoffsets.s \ + auxprogs/genoffsets.c + grep xyzzy auxprogs/genoffsets.s | grep define \ + | sed "s/xyzzy\\$$//g" | sed "s/xyzzy//g" \ + > pub/libvex_guest_offsets.h + rm -f auxprogs/genoffsets.s + + + ALL_HEADERS = $(PUB_HEADERS) $(PRIV_HEADERS) + ALL_INCLUDES = $(PUB_INCLUDES) $(PRIV_INCLUDES) + + test_main.o: $(PUB_HEADERS) test_main.c test_main.h + $(CC) $(CCFLAGS) $(PUB_INCLUDES) -o test_main.o \ + -c test_main.c + + priv/ir_defs.o: $(ALL_HEADERS) priv/ir_defs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/ir_defs.o \ + -c priv/ir_defs.c + + priv/ir_match.o: $(ALL_HEADERS) priv/ir_match.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/ir_match.o \ + -c priv/ir_match.c + + priv/ir_opt.o: $(ALL_HEADERS) priv/ir_opt.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/ir_opt.o \ + -c priv/ir_opt.c + + priv/main_main.o: $(ALL_HEADERS) priv/main_main.c \ + priv/main/vex_svnversion.h + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/main_main.o \ + -c priv/main_main.c + + priv/main_globals.o: $(ALL_HEADERS) priv/main_globals.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/main_globals.o \ + -c priv/main_globals.c + + priv/main_util.o: $(ALL_HEADERS) priv/main_util.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o 
priv/main_util.o \ + -c priv/main_util.c + + priv/host_x86_defs.o: $(ALL_HEADERS) priv/host_x86_defs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_x86_defs.o \ + -c priv/host_x86_defs.c + + priv/host_amd64_defs.o: $(ALL_HEADERS) priv/host_amd64_defs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_amd64_defs.o \ + -c priv/host_amd64_defs.c + + priv/host_arm_defs.o: $(ALL_HEADERS) priv/host_arm_defs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm_defs.o \ + -c priv/host_arm_defs.c + + priv/host_ppc_defs.o: $(ALL_HEADERS) priv/host_ppc_defs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_ppc_defs.o \ + -c priv/host_ppc_defs.c + + priv/host_x86_isel.o: $(ALL_HEADERS) priv/host_x86_isel.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_x86_isel.o \ + -c priv/host_x86_isel.c + + priv/host_amd64_isel.o: $(ALL_HEADERS) priv/host_amd64_isel.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_amd64_isel.o \ + -c priv/host_amd64_isel.c + + priv/host_arm_isel.o: $(ALL_HEADERS) priv/host_arm_isel.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_arm_isel.o \ + -c priv/host_arm_isel.c + + priv/host_ppc_isel.o: $(ALL_HEADERS) priv/host_ppc_isel.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_ppc_isel.o \ + -c priv/host_ppc_isel.c + + priv/host_generic_regs.o: $(ALL_HEADERS) priv/host_generic_regs.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_generic_regs.o \ + -c priv/host_generic_regs.c + + priv/host_generic_simd64.o: $(ALL_HEADERS) priv/host_generic_simd64.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_generic_simd64.o \ + -c priv/host_generic_simd64.c + + priv/host_generic_reg_alloc2.o: $(ALL_HEADERS) priv/host_generic_reg_alloc2.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/host_generic_reg_alloc2.o \ + -c priv/host_generic_reg_alloc2.c + + priv/guest_x86_toIR.o: $(ALL_HEADERS) priv/guest_x86_toIR.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_x86_toIR.o \ + -c priv/guest_x86_toIR.c + + priv/guest_generic_x87.o: $(ALL_HEADERS) priv/guest_generic_x87.c 
+ $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_generic_x87.o \ + -c priv/guest_generic_x87.c + + priv/guest_generic_bb_to_IR.o: $(ALL_HEADERS) priv/guest_generic_bb_to_IR.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_generic_bb_to_IR.o \ + -c priv/guest_generic_bb_to_IR.c + + priv/guest_x86_helpers.o: $(ALL_HEADERS) priv/guest_x86_helpers.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_x86_helpers.o \ + -c priv/guest_x86_helpers.c + + priv/guest_amd64_helpers.o: $(ALL_HEADERS) priv/guest_amd64_helpers.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_amd64_helpers.o \ + -c priv/guest_amd64_helpers.c + + priv/guest_amd64_toIR.o: $(ALL_HEADERS) priv/guest_amd64_toIR.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_amd64_toIR.o \ + -c priv/guest_amd64_toIR.c + + priv/guest_arm_helpers.o: $(ALL_HEADERS) priv/guest_arm_helpers.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm_helpers.o \ + -c priv/guest_arm_helpers.c + + priv/guest_arm_toIR.o: $(ALL_HEADERS) priv/guest_arm_toIR.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_arm_toIR.o \ + -c priv/guest_arm_toIR.c + + priv/guest_ppc_helpers.o: $(ALL_HEADERS) priv/guest_ppc_helpers.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_ppc_helpers.o \ + -c priv/guest_ppc_helpers.c + + priv/guest_ppc_toIR.o: $(ALL_HEADERS) priv/guest_ppc_toIR.c + $(CC) $(CCFLAGS) $(ALL_INCLUDES) -o priv/guest_ppc_toIR.o \ + -c priv/guest_ppc_toIR.c Index: VEX/auxprogs/genoffsets.c =========================================================================== --- VEX/auxprogs/genoffsets.c 2012/05/26 10:11:27 #1 +++ VEX/auxprogs/genoffsets.c 2012/05/26 10:11:27 @@ -10,7 +10,7 @@ This file is part of LibVEX, a library for dynamic binary instrumentation and translation. - Copyright (C) 2004-2007 OpenWorks LLP. All rights reserved. + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. This library is made available under a dual licensing scheme. 
@@ -46,7 +46,15 @@ #include -/* A program which generates various guest state offsets. */ +/* A program which, when compiled to assembly, exposes various guest + state offsets. The program isn't executed, since that breaks + cross-compilation. + + It does rely on the assumption that 'my_offsetof(Ty,Field)' is + folded to a constant at compile time, which seems a bit dodgy + to me. On gcc4 it is possible to use __builtin_offsetof, which + sounds safer, but that doesn't exist on older gccs. Oh Well. +*/ #include "../pub/libvex_basictypes.h" #include "../pub/libvex_guest_x86.h" @@ -54,150 +62,93 @@ #include "../pub/libvex_guest_ppc32.h" #include "../pub/libvex_guest_ppc64.h" -Int main ( void ) -{ - // x86 - printf("#define OFFSET_x86_EAX %3d\n", - offsetof(VexGuestX86State,guest_EAX)); +#define VG_STRINGIFZ(__str) #__str +#define VG_STRINGIFY(__str) VG_STRINGIFZ(__str) - printf("#define OFFSET_x86_EBX %3d\n", - offsetof(VexGuestX86State,guest_EBX)); +#define my_offsetof(__type,__field) (&((__type*)0)->__field) - printf("#define OFFSET_x86_ECX %3d\n", - offsetof(VexGuestX86State,guest_ECX)); +/* This forces gcc to evaluate the my_offsetof call at compile time, + and then emits it in the assembly, along with the nonsense string + "xyzzy", for easy greppability. Once this file is compiled to + assembly, the lines containing "xyzzy" are grepped out and sed-ed + to produce the final result. See the Makefile rule for + pub/libvex_guest_offsets.h.
*/ +#define GENOFFSET(_structUppercase,_structLowercase,_fieldname) \ + __asm__ __volatile__ ( \ + "\n#define OFFSET_" \ + VG_STRINGIFY(_structLowercase) "_" \ + VG_STRINGIFY(_fieldname) \ + " xyzzy%0\n" : /*out*/ \ + : /*in*/ "n" \ + (my_offsetof(VexGuest##_structUppercase##State, \ + guest_##_fieldname)) \ + ) - printf("#define OFFSET_x86_EDX %3d\n", - offsetof(VexGuestX86State,guest_EDX)); +void foo ( void ); +__attribute__((noinline)) +void foo ( void ) +{ + // x86 + GENOFFSET(X86,x86,EAX); + GENOFFSET(X86,x86,EBX); + GENOFFSET(X86,x86,ECX); + GENOFFSET(X86,x86,EDX); + GENOFFSET(X86,x86,ESI); + GENOFFSET(X86,x86,EDI); + GENOFFSET(X86,x86,EBP); + GENOFFSET(X86,x86,ESP); + GENOFFSET(X86,x86,EIP); + GENOFFSET(X86,x86,CS); + GENOFFSET(X86,x86,DS); + GENOFFSET(X86,x86,ES); + GENOFFSET(X86,x86,FS); + GENOFFSET(X86,x86,GS); + GENOFFSET(X86,x86,SS); - printf("#define OFFSET_x86_ESI %3d\n", - offsetof(VexGuestX86State,guest_ESI)); + // amd64 + GENOFFSET(AMD64,amd64,RAX); + GENOFFSET(AMD64,amd64,RBX); + GENOFFSET(AMD64,amd64,RCX); + GENOFFSET(AMD64,amd64,RDX); + GENOFFSET(AMD64,amd64,RSI); + GENOFFSET(AMD64,amd64,RDI); + GENOFFSET(AMD64,amd64,RSP); + GENOFFSET(AMD64,amd64,RBP); + GENOFFSET(AMD64,amd64,R8); + GENOFFSET(AMD64,amd64,R9); + GENOFFSET(AMD64,amd64,R10); + GENOFFSET(AMD64,amd64,R11); + GENOFFSET(AMD64,amd64,R12); + GENOFFSET(AMD64,amd64,R13); + GENOFFSET(AMD64,amd64,R14); + GENOFFSET(AMD64,amd64,R15); + GENOFFSET(AMD64,amd64,RIP); - printf("#define OFFSET_x86_EDI %3d\n", - offsetof(VexGuestX86State,guest_EDI)); + // ppc32 + GENOFFSET(PPC32,ppc32,GPR0); + GENOFFSET(PPC32,ppc32,GPR2); + GENOFFSET(PPC32,ppc32,GPR3); + GENOFFSET(PPC32,ppc32,GPR4); + GENOFFSET(PPC32,ppc32,GPR5); + GENOFFSET(PPC32,ppc32,GPR6); + GENOFFSET(PPC32,ppc32,GPR7); + GENOFFSET(PPC32,ppc32,GPR8); + GENOFFSET(PPC32,ppc32,GPR9); + GENOFFSET(PPC32,ppc32,GPR10); + GENOFFSET(PPC32,ppc32,CIA); + GENOFFSET(PPC32,ppc32,CR0_0); - printf("#define OFFSET_x86_EBP %3d\n", - 
offsetof(VexGuestX86State,guest_EBP)); - - printf("#define OFFSET_x86_ESP %3d\n", - offsetof(VexGuestX86State,guest_ESP)); - - printf("#define OFFSET_x86_EIP %3d\n", - offsetof(VexGuestX86State,guest_EIP)); - printf("\n"); - - // amd64 - printf("#define OFFSET_amd64_RAX %3d\n", - offsetof(VexGuestAMD64State,guest_RAX)); - - printf("#define OFFSET_amd64_RBX %3d\n", - offsetof(VexGuestAMD64State,guest_RBX)); - - printf("#define OFFSET_amd64_RCX %3d\n", - offsetof(VexGuestAMD64State,guest_RCX)); - - printf("#define OFFSET_amd64_RDX %3d\n", - offsetof(VexGuestAMD64State,guest_RDX)); - - printf("#define OFFSET_amd64_RSI %3d\n", - offsetof(VexGuestAMD64State,guest_RSI)); - - printf("#define OFFSET_amd64_RDI %3d\n", - offsetof(VexGuestAMD64State,guest_RDI)); - - printf("#define OFFSET_amd64_RSP %3d\n", - offsetof(VexGuestAMD64State,guest_RSP)); - - printf("#define OFFSET_amd64_RBP %3d\n", - offsetof(VexGuestAMD64State,guest_RBP)); - - printf("#define OFFSET_amd64_R8 %3d\n", - offsetof(VexGuestAMD64State,guest_R8)); - - printf("#define OFFSET_amd64_R9 %3d\n", - offsetof(VexGuestAMD64State,guest_R9)); - - printf("#define OFFSET_amd64_R10 %3d\n", - offsetof(VexGuestAMD64State,guest_R10)); - - printf("#define OFFSET_amd64_R11 %3d\n", - offsetof(VexGuestAMD64State,guest_R11)); - - printf("#define OFFSET_amd64_R12 %3d\n", - offsetof(VexGuestAMD64State,guest_R12)); - - printf("#define OFFSET_amd64_R13 %3d\n", - offsetof(VexGuestAMD64State,guest_R13)); - - printf("#define OFFSET_amd64_R14 %3d\n", - offsetof(VexGuestAMD64State,guest_R14)); - - printf("#define OFFSET_amd64_R15 %3d\n", - offsetof(VexGuestAMD64State,guest_R15)); - - printf("#define OFFSET_amd64_RIP %3d\n", - offsetof(VexGuestAMD64State,guest_RIP)); - - printf("\n"); - - // ppc32 - printf("#define OFFSET_ppc32_GPR0 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR0)); - - printf("#define OFFSET_ppc32_GPR3 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR3)); - - printf("#define OFFSET_ppc32_GPR4 %3d\n", - 
offsetof(VexGuestPPC32State,guest_GPR4)); - - printf("#define OFFSET_ppc32_GPR5 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR5)); - - printf("#define OFFSET_ppc32_GPR6 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR6)); - - printf("#define OFFSET_ppc32_GPR7 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR7)); - - printf("#define OFFSET_ppc32_GPR8 %3d\n", - offsetof(VexGuestPPC32State,guest_GPR8)); - - printf("#define OFFSET_ppc32_CIA %3d\n", - offsetof(VexGuestPPC32State,guest_CIA)); - - printf("#define OFFSET_ppc32_CR0_0 %3d\n", - offsetof(VexGuestPPC32State,guest_CR0_0)); - - printf("\n"); - - // ppc64 - printf("#define OFFSET_ppc64_GPR0 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR0)); - - printf("#define OFFSET_ppc64_GPR3 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR3)); - - printf("#define OFFSET_ppc64_GPR4 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR4)); - - printf("#define OFFSET_ppc64_GPR5 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR5)); - - printf("#define OFFSET_ppc64_GPR6 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR6)); - - printf("#define OFFSET_ppc64_GPR7 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR7)); - - printf("#define OFFSET_ppc64_GPR8 %4d\n", - offsetof(VexGuestPPC64State,guest_GPR8)); - - printf("#define OFFSET_ppc64_CIA %4d\n", - offsetof(VexGuestPPC64State,guest_CIA)); - - printf("#define OFFSET_ppc64_CR0_0 %4d\n", - offsetof(VexGuestPPC64State,guest_CR0_0)); - - printf("\n"); - - return 0; + // ppc64 + GENOFFSET(PPC64,ppc64,GPR0); + GENOFFSET(PPC64,ppc64,GPR2); + GENOFFSET(PPC64,ppc64,GPR3); + GENOFFSET(PPC64,ppc64,GPR4); + GENOFFSET(PPC64,ppc64,GPR5); + GENOFFSET(PPC64,ppc64,GPR6); + GENOFFSET(PPC64,ppc64,GPR7); + GENOFFSET(PPC64,ppc64,GPR8); + GENOFFSET(PPC64,ppc64,GPR9); + GENOFFSET(PPC64,ppc64,GPR10); + GENOFFSET(PPC64,ppc64,CIA); + GENOFFSET(PPC64,ppc64,CR0_0); } Index: VEX/newline.txt =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/newline.txt 
Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1 ---- + ==== //depot/vendor/valgrind/VEX/priv/guest-amd64/gdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-amd64/ghelpers.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-amd64/toIR.c#2 - === ==== //depot/vendor/valgrind/VEX/priv/guest-arm/gdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-arm/ghelpers.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-arm/toIR.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-generic/bb_to_IR.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-generic/bb_to_IR.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-generic/g_generic_x87.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-generic/g_generic_x87.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-ppc/gdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-ppc/ghelpers.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-ppc/toIR.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-x86/gdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-x86/ghelpers.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/guest-x86/toIR.c#1 - === Index: VEX/priv/guest_amd64_defs.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_amd64_defs.h Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,436 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_amd64_defs.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. 
+ + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Only to be used within the guest-amd64 directory. */ + + #ifndef __VEX_GUEST_AMD64_DEFS_H + #define __VEX_GUEST_AMD64_DEFS_H + + + /*---------------------------------------------------------*/ + /*--- amd64 to IR conversion ---*/ + /*---------------------------------------------------------*/ + + /* Convert one amd64 insn to IR. See the type DisOneInstrFn in + bb_to_IR.h. 
*/ + extern + DisResult disInstr_AMD64 ( IRSB* irbb, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian ); + + /* Used by the optimiser to specialise calls to helpers. */ + extern + IRExpr* guest_amd64_spechelper ( HChar* function_name, + IRExpr** args ); + + /* Describes to the optimiser which part of the guest state require + precise memory exceptions. This is logically part of the guest + state description. */ + extern + Bool guest_amd64_state_requires_precise_mem_exns ( Int, Int ); + + extern + VexGuestLayout amd64guest_layout; + + + /*---------------------------------------------------------*/ + /*--- amd64 guest helpers ---*/ + /*---------------------------------------------------------*/ + + /* --- CLEAN HELPERS --- */ + + extern ULong amd64g_calculate_rflags_all ( + ULong cc_op, + ULong cc_dep1, ULong cc_dep2, ULong cc_ndep + ); + + extern ULong amd64g_calculate_rflags_c ( + ULong cc_op, + ULong cc_dep1, ULong cc_dep2, ULong cc_ndep + ); + + extern ULong amd64g_calculate_condition ( + ULong/*AMD64Condcode*/ cond, + ULong cc_op, + ULong cc_dep1, ULong cc_dep2, ULong cc_ndep + ); + + extern ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ); + + extern ULong amd64g_calculate_RCR ( + ULong arg, ULong rot_amt, ULong rflags_in, Long sz + ); + + extern ULong amd64g_calculate_RCL ( + ULong arg, ULong rot_amt, ULong rflags_in, Long sz + ); + + extern ULong amd64g_check_fldcw ( ULong fpucw ); + + extern ULong amd64g_create_fpucw ( ULong fpround ); + + extern ULong amd64g_check_ldmxcsr ( ULong mxcsr ); + + extern ULong amd64g_create_mxcsr ( ULong sseround ); + + extern VexEmWarn amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ); + + extern void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ); + + /* Translate a guest virtual_addr into a guest linear address by + consulting the 
supplied LDT/GDT structures. Their representation + must be as specified in pub/libvex_guest_amd64.h. To indicate a + translation failure, 1<<32 is returned. On success, the lower 32 + bits of the returned result indicate the linear address. + */ + //extern + //ULong amd64g_use_seg_selector ( HWord ldt, HWord gdt, + // UInt seg_selector, UInt virtual_addr ); + + extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong ); + extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong ); + extern ULong amd64g_calculate_mmx_pmovmskb ( ULong ); + extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); + + + /* --- DIRTY HELPERS --- */ + + extern ULong amd64g_dirtyhelper_loadF80le ( ULong/*addr*/ ); + + extern void amd64g_dirtyhelper_storeF80le ( ULong/*addr*/, ULong/*data*/ ); + + extern void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st ); + extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ); + + extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ); + + extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); + + extern ULong amd64g_dirtyhelper_RDTSC ( void ); + + extern ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ); + extern void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, + ULong sz/*1,2 or 4*/ ); + + //extern void amd64g_dirtyhelper_CPUID_sse0 ( VexGuestAMD64State* ); + //extern void amd64g_dirtyhelper_CPUID_sse1 ( VexGuestAMD64State* ); + //extern void amd64g_dirtyhelper_CPUID_sse2 ( VexGuestAMD64State* ); + + //extern void amd64g_dirtyhelper_FSAVE ( VexGuestAMD64State*, HWord ); + + //extern VexEmWarn + // amd64g_dirtyhelper_FRSTOR ( VexGuestAMD64State*, HWord ); + + //extern void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ); + + //extern VexEmWarn + // amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ); + + + + /*---------------------------------------------------------*/ + /*--- Condition code stuff ---*/ + 
/*---------------------------------------------------------*/ + + /* rflags masks */ + #define AMD64G_CC_SHIFT_O 11 + #define AMD64G_CC_SHIFT_S 7 + #define AMD64G_CC_SHIFT_Z 6 + #define AMD64G_CC_SHIFT_A 4 + #define AMD64G_CC_SHIFT_C 0 + #define AMD64G_CC_SHIFT_P 2 + + #define AMD64G_CC_MASK_O (1ULL << AMD64G_CC_SHIFT_O) + #define AMD64G_CC_MASK_S (1ULL << AMD64G_CC_SHIFT_S) + #define AMD64G_CC_MASK_Z (1ULL << AMD64G_CC_SHIFT_Z) + #define AMD64G_CC_MASK_A (1ULL << AMD64G_CC_SHIFT_A) + #define AMD64G_CC_MASK_C (1ULL << AMD64G_CC_SHIFT_C) + #define AMD64G_CC_MASK_P (1ULL << AMD64G_CC_SHIFT_P) + + /* FPU flag masks */ + #define AMD64G_FC_SHIFT_C3 14 + #define AMD64G_FC_SHIFT_C2 10 + #define AMD64G_FC_SHIFT_C1 9 + #define AMD64G_FC_SHIFT_C0 8 + + #define AMD64G_FC_MASK_C3 (1ULL << AMD64G_FC_SHIFT_C3) + #define AMD64G_FC_MASK_C2 (1ULL << AMD64G_FC_SHIFT_C2) + #define AMD64G_FC_MASK_C1 (1ULL << AMD64G_FC_SHIFT_C1) + #define AMD64G_FC_MASK_C0 (1ULL << AMD64G_FC_SHIFT_C0) + + + /* %RFLAGS thunk descriptors. A four-word thunk is used to record + details of the most recent flag-setting operation, so the flags can + be computed later if needed. It is possible to do this a little + more efficiently using a 3-word thunk, but that makes it impossible + to describe the flag data dependencies sufficiently accurately for + Memcheck. Hence 4 words are used, with minimal loss of efficiency. + + The four words are: + + CC_OP, which describes the operation. + + CC_DEP1 and CC_DEP2. These are arguments to the operation. + We want Memcheck to believe that the resulting flags are + data-dependent on both CC_DEP1 and CC_DEP2, hence the + name DEP. + + CC_NDEP. This is a 3rd argument to the operation which is + sometimes needed. We arrange things so that Memcheck does + not believe the resulting flags are data-dependent on CC_NDEP + ("not dependent"). 
+
+    To make Memcheck believe that (the definedness of) the encoded
+    flags depend only on (the definedness of) CC_DEP1 and CC_DEP2
+    requires two things:
+
+    (1) In the guest state layout info (amd64guest_layout), CC_OP and
+        CC_NDEP are marked as always defined.
+
+    (2) When passing the thunk components to an evaluation function
+        (amd64g_calculate_condition, amd64g_calculate_rflags_all,
+        amd64g_calculate_rflags_c) the IRCallee's mcx_mask must be set
+        so as to exclude from consideration all passed args except
+        CC_DEP1 and CC_DEP2.
+
+    Strictly speaking only (2) is necessary for correctness.  However,
+    (1) helps efficiency in that since (2) means we never ask about the
+    definedness of CC_OP or CC_NDEP, we may as well not even bother to
+    track their definedness.
+
+    When building the thunk, it is always necessary to write words into
+    CC_DEP1 and CC_DEP2, even if those args are not used given the
+    CC_OP field (eg, CC_DEP2 is not used if CC_OP is
+    AMD64G_CC_OP_LOGICB/W/L/Q).
+    This is important because otherwise Memcheck could give false
+    positives as it does not understand the relationship between the
+    CC_OP field and CC_DEP1 and CC_DEP2, and so believes that the
+    definedness of the stored flags always depends on both CC_DEP1 and
+    CC_DEP2.
+
+    However, it is only necessary to set CC_NDEP when the CC_OP value
+    requires it, because Memcheck ignores CC_NDEP, and the evaluation
+    functions do understand the CC_OP fields and will only examine
+    CC_NDEP for suitable values of CC_OP.
+
+    A summary of the field usages is:
+
+    Operation          DEP1            DEP2            NDEP
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    add/sub/mul        first arg       second arg      unused
+
+    adc/sbb            first arg       (second arg)
+                                       XOR old_carry   old_carry
+
+    and/or/xor         result          zero            unused
+
+    inc/dec            result          zero            old_carry
+
+    shl/shr/sar        result          subshifted-     unused
+                                       result
+
+    rol/ror            result          zero            old_flags
+
+    copy               old_flags       zero            unused.
+
+    Therefore Memcheck will believe the following:
+
+    * add/sub/mul -- definedness of result flags depends on definedness
+      of both args.
+
+    * adc/sbb -- definedness of result flags depends on definedness of
+      both args and definedness of the old C flag.  Because only two
+      DEP fields are available, the old C flag is XOR'd into the second
+      arg so that Memcheck sees the data dependency on it.  That means
+      the NDEP field must contain a second copy of the old C flag
+      so that the evaluation functions can correctly recover the second
+      arg.
+
+    * and/or/xor are straightforward -- definedness of result flags
+      depends on definedness of result value.
+
+    * inc/dec -- definedness of result flags depends only on
+      definedness of result.  This isn't really true -- it also depends
+      on the old C flag.  However, we don't want Memcheck to see that,
+      and so the old C flag must be passed in NDEP and not in DEP2.
+      It's inconceivable that a compiler would generate code that puts
+      the C flag in an undefined state, then does an inc/dec, which
+      leaves C unchanged, and then makes a conditional jump/move based
+      on C.  So our fiction seems a good approximation.
+
+    * shl/shr/sar -- straightforward, again, definedness of result
+      flags depends on definedness of result value.  The subshifted
+      value (value shifted one less) is also needed, but its
+      definedness is the same as the definedness of the shifted value.
+
+    * rol/ror -- these only set O and C, and leave A Z S P alone.
+      However it seems prudent (as per inc/dec) to say the definedness
+      of all resulting flags depends on the definedness of the result,
+      hence the old flags must go in as NDEP and not DEP2.
+
+    * rcl/rcr are too difficult to do in-line, and so are done by a
+      helper function.  They are not part of this scheme.  The helper
+      function takes the value to be rotated, the rotate amount and the
+      old flags, and returns the new flags and the rotated value.
+ Since the helper's mcx_mask does not have any set bits, Memcheck + will lazily propagate undefinedness from any of the 3 args into + both results (flags and actual value). + */ + enum { + AMD64G_CC_OP_COPY=0, /* DEP1 = current flags, DEP2 = 0, NDEP = unused */ + /* just copy DEP1 to output */ + + AMD64G_CC_OP_ADDB, /* 1 */ + AMD64G_CC_OP_ADDW, /* 2 DEP1 = argL, DEP2 = argR, NDEP = unused */ + AMD64G_CC_OP_ADDL, /* 3 */ + AMD64G_CC_OP_ADDQ, /* 4 */ + + AMD64G_CC_OP_SUBB, /* 5 */ + AMD64G_CC_OP_SUBW, /* 6 DEP1 = argL, DEP2 = argR, NDEP = unused */ + AMD64G_CC_OP_SUBL, /* 7 */ + AMD64G_CC_OP_SUBQ, /* 8 */ + + AMD64G_CC_OP_ADCB, /* 9 */ + AMD64G_CC_OP_ADCW, /* 10 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */ + AMD64G_CC_OP_ADCL, /* 11 */ + AMD64G_CC_OP_ADCQ, /* 12 */ + + AMD64G_CC_OP_SBBB, /* 13 */ + AMD64G_CC_OP_SBBW, /* 14 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */ + AMD64G_CC_OP_SBBL, /* 15 */ + AMD64G_CC_OP_SBBQ, /* 16 */ + + AMD64G_CC_OP_LOGICB, /* 17 */ + AMD64G_CC_OP_LOGICW, /* 18 DEP1 = result, DEP2 = 0, NDEP = unused */ + AMD64G_CC_OP_LOGICL, /* 19 */ + AMD64G_CC_OP_LOGICQ, /* 20 */ + + AMD64G_CC_OP_INCB, /* 21 */ + AMD64G_CC_OP_INCW, /* 22 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */ + AMD64G_CC_OP_INCL, /* 23 */ + AMD64G_CC_OP_INCQ, /* 24 */ + + AMD64G_CC_OP_DECB, /* 25 */ + AMD64G_CC_OP_DECW, /* 26 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */ + AMD64G_CC_OP_DECL, /* 27 */ + AMD64G_CC_OP_DECQ, /* 28 */ + + AMD64G_CC_OP_SHLB, /* 29 DEP1 = res, DEP2 = res', NDEP = unused */ + AMD64G_CC_OP_SHLW, /* 30 where res' is like res but shifted one bit less */ + AMD64G_CC_OP_SHLL, /* 31 */ + AMD64G_CC_OP_SHLQ, /* 32 */ + + AMD64G_CC_OP_SHRB, /* 33 DEP1 = res, DEP2 = res', NDEP = unused */ + AMD64G_CC_OP_SHRW, /* 34 where res' is like res but shifted one bit less */ + AMD64G_CC_OP_SHRL, /* 35 */ + AMD64G_CC_OP_SHRQ, /* 36 */ + + AMD64G_CC_OP_ROLB, /* 37 */ + AMD64G_CC_OP_ROLW, /* 38 DEP1 = res, DEP2 = 0, NDEP = old flags */ + 
AMD64G_CC_OP_ROLL, /* 39 */ + AMD64G_CC_OP_ROLQ, /* 40 */ + + AMD64G_CC_OP_RORB, /* 41 */ + AMD64G_CC_OP_RORW, /* 42 DEP1 = res, DEP2 = 0, NDEP = old flags */ + AMD64G_CC_OP_RORL, /* 43 */ + AMD64G_CC_OP_RORQ, /* 44 */ + + AMD64G_CC_OP_UMULB, /* 45 */ + AMD64G_CC_OP_UMULW, /* 46 DEP1 = argL, DEP2 = argR, NDEP = unused */ + AMD64G_CC_OP_UMULL, /* 47 */ + AMD64G_CC_OP_UMULQ, /* 48 */ + + AMD64G_CC_OP_SMULB, /* 49 */ + AMD64G_CC_OP_SMULW, /* 50 DEP1 = argL, DEP2 = argR, NDEP = unused */ + AMD64G_CC_OP_SMULL, /* 51 */ + AMD64G_CC_OP_SMULQ, /* 52 */ + + AMD64G_CC_OP_NUMBER + }; + + typedef + enum { + AMD64CondO = 0, /* overflow */ + AMD64CondNO = 1, /* no overflow */ + + AMD64CondB = 2, /* below */ + AMD64CondNB = 3, /* not below */ + + AMD64CondZ = 4, /* zero */ + AMD64CondNZ = 5, /* not zero */ + + AMD64CondBE = 6, /* below or equal */ + AMD64CondNBE = 7, /* not below or equal */ + + AMD64CondS = 8, /* negative */ + AMD64CondNS = 9, /* not negative */ + + AMD64CondP = 10, /* parity even */ + AMD64CondNP = 11, /* not parity even */ + + AMD64CondL = 12, /* jump less */ + AMD64CondNL = 13, /* not less */ + + AMD64CondLE = 14, /* less or equal */ + AMD64CondNLE = 15, /* not less or equal */ + + AMD64CondAlways = 16 /* HACK */ + } + AMD64Condcode; + + #endif /* ndef __VEX_GUEST_AMD64_DEFS_H */ + + /*---------------------------------------------------------------*/ + /*--- end guest_amd64_defs.h ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_amd64_helpers.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_amd64_helpers.c Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,2501 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_amd64_helpers.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. 
---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. 
+ */ + + #include "libvex_basictypes.h" + #include "libvex_emwarn.h" + #include "libvex_guest_amd64.h" + #include "libvex_ir.h" + #include "libvex.h" + + #include "main_util.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_amd64_defs.h" + #include "guest_generic_x87.h" + + + /* This file contains helper functions for amd64 guest code. + Calls to these functions are generated by the back end. + These calls are of course in the host machine code and + this file will be compiled to host machine code, so that + all makes sense. + + Only change the signatures of these helper functions very + carefully. If you change the signature here, you'll have to change + the parameters passed to it in the IR calls constructed by + guest-amd64/toIR.c. + + The convention used is that all functions called from generated + code are named amd64g_, and any function whose name lacks + that prefix is not called from generated code. Note that some + LibVEX_* functions can however be called by VEX's client, but that + is not the same as calling them from VEX-generated code. + */ + + + /* Set to 1 to get detailed profiling info about use of the flag + machinery. */ + #define PROFILE_RFLAGS 0 + + + /*---------------------------------------------------------------*/ + /*--- %rflags run-time helpers. ---*/ + /*---------------------------------------------------------------*/ + + /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags + after imulq/mulq. 
*/ + + static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo ) + { + ULong u0, v0, w0; + Long u1, v1, w1, w2, t; + u0 = u & 0xFFFFFFFFULL; + u1 = u >> 32; + v0 = v & 0xFFFFFFFFULL; + v1 = v >> 32; + w0 = u0 * v0; + t = u1 * v0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 = u0 * v1 + w1; + *rHi = u1 * v1 + w2 + (w1 >> 32); + *rLo = u * v; + } + + static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo ) + { + ULong u0, v0, w0; + ULong u1, v1, w1,w2,t; + u0 = u & 0xFFFFFFFFULL; + u1 = u >> 32; + v0 = v & 0xFFFFFFFFULL; + v1 = v >> 32; + w0 = u0 * v0; + t = u1 * v0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 = u0 * v1 + w1; + *rHi = u1 * v1 + w2 + (w1 >> 32); + *rLo = u * v; + } + + + static const UChar parity_table[256] = { + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + 0, 
AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, + 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, + }; + + /* generalised left-shifter */ + static inline Long lshift ( Long x, Int n ) + { + if (n >= 0) + return x << n; + else + return x >> (-n); + } + + /* identity on ULong */ + static inline ULong idULong ( ULong x ) + { + return x; + } + + + #define PREAMBLE(__data_bits) \ + /* const */ ULong DATA_MASK \ + = __data_bits==8 \ + ? 0xFFULL \ + : (__data_bits==16 \ + ? 0xFFFFULL \ + : (__data_bits==32 \ + ? 
0xFFFFFFFFULL \ + : 0xFFFFFFFFFFFFFFFFULL)); \ + /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \ + /* const */ ULong CC_DEP1 = cc_dep1_formal; \ + /* const */ ULong CC_DEP2 = cc_dep2_formal; \ + /* const */ ULong CC_NDEP = cc_ndep_formal; \ + /* Four bogus assignments, which hopefully gcc can */ \ + /* optimise away, and which stop it complaining about */ \ + /* unused variables. */ \ + SIGN_MASK = SIGN_MASK; \ + DATA_MASK = DATA_MASK; \ + CC_DEP2 = CC_DEP2; \ + CC_NDEP = CC_NDEP; + + + /*-------------------------------------------------------------*/ + + #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, res; \ + argL = CC_DEP1; \ + argR = CC_DEP2; \ + res = argL + argR; \ + cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ + 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, res; \ + argL = CC_DEP1; \ + argR = CC_DEP2; \ + res = argL - argR; \ + cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR) & (argL ^ res), \ + 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, oldC, res; \ + oldC = CC_NDEP & AMD64G_CC_MASK_C; \ + argL = CC_DEP1; \ + 
argR = CC_DEP2 ^ oldC; \ + res = (argL + argR) + oldC; \ + if (oldC) \ + cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ + else \ + cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ + 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, oldC, res; \ + oldC = CC_NDEP & AMD64G_CC_MASK_C; \ + argL = CC_DEP1; \ + argR = CC_DEP2 ^ oldC; \ + res = (argL - argR) - oldC; \ + if (oldC) \ + cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ + else \ + cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR) & (argL ^ res), \ + 12 - DATA_BITS) & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + cf = 0; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + of = 0; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, res; \ + res = CC_DEP1; \ + argL = res - 1; \ + argR = 1; \ + cf = CC_NDEP & AMD64G_CC_MASK_C; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = 
((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + Long argL, argR, res; \ + res = CC_DEP1; \ + argL = res + 1; \ + argR = 1; \ + cf = CC_NDEP & AMD64G_CC_MASK_C; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = ((res & DATA_MASK) \ + == ((ULong)SIGN_MASK - 1)) << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; /* undefined */ \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + /* of is defined if shift count == 1 */ \ + of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ + & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + cf = CC_DEP2 & 1; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; /* undefined */ \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + /* of is defined if shift count == 1 */ \ + of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ + & AMD64G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). 
*/ + /* DEP1 = result, NDEP = old flags */ + #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long fl \ + = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ + | (AMD64G_CC_MASK_C & CC_DEP1) \ + | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ + 11-(DATA_BITS-1)) \ + ^ lshift(CC_DEP1, 11))); \ + return fl; \ + } \ + } + + /*-------------------------------------------------------------*/ + + /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */ + /* DEP1 = result, NDEP = old flags */ + #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long fl \ + = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \ + | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ + | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \ + 11-(DATA_BITS-1)) \ + ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ + return fl; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ + DATA_U2TYPE, NARROWto2U) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + DATA_UTYPE hi; \ + DATA_UTYPE lo \ + = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ + * ((DATA_UTYPE)CC_DEP2) ); \ + DATA_U2TYPE rr \ + = NARROWto2U( \ + ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ + * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ + hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ + cf = (hi != 0); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ + DATA_S2TYPE, NARROWto2S) \ + { \ + PREAMBLE(DATA_BITS); \ + { Long cf, pf, af, zf, sf, of; \ + DATA_STYPE hi; \ + DATA_STYPE lo \ + = NARROWtoS( ((DATA_STYPE)CC_DEP1) \ + * ((DATA_STYPE)CC_DEP2) ); \ + DATA_S2TYPE rr \ + = NARROWto2S( \ + ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ + * 
((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ + hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ + cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_UMULQ \ + { \ + PREAMBLE(64); \ + { Long cf, pf, af, zf, sf, of; \ + ULong lo, hi; \ + mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \ + cf = (hi != 0); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - 64) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SMULQ \ + { \ + PREAMBLE(64); \ + { Long cf, pf, af, zf, sf, of; \ + Long lo, hi; \ + mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \ + cf = (hi != (lo >>/*s*/ (64-1))); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - 64) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + + #if PROFILE_RFLAGS + + static Bool initted = False; + + /* C flag, fast route */ + static UInt tabc_fast[AMD64G_CC_OP_NUMBER]; + /* C flag, slow route */ + static UInt tabc_slow[AMD64G_CC_OP_NUMBER]; + /* table for calculate_cond */ + static UInt tab_cond[AMD64G_CC_OP_NUMBER][16]; + /* total entry counts for calc_all, calc_c, calc_cond. 
*/ + static UInt n_calc_all = 0; + static UInt n_calc_c = 0; + static UInt n_calc_cond = 0; + + #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) + + + static void showCounts ( void ) + { + Int op, co; + Char ch; + vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", + n_calc_all, n_calc_cond, n_calc_c); + + vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" + " S NS P NP L NL LE NLE\n"); + vex_printf(" -----------------------------------------------------" + "----------------------------------------\n"); + for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { + + ch = ' '; + if (op > 0 && (op-1) % 4 == 0) + ch = 'B'; + if (op > 0 && (op-1) % 4 == 1) + ch = 'W'; + if (op > 0 && (op-1) % 4 == 2) + ch = 'L'; + if (op > 0 && (op-1) % 4 == 3) + ch = 'Q'; + + vex_printf("%2d%c: ", op, ch); + vex_printf("%6u ", tabc_slow[op]); + vex_printf("%6u ", tabc_fast[op]); + for (co = 0; co < 16; co++) { + Int n = tab_cond[op][co]; + if (n >= 1000) { + vex_printf(" %3dK", n / 1000); + } else + if (n >= 0) { + vex_printf(" %3d ", n ); + } else { + vex_printf(" "); + } + } + vex_printf("\n"); + } + vex_printf("\n"); + } + + static void initCounts ( void ) + { + Int op, co; + initted = True; + for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) { + tabc_fast[op] = tabc_slow[op] = 0; + for (co = 0; co < 16; co++) + tab_cond[op][co] = 0; + } + } + + #endif /* PROFILE_RFLAGS */ + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate all the 6 flags from the supplied thunk parameters. + Worker function, not directly called from generated code. 
*/ /* Parameters:
+       cc_op            one of the AMD64G_CC_OP_* values; selects how
+                        the other three words are interpreted.
+       cc_dep1_formal,
+       cc_dep2_formal,
+       cc_ndep_formal   the CC_DEP1, CC_DEP2 and CC_NDEP thunk words.
+                        They carry the "_formal" suffix because PREAMBLE,
+                        used by every ACTIONS_* macro, copies them into
+                        locals named CC_DEP1, CC_DEP2 and CC_NDEP.
+    Returns a word holding the flags at their AMD64G_CC_SHIFT_*
+    positions.  An unrecognised cc_op is reported with vex_printf and
+    then handed to vpanic. */
+ static
+ ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
+                                         ULong cc_dep1_formal,
+                                         ULong cc_dep2_formal,
+                                         ULong cc_ndep_formal )
+ {
+    switch (cc_op) {   /* each ACTIONS_* macro expands to a block ending in 'return', so those cases never fall through */
+       case AMD64G_CC_OP_COPY:   /* DEP1 already holds the flags; keep only the six flag bits */
+          return cc_dep1_formal
+                 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
+                    | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
+       /* Macro arguments below: operand width in bits, then the unsigned C type of that width. */
+       case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
+       case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
+       case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
+       case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );
+       /* adc: DEP2 is argR ^ oldCarry and NDEP is oldCarry; ACTIONS_ADC undoes the XOR. */
+       case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
+       case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
+       case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
+       case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );
+       /* sub: DEP1 = argL, DEP2 = argR. */
+       case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
+       case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
+       case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
+       case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );
+       /* sbb: same thunk encoding as adc. */
+       case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
+       case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
+       case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
+       case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );
+       /* and/or/xor: DEP1 = result; ACTIONS_LOGIC sets C, A and O to zero. */
+       case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
+       case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
+       case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
+       case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );
+       /* inc: DEP1 = result; the carry flag is taken unchanged from NDEP. */
+       case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
+       case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
+       case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
+       case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );
+       /* dec: as for inc. */
+       case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
+       case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
+       case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
+       case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );
+       /* shl: DEP1 = result, DEP2 = value shifted one bit less. */
+       case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
+       case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
+       case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
+       case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );
+       /* shr: same thunk encoding as shl. */
+       case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
+       case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
+       case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
+       case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );
+       /* rol: DEP1 = result, NDEP = old flags; only O and C are recomputed. */
+       case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
+       case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
+       case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
+       case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );
+       /* ror: as for rol. */
+       case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
+       case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
+       case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
+       case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );
+       /* Unsigned multiply; extra macro args: narrowing function, double-width type, its narrowing function. */
+       case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
+                                                   UShort, toUShort );
+       case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
+                                                   UInt,   toUInt );
+       case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
+                                                   ULong,  idULong );
+       /* The 64-bit case has no double-width C type, so ACTIONS_UMULQ uses mullU64 instead. */
+       case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;
+       /* Signed multiply.  NOTE(review): the unsigned narrowing helpers are reused here with signed data types -- confirm this is intended. */
+       case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
+                                                   Short,  toUShort );
+       case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
+                                                   Int,    toUInt   );
+       case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
+                                                   Long,   idULong );
+       /* As for UMULQ, the signed 64-bit case goes through mullS64. */
+       case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;
+
+       default:
+          /* shouldn't really make these calls from generated code */
+          vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
+                     "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
+                     cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
+          vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");   /* NOTE(review): presumably does not return -- no return statement follows it */
+    }
+ }
+
+
+ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ /* Calculate all the 6 flags from the supplied thunk parameters.
*/
+ULong amd64g_calculate_rflags_all ( ULong cc_op,
+                                    ULong cc_dep1,
+                                    ULong cc_dep2,
+                                    ULong cc_ndep )
+{
+   ULong all_flags;
+
+#  if PROFILE_RFLAGS
+   if (!initted) initCounts();
+   n_calc_all++;
+   if (SHOW_COUNTS_NOW) showCounts();
+#  endif
+
+   /* The shared worker does the real computation; this entry point
+      only adds the optional profiling above. */
+   all_flags = amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1,
+                                                 cc_dep2, cc_ndep );
+   return all_flags;
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Compute only the carry flag for the given thunk.  A few operations
+   are answered directly; everything else goes through the full
+   flags worker and is masked down to C. */
+ULong amd64g_calculate_rflags_c ( ULong cc_op,
+                                  ULong cc_dep1,
+                                  ULong cc_dep2,
+                                  ULong cc_ndep )
+{
+#  if PROFILE_RFLAGS
+   if (!initted) initCounts();
+   n_calc_c++;
+   tabc_fast[cc_op]++;   /* provisionally counted as a fast case */
+   if (SHOW_COUNTS_NOW) showCounts();
+#  endif
+
+   /* Fast case: a COPY thunk carries the flags themselves in
+      cc_dep1, so the carry bit can be picked out directly. */
+   if (cc_op == AMD64G_CC_OP_COPY)
+      return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
+
+   /* Fast case: carry is reported as zero after any logic op. */
+   if (cc_op == AMD64G_CC_OP_LOGICQ
+       || cc_op == AMD64G_CC_OP_LOGICL
+       || cc_op == AMD64G_CC_OP_LOGICW
+       || cc_op == AMD64G_CC_OP_LOGICB)
+      return 0;
+
+   /* Further fast cases, currently disabled: */
+   // case AMD64G_CC_OP_SUBL:
+   //   return ((UInt)cc_dep1) < ((UInt)cc_dep2)
+   //             ? AMD64G_CC_MASK_C : 0;
+   // case AMD64G_CC_OP_SUBW:
+   //   return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
+   //             ? AMD64G_CC_MASK_C : 0;
+   // case AMD64G_CC_OP_SUBB:
+   //   return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
+   //             ? AMD64G_CC_MASK_C : 0;
+   // case AMD64G_CC_OP_INCL:
+   // case AMD64G_CC_OP_DECL:
+   //   return cc_ndep & AMD64G_CC_MASK_C;
+
+#  if PROFILE_RFLAGS
+   /* Not a fast case after all: move the count to the slow table. */
+   tabc_fast[cc_op]--;
+   tabc_slow[cc_op]++;
+#  endif
+
+   /* Slow path: compute every flag, then keep just C. */
+   return AMD64G_CC_MASK_C
+          & amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep);
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Evaluate condition code 'cond' against the flags described by the
+   thunk (cc_op, cc_dep1, cc_dep2, cc_ndep).  Returns 1 or 0. */
+ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
+                                   ULong cc_op,
+                                   ULong cc_dep1,
+                                   ULong cc_dep2,
+                                   ULong cc_ndep )
+{
+   ULong flags, negate;
+   ULong picked = 0;
+
+   flags  = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
+                                            cc_dep2, cc_ndep);
+   /* Bit 0 of the condition code selects the negated form of each
+      pair handled below. */
+   negate = cond & 1;
+
+#  if PROFILE_RFLAGS
+   if (!initted) initCounts();
+   tab_cond[cc_op][cond]++;
+   n_calc_cond++;
+   if (SHOW_COUNTS_NOW) showCounts();
+#  endif
+
+   /* Each arm leaves the relevant flag (or flag combination) in bit 0
+      of 'picked'; higher bits are garbage and are masked off at the
+      end. */
+   switch (cond) {
+      case AMD64CondNO:
+      case AMD64CondO: /* OF == 1 */
+         picked = flags >> AMD64G_CC_SHIFT_O;
+         break;
+
+      case AMD64CondNZ:
+      case AMD64CondZ: /* ZF == 1 */
+         picked = flags >> AMD64G_CC_SHIFT_Z;
+         break;
+
+      case AMD64CondNB:
+      case AMD64CondB: /* CF == 1 */
+         picked = flags >> AMD64G_CC_SHIFT_C;
+         break;
+
+      case AMD64CondNBE:
+      case AMD64CondBE: /* (CF or ZF) == 1 */
+         picked = (flags >> AMD64G_CC_SHIFT_C)
+                  | (flags >> AMD64G_CC_SHIFT_Z);
+         break;
+
+      case AMD64CondNS:
+      case AMD64CondS: /* SF == 1 */
+         picked = flags >> AMD64G_CC_SHIFT_S;
+         break;
+
+      case AMD64CondNP:
+      case AMD64CondP: /* PF == 1 */
+         picked = flags >> AMD64G_CC_SHIFT_P;
+         break;
+
+      case AMD64CondNL:
+      case AMD64CondL: /* (SF xor OF) == 1 */
+         picked = (flags >> AMD64G_CC_SHIFT_S)
+                  ^ (flags >> AMD64G_CC_SHIFT_O);
+         break;
+
+      case AMD64CondNLE:
+      case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
+         picked = ((flags >> AMD64G_CC_SHIFT_S)
+                   ^ (flags >> AMD64G_CC_SHIFT_O))
+                  | (flags >> AMD64G_CC_SHIFT_Z);
+         break;
+
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("amd64g_calculate_condition"
+                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
+                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
+         /* vpanic is expected not to return */
+         vpanic("amd64g_calculate_condition");
+   }
+
+   return 1 & (negate ^ picked);
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Assemble a %rflags value from the guest state: the thunk-derived
+   flags, plus bit 10 when guest_DFLAG is -1 and bit 21 when
+   guest_IDFLAG is 1. */
+ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
+{
+   Long  direction;
+   ULong result;
+
+   result = amd64g_calculate_rflags_all_WRK(
+               vex_state->guest_CC_OP,
+               vex_state->guest_CC_DEP1,
+               vex_state->guest_CC_DEP2,
+               vex_state->guest_CC_NDEP
+            );
+
+   /* guest_DFLAG is only ever allowed to be +1 or -1. */
+   direction = vex_state->guest_DFLAG;
+   vassert(direction == 1 || direction == -1);
+
+   if (direction == -1)
+      result |= (1<<10);
+   if (vex_state->guest_IDFLAG == 1)
+      result |= (1<<21);
+
+   return result;
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Force the guest's carry flag to bit 0 of new_carry_flag.  The
+   current flags are materialised, C is replaced, and the thunk is
+   rewritten as a COPY thunk holding the result in CC_DEP1. */
+void
+LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
+                               /*MOD*/VexGuestAMD64State* vex_state )
+{
+   ULong flags = amd64g_calculate_rflags_all_WRK(
+                    vex_state->guest_CC_OP,
+                    vex_state->guest_CC_DEP1,
+                    vex_state->guest_CC_DEP2,
+                    vex_state->guest_CC_NDEP
+                 );
+
+   /* Clear C unconditionally, then set it again if requested. */
+   flags &= ~AMD64G_CC_MASK_C;
+   if (new_carry_flag & 1)
+      flags |= AMD64G_CC_MASK_C;
+
+   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
+   vex_state->guest_CC_DEP1 = flags;
+   vex_state->guest_CC_DEP2 = 0;
+   vex_state->guest_CC_NDEP = 0;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- %rflags translation-time function specialisers.         ---*/
+/*--- These help iropt specialise calls the above run-time    ---*/
+/*--- %rflags functions.                                      ---*/
+/*---------------------------------------------------------------*/
+
+/* Used by the optimiser to try specialisations.  Returns an
+   equivalent expression, or NULL if none.
*/
+
+/* True iff 'e' is a constant expression of type U64 whose value
+   is 'n'. */
+static Bool isU64 ( IRExpr* e, ULong n )
+{
+   return toBool( e->tag == Iex_Const
+                  && e->Iex.Const.con->tag == Ico_U64
+                  && e->Iex.Const.con->Ico.U64 == n );
+}
+
+/* Try to replace a call to one of the flag helper functions by an
+   equivalent IR expression.
+
+   function_name  name of the helper being called
+   args           NULL-terminated vector of the call's argument
+                  expressions
+
+   Every pattern below is guarded by isU64, so only calls whose cc_op
+   (and, for amd64g_calculate_condition, whose cond) are 64-bit
+   constants can be specialised.  For operations narrower than 64
+   bits the operands are either truncated, masked, or shifted left so
+   that the significant bits sit at the top of the 64-bit value before
+   being compared.
+
+   Returns the replacement expression, or NULL if no pattern
+   matches. */
+IRExpr* guest_amd64_spechelper ( HChar* function_name,
+                                 IRExpr** args )
+{
+#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
+
+   /* args is NULL-terminated; count its entries. */
+   Int i, arity = 0;
+   for (i = 0; args[i]; i++)
+      arity++;
+#  if 0
+   vex_printf("spec request:\n");
+   vex_printf(" %s ", function_name);
+   for (i = 0; i < arity; i++) {
+      vex_printf(" ");
+      ppIRExpr(args[i]);
+   }
+   vex_printf("\n");
+#  endif
+
+   /* --------- specialising "amd64g_calculate_condition" --------- */
+
+   if (vex_streq(function_name, "amd64g_calculate_condition")) {
+      /* specialise calls to above "calculate condition" function */
+      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
+      vassert(arity == 5);
+      cond    = args[0];
+      cc_op   = args[1];
+      cc_dep1 = args[2];
+      cc_dep2 = args[3];
+
+      /*---------------- ADDQ ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
+         /* long long add, then Z --> test (dst+src == 0) */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,
+                           binop(Iop_Add64, cc_dep1, cc_dep2),
+                           mkU64(0)));
+      }
+
+      /*---------------- SUBQ ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
+         /* long long sub/cmp, then Z --> test dst==src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
+         /* long long sub/cmp, then NZ --> test dst!=src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
+         /* long long sub/cmp, then L (signed less than)
+            --> test dst <s src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
+         /* long long sub/cmp, then B (unsigned less than)
+            --> test dst <u src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
+         /* long long sub/cmp, then NB (unsigned greater than or equal)
+            --> test src <=u dst */
+         /* Note, args are opposite way round from the usual */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
+         /* long long sub/cmp, then BE (unsigned less than or equal)
+            --> test dst <=u src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+      }
+
+      /*---------------- SUBL ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
+         /* long sub/cmp, then Z --> test dst==src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
+         /* long sub/cmp, then NZ --> test dst!=src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
+         /* long sub/cmp, then L (signed less than)
+            --> test dst <s src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64S,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
+         /* long sub/cmp, then LE (signed less than or equal)
+            --> test dst <=s src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
+         /* long sub/cmp, then BE (unsigned less than or equal)
+            --> test dst <=u src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
+         /* long sub/cmp, then NBE (unsigned greater than)
+            --> test src <u dst */
+         /* Note, args are opposite way round from the usual */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U,
+                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
+      }
+
+      /*---------------- SUBW ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
+         /* word sub/cmp, then Z --> test dst==src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ16,
+                           unop(Iop_64to16,cc_dep1),
+                           unop(Iop_64to16,cc_dep2)));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
+         /* word sub/cmp, then NZ --> test dst!=src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE16,
+                           unop(Iop_64to16,cc_dep1),
+                           unop(Iop_64to16,cc_dep2)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
+         /* word sub/cmp, then LE (signed less than or equal)
+            --> test dst <=s src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S,
+                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
+      }
+
+      /*---------------- SUBB ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
+         /* byte sub/cmp, then Z --> test dst==src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ8,
+                           unop(Iop_64to8,cc_dep1),
+                           unop(Iop_64to8,cc_dep2)));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
+         /* byte sub/cmp, then NZ --> test dst!=src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE8,
+                           unop(Iop_64to8,cc_dep1),
+                           unop(Iop_64to8,cc_dep2)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
+                                          && isU64(cc_dep2, 0)) {
+         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
+                                         --> test dst <s 0
+                                         --> (ULong)dst[7]
+            This is yet another scheme by which gcc figures out if the
+            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
+         /* Note: isU64(cc_dep2, 0) is correct, even though this is
+            for an 8-bit comparison, since the args to the helper
+            function are always U64s. */
+         return binop(Iop_And64,
+                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
+                      mkU64(1));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
+                                          && isU64(cc_dep2, 0)) {
+         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
+                                          --> test !(dst <s 0)
+                                          --> (ULong) !dst[7]
+         */
+         return binop(Iop_Xor64,
+                      binop(Iop_And64,
+                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
+                            mkU64(1)),
+                      mkU64(1));
+      }
+
+      /*---------------- LOGICQ ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
+         /* long long and/or/xor, then Z --> test dst==0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
+         /* long long and/or/xor, then L
+            LOGIC sets SF and ZF according to the
+            result and makes OF be zero.  L computes SF ^ OF, but
+            OF is zero, so this reduces to SF -- which will be 1 iff
+            the result is < signed 0.  Hence ...
+         */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64S,
+                           cc_dep1,
+                           mkU64(0)));
+      }
+
+      /*---------------- LOGICL ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
+         /* long and/or/xor, then Z --> test dst==0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           mkU64(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
+         /* long and/or/xor, then NZ --> test dst!=0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           mkU64(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
+         /* long and/or/xor, then LE
+            This is pretty subtle.  LOGIC sets SF and ZF according to the
+            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
+            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
+            the result is <=signed 0.  Hence ...
+         */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           mkU64(0)));
+      }
+
+      /*---------------- LOGICB ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
+         /* byte and/or/xor, then Z --> test dst==0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
+                                        mkU64(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
+         /* this is an idiom gcc sometimes uses to find out if the top
+            bit of a byte register is set: eg testb %al,%al; js ..
+            Since it just depends on the top bit of the byte, extract
+            that bit and explicitly get rid of all the rest.  This
+            helps memcheck avoid false positives in the case where any
+            of the other bits in the byte are undefined. */
+         /* byte and/or/xor, then S --> (ULong)result[7] */
+         return binop(Iop_And64,
+                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
+                      mkU64(1));
+      }
+
+      /*---------------- INCB ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
+         /* 8-bit inc, then LE --> test result <=s 0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64S,
+                           binop(Iop_Shl64,cc_dep1,mkU8(56)),
+                           mkU64(0)));
+      }
+
+      /*---------------- INCW ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
+         /* 16-bit inc, then Z --> test dst == 0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
+                           mkU64(0)));
+      }
+
+      /*---------------- DECL ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
+         /* dec L, then Z --> test dst == 0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpEQ64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           mkU64(0)));
+      }
+
+      /*---------------- DECW ----------------*/
+
+      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
+         /* 16-bit dec, then NZ --> test dst != 0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64,
+                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
+                           mkU64(0)));
+      }
+
+      /*---------------- COPY ----------------*/
+      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
+         jbe" for example. */
+
+      if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
+          (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
+         /* COPY, then BE --> extract C and Z from dep1, and test (C
+            or Z == 1). */
+         /* COPY, then NBE --> extract C and Z from dep1, and test (C
+            or Z == 0). */
+         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
+         return
+            unop(
+               Iop_1Uto64,
+               binop(
+                  Iop_CmpEQ64,
+                  binop(
+                     Iop_And64,
+                     binop(
+                        Iop_Or64,
+                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
+                        binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
+                     ),
+                     mkU64(1)
+                  ),
+                  mkU64(nnn)
+               )
+            );
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
+         /* COPY, then B --> extract C from dep1, and test (C == 1). */
+         return
+            unop(
+               Iop_1Uto64,
+               binop(
+                  Iop_CmpNE64,
+                  binop(
+                     Iop_And64,
+                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
+                     mkU64(1)
+                  ),
+                  mkU64(0)
+               )
+            );
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_COPY)
+          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
+         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
+         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
+         ULong nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
+         return
+            unop(
+               Iop_1Uto64,
+               binop(
+                  Iop_CmpEQ64,
+                  binop(
+                     Iop_And64,
+                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
+                     mkU64(1)
+                  ),
+                  mkU64(nnn)
+               )
+            );
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
+         /* COPY, then P --> extract P from dep1, and test (P == 1). */
+         return
+            unop(
+               Iop_1Uto64,
+               binop(
+                  Iop_CmpNE64,
+                  binop(
+                     Iop_And64,
+                     binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
+                     mkU64(1)
+                  ),
+                  mkU64(0)
+               )
+            );
+      }
+
+      return NULL;
+   }
+
+   /* --------- specialising "amd64g_calculate_rflags_c" --------- */
+
+   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
+      /* specialise calls to above "calculate_rflags_c" function */
+      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+      vassert(arity == 4);
+      cc_op   = args[0];
+      cc_dep1 = args[1];
+      cc_dep2 = args[2];
+      cc_ndep = args[3];
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
+         /* C after sub denotes unsigned less than */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U,
+                           cc_dep1,
+                           cc_dep2));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
+         /* C after sub denotes unsigned less than */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U,
+                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
+                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
+         /* C after sub denotes unsigned less than */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT64U,
+                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
+                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
+          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
+          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
+          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
+         /* cflag after logic is zero */
+         return mkU64(0);
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
+          || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
+         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
+         return cc_ndep;
+      }
+
+#     if 0
+      if (cc_op->tag == Iex_Const) {
+         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
+      }
+#     endif
+
+      return NULL;
+   }
+
+#  undef unop
+#  undef binop
+#  undef mkU64
+#  undef mkU8
+
+   /* Not a helper we know how to specialise. */
+   return NULL;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for x87 FPU activities.
---*/
+/*---------------------------------------------------------------*/
+
+/* Runtime endianness probe: returns True iff the lowest-addressed
+   byte of a UInt holds its least significant byte. */
+static inline Bool host_is_little_endian ( void )
+{
+   UInt x = 0x76543210;
+   UChar* p = (UChar*)(&x);
+   return toBool(*p == 0x10);
+}
+
+/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* 'tag' is zero for an empty register; 'dbl' is the register's
+   contents as the bit pattern of an IEEE754 64-bit value.  The
+   result is built from the AMD64G_FC_MASK_C3/C2/C0 masks with the
+   sign bit placed at AMD64G_FC_SHIFT_C1; the "a,b,sign,c" tuples in
+   the comments below are in the order C3,C2,C1,C0.  Asserts that the
+   host is little-endian, because the value is examined bytewise. */
+ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
+{
+   Bool   mantissaIsZero;
+   Int    bexp;
+   UChar  sign;
+   UChar* f64;
+
+   vassert(host_is_little_endian());
+
+   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
+
+   /* f64[7] is the most significant byte; its top bit is the sign. */
+   f64  = (UChar*)(&dbl);
+   sign = toUChar( (f64[7] >> 7) & 1 );
+
+   /* First off, if the tag indicates the register was empty,
+      return 1,0,sign,1 */
+   if (tag == 0) {
+      /* vex_printf("Empty\n"); */
+      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
+                                   | AMD64G_FC_MASK_C0;
+   }
+
+   /* 11-bit biased exponent: low 7 bits of f64[7] followed by the
+      high 4 bits of f64[6]; the mask drops the sign bit. */
+   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
+   bexp &= 0x7FF;
+
+   /* The mantissa is the low 4 bits of f64[6] plus f64[5..0]. */
+   mantissaIsZero
+      = toBool(
+           (f64[6] & 0x0F) == 0
+           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
+        );
+
+   /* If both exponent and mantissa are zero, the value is zero.
+      Return 1,0,sign,0. */
+   if (bexp == 0 && mantissaIsZero) {
+      /* vex_printf("Zero\n"); */
+      return AMD64G_FC_MASK_C3 | 0
+                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
+   }
+
+   /* If exponent is zero but mantissa isn't, it's a denormal.
+      Return 1,1,sign,0. */
+   if (bexp == 0 && !mantissaIsZero) {
+      /* vex_printf("Denormal\n"); */
+      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
+                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
+   }
+
+   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
+      Return 0,1,sign,1. */
+   if (bexp == 0x7FF && mantissaIsZero) {
+      /* vex_printf("Inf\n"); */
+      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
+                                   | AMD64G_FC_MASK_C0;
+   }
+
+   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
+      Return 0,0,sign,1. */
+   if (bexp == 0x7FF && !mantissaIsZero) {
+      /* vex_printf("NaN\n"); */
+      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
+   }
+
+   /* Uh, ok, we give up.  It must be a normal finite number.
+      Return 0,1,sign,0.
+   */
+   /* vex_printf("normal\n"); */
+   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
+}
+
+
+/* Create an x87 FPU state from the guest state, as close as
+   we can approximate it. */
+/* 'x87_state' is written through an Fpu_State view: the 14-entry
+   env[] array followed by eight 10-byte registers in reg[].  (The
+   definition of Fpu_State is not visible in this part of the file,
+   so the caller must supply a buffer at least that large.) */
+static
+void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
+                  /*OUT*/UChar* x87_state )
+{
+   Int        i, stno, preg;
+   UInt       tagw;
+   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+   Fpu_State* x87     = (Fpu_State*)x87_state;
+   UInt       ftop    = vex_state->guest_FTOP;
+   UInt       c3210   = vex_state->guest_FC3210;
+
+   for (i = 0; i < 14; i++)
+      x87->env[i] = 0;
+
+   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
+   /* Status word: stack top in bits 13:11, plus the bits of c3210
+      selected by mask 0x4700. */
+   x87->env[FP_ENV_STAT]
+      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
+   x87->env[FP_ENV_CTRL]
+      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
+
+   /* Dump the register stack in ST order. */
+   /* ST(stno) lives in physical register (stno + ftop) & 7.  The tag
+      word is indexed by physical register, two bits each (3 = empty,
+      0 = full); the value is converted and stored in both cases. */
+   tagw = 0;
+   for (stno = 0; stno < 8; stno++) {
+      preg = (stno + ftop) & 7;
+      if (vexTags[preg] == 0) {
+         /* register is empty */
+         tagw |= (3 << (2*preg));
+         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+                                 &x87->reg[10*stno] );
+      } else {
+         /* register is full. */
+         tagw |= (0 << (2*preg));
+         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+                                 &x87->reg[10*stno] );
+      }
+   }
+   x87->env[FP_ENV_TAG] = toUShort(tagw);
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+/* NOTE: only handles 32-bit format (no REX.W on the insn) */
+/* Writes an fxsave-style image at 'addr': 32 bytes of header, the
+   eight x87 registers in 16-byte slots starting at byte 32, and
+   %xmm0..%xmm15 starting at byte 160 (416 bytes in total are
+   written).  Asserts that the host is little-endian. */
+void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
+{
+   /* Derived from values obtained from
+         vendor_id       : AuthenticAMD
+         cpu family      : 15
+         model           : 12
+         model name      : AMD Athlon(tm) 64 Processor 3200+
+         stepping        : 0
+         cpu MHz         : 2200.000
+         cache size      : 512 KB
+   */
+   /* Somewhat roundabout, but at least it's simple. */
+   Fpu_State tmp;
+   UShort*   addrS = (UShort*)addr;
+   UChar*    addrC = (UChar*)addr;
+   U128*     xmm   = (U128*)(addr + 160);
+   UInt      mxcsr;
+   UShort    fp_tags;
+   UInt      summary_tags;
+   Int       r, stno;
+   UShort    *srcS, *dstS;
+
+   do_get_x87( gst, (UChar*)&tmp );
+   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
+
+   /* Now build the proper fxsave image from the x87 image we just
+      made. */
+
+   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
+   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
+
+   /* set addrS[2] in an endian-independent way */
+   /* The summary has one bit per register, set when that register's
+      2-bit tag is anything other than 3 (empty). */
+   summary_tags = 0;
+   fp_tags = tmp.env[FP_ENV_TAG];
+   for (r = 0; r < 8; r++) {
+      if ( ((fp_tags >> (2*r)) & 3) != 3 )
+         summary_tags |= (1 << r);
+   }
+   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
+   addrC[5]  = 0; /* pad */
+
+   /* FOP: faulting fpu opcode.  From experimentation, the real CPU
+      does not write this field. (?!) */
+   addrS[3]  = 0; /* BOGUS */
+
+   /* RIP (Last x87 instruction pointer).  From experimentation, the
+      real CPU does not write this field. (?!) */
+   addrS[4]  = 0; /* BOGUS */
+   addrS[5]  = 0; /* BOGUS */
+   addrS[6]  = 0; /* BOGUS */
+   addrS[7]  = 0; /* BOGUS */
+
+   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
+      does not write this field. (?!) */
+   addrS[8]  = 0; /* BOGUS */
+   addrS[9]  = 0; /* BOGUS */
+   addrS[10] = 0; /* BOGUS */
+   addrS[11] = 0; /* BOGUS */
+
+   addrS[12] = toUShort(mxcsr);  /* MXCSR */
+   addrS[13] = toUShort(mxcsr >> 16);
+
+   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
+   addrS[15] = 0x0000; /* MXCSR mask (hi16) */
+
+   /* Copy in the FP registers, in ST order. */
+   /* Each 10-byte value is copied as five 16-bit words into a
+      16-byte slot; the remaining three words are zeroed. */
+   for (stno = 0; stno < 8; stno++) {
+      srcS = (UShort*)(&tmp.reg[10*stno]);
+      dstS = (UShort*)(&addrS[16 + 8*stno]);
+      dstS[0] = srcS[0];
+      dstS[1] = srcS[1];
+      dstS[2] = srcS[2];
+      dstS[3] = srcS[3];
+      dstS[4] = srcS[4];
+      dstS[5] = 0;
+      dstS[6] = 0;
+      dstS[7] = 0;
+   }
+
+   /* That's the first 160 bytes of the image done.  Now only %xmm0
+      .. %xmm15 remain to be copied.  If the host is big-endian, these
+      need to be byte-swapped. */
+   vassert(host_is_little_endian());
+
+   /* Copies one register as four indexed elements. */
+#  define COPY_U128(_dst,_src)                       \
+      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
+           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
+      while (0)
+
+   COPY_U128( xmm[0],  gst->guest_XMM0 );
+   COPY_U128( xmm[1],  gst->guest_XMM1 );
+   COPY_U128( xmm[2],  gst->guest_XMM2 );
+   COPY_U128( xmm[3],  gst->guest_XMM3 );
+   COPY_U128( xmm[4],  gst->guest_XMM4 );
+   COPY_U128( xmm[5],  gst->guest_XMM5 );
+   COPY_U128( xmm[6],  gst->guest_XMM6 );
+   COPY_U128( xmm[7],  gst->guest_XMM7 );
+   COPY_U128( xmm[8],  gst->guest_XMM8 );
+   COPY_U128( xmm[9],  gst->guest_XMM9 );
+   COPY_U128( xmm[10], gst->guest_XMM10 );
+   COPY_U128( xmm[11], gst->guest_XMM11 );
+   COPY_U128( xmm[12], gst->guest_XMM12 );
+   COPY_U128( xmm[13], gst->guest_XMM13 );
+   COPY_U128( xmm[14], gst->guest_XMM14 );
+   COPY_U128( xmm[15], gst->guest_XMM15 );
+
+#  undef COPY_U128
+}
+
+
+/* DIRTY HELPER (writes guest state) */
+/* Initialise the x87 FPU state as per 'finit'.
*/
+void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
+{
+   Int regno = 8;
+
+   /* Mark every register empty and give it the bit pattern of an
+      IEEE754 64-bit zero. */
+   while (regno > 0) {
+      regno--;
+      gst->guest_FPREG[regno] = 0;
+      gst->guest_FPTAG[regno] = 0;
+   }
+
+   /* Stack top at 0, round-to-nearest, condition codes cleared. */
+   gst->guest_FTOP    = 0;
+   gst->guest_FC3210  = 0;
+   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest memory) */
+/* Load the 80-bit value at address 'addrU' and return it converted
+   to a 64-bit value (as its bit pattern). */
+ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
+{
+   ULong  result;
+   UChar* src = (UChar*)ULong_to_Ptr(addrU);
+   UChar* dst = (UChar*)&result;
+
+   convert_f80le_to_f64le ( src, dst );
+   return result;
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest memory) */
+/* Convert the 64-bit value 'f64' (given as its bit pattern) to
+   80-bit format and store it at address 'addrU'. */
+void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
+{
+   UChar* src = (UChar*)&f64;
+   UChar* dst = (UChar*)ULong_to_Ptr(addrU);
+
+   convert_f64le_to_f80le( src, dst );
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* CLEAN HELPER */
+/* mxcsr[15:0] holds an MXCSR value in the SSE native layout.  Derive
+   the SSEROUND value from it, decide whether an emulation warning is
+   needed, and return (warn << 32) | sseround.
+*/
+ULong amd64g_check_ldmxcsr ( ULong mxcsr )
+{
+   VexEmWarn warning;
+
+   /* The rounding mode is mxcsr[14:13]. */
+   /* NOTE, encoded exactly as per enum IRRoundingMode. */
+   ULong rounding = (mxcsr >> 13) & 3;
+
+   /* At most one warning is reported; the checks are in priority
+      order. */
+   if ((mxcsr & 0x1F80) != 0x1F80) {
+      /* unmasked exceptions! */
+      warning = EmWarn_X86_sseExns;
+   }
+   else if (mxcsr & (1<<15)) {
+      /* FZ is set */
+      warning = EmWarn_X86_fz;
+   }
+   else if (mxcsr & (1<<6)) {
+      /* DAZ is set */
+      warning = EmWarn_X86_daz;
+   }
+   else {
+      warning = EmWarn_NONE;
+   }
+
+   return (((ULong)warning) << 32) | ((ULong)rounding);
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* CLEAN HELPER */
+/* Build an MXCSR value in the SSE native layout from 'sseround',
+   which is an IRRoundingMode value: 0x1F80 with the rounding mode
+   placed in bits 14:13. */
+ULong amd64g_create_mxcsr ( ULong sseround )
+{
+   return 0x1F80 | ((sseround & 3) << 13);
+}
+
+
+/* CLEAN HELPER */
+/* fpucw[15:0] contains a x87 native format FPU control word.
+   Extract from it the required FPROUND value and any resulting
+   emulation warning, and return (warn << 32) | fpround value.
+*/
+ULong amd64g_check_fldcw ( ULong fpucw )
+{
+   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
+   /* NOTE, encoded exactly as per enum IRRoundingMode. */
+   ULong rmode = (fpucw >> 10) & 3;
+
+   /* Detect any required emulation warnings. */
+   /* At most one warning is reported: unmasked exceptions are
+      checked first, then the precision field. */
+   VexEmWarn ew = EmWarn_NONE;
+
+   if ((fpucw & 0x3F) != 0x3F) {
+      /* unmasked exceptions! */
+      ew = EmWarn_X86_x87exns;
+   }
+   else
+   if (((fpucw >> 8) & 3) != 3) {
+      /* unsupported precision */
+      ew = EmWarn_X86_x87precision;
+   }
+
+   return (((ULong)ew) << 32) | ((ULong)rmode);
+}
+
+
+/* CLEAN HELPER */
+/* Given fpround as an IRRoundingMode value, create a suitable x87
+   native format FPU control word. */
+/* The result is 0x037F with the rounding mode in bits 11:10; that
+   base value passes both checks in amd64g_check_fldcw without a
+   warning. */
+ULong amd64g_create_fpucw ( ULong fpround )
+{
+   fpround &= 3;
+   return 0x037F | (fpround << 10);
+}
+
+
+/* This is used to implement 'fldenv'.
+   Reads 28 bytes at x87_state[0 .. 27]. */
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER */
+/* Updates the guest's FPTAG array, FTOP, FC3210 and FPROUND from the
+   environment image; the FP register contents themselves are not
+   touched.  Returns any emulation warning raised by the control
+   word. */
+VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
+                                      /*IN*/HWord x87_state)
+{
+   Int        stno, preg;
+   UInt       tag;
+   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+   Fpu_State* x87     = (Fpu_State*)x87_state;
+   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
+   UInt       tagw    = x87->env[FP_ENV_TAG];
+   UInt       fpucw   = x87->env[FP_ENV_CTRL];
+   ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
+   VexEmWarn  ew;
+   ULong      fpround;
+   ULong      pair;
+
+   /* Copy tags */
+   /* The x87 tag word has two bits per physical register (3 = empty);
+      the guest keeps one byte per register (0 = empty, 1 = not). */
+   for (stno = 0; stno < 8; stno++) {
+      preg = (stno + ftop) & 7;
+      tag = (tagw >> (2*preg)) & 3;
+      if (tag == 3) {
+         /* register is empty */
+         vexTags[preg] = 0;
+      } else {
+         /* register is non-empty */
+         vexTags[preg] = 1;
+      }
+   }
+
+   /* stack pointer */
+   vex_state->guest_FTOP = ftop;
+
+   /* status word */
+   vex_state->guest_FC3210 = c3210;
+
+   /* handle the control word, setting FPROUND and detecting any
+      emulation warnings. */
+   /* 'pair' is (warn << 32) | fpround, as returned by
+      amd64g_check_fldcw. */
+   pair    = amd64g_check_fldcw ( (ULong)fpucw );
+   fpround = pair & 0xFFFFFFFFULL;
+   ew      = (VexEmWarn)(pair >> 32);
+
+   vex_state->guest_FPROUND = fpround & 3;
+
+   /* emulation warnings --> caller */
+   return ew;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER */
+/* Create an x87 FPU env from the guest state, as close as we can
+   approximate it.  Writes 28 bytes at x87_state[0..27]. */
+void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
+                                 /*OUT*/HWord x87_state )
+{
+   Int        i, stno, preg;
+   UInt       tagw;
+   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+   Fpu_State* x87     = (Fpu_State*)x87_state;
+   UInt       ftop    = vex_state->guest_FTOP;
+   ULong      c3210   = vex_state->guest_FC3210;
+
+   for (i = 0; i < 14; i++)
+      x87->env[i] = 0;
+
+   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
+   /* Status word: stack top in bits 13:11, plus the bits of c3210
+      selected by mask 0x4700. */
+   x87->env[FP_ENV_STAT]
+      = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
+   x87->env[FP_ENV_CTRL]
+      = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
+
+   /* Compute the x87 tag word. */
+   /* Two bits per physical register: 3 for empty, 0 for full. */
+   tagw = 0;
+   for (stno = 0; stno < 8; stno++) {
+      preg = (stno + ftop) & 7;
+      if (vexTags[preg] == 0) {
+         /* register is empty */
+         tagw |= (3 << (2*preg));
+      } else {
+         /* register is full. */
+         tagw |= (0 << (2*preg));
+      }
+   }
+   x87->env[FP_ENV_TAG] = toUShort(tagw);
+
+   /* We don't dump the x87 registers, tho. */
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Misc integer helpers, including rotates and CPUID.      ---*/
+/*---------------------------------------------------------------*/
+
+/* Claim to be the following CPU, which is probably representative of
+   the lowliest (earliest) amd64 offerings.  It can do neither sse3
+   nor cx16.
+
+   vendor_id       : AuthenticAMD
+   cpu family      : 15
+   model           : 5
+   model name      : AMD Opteron (tm) Processor 848
+   stepping        : 10
+   cpu MHz         : 1797.682
+   cache size      : 1024 KB
+   fpu             : yes
+   fpu_exception   : yes
+   cpuid level     : 1
+   wp              : yes
+   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
+                     mtrr pge mca cmov pat pse36 clflush mmx fxsr
+                     sse sse2 syscall nx mmxext lm 3dnowext 3dnow
+   bogomips        : 3600.62
+   TLB size        : 1088 4K pages
+   clflush size    : 64
+   cache_alignment : 64
+   address sizes   : 40 bits physical, 48 bits virtual
+   power management: ts fid vid ttp
+*/
+/* The leaf is selected by the low 32 bits of the guest's RAX; the
+   four result words are written back to RAX, RBX, RCX and RDX.  Any
+   leaf not listed returns all zeroes. */
+void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
+{
+   /* Store four result words, each widened to 64 bits. */
+#  define SET_ABCD(_a,_b,_c,_d)                \
+      do { st->guest_RAX = (ULong)(_a);        \
+           st->guest_RBX = (ULong)(_b);        \
+           st->guest_RCX = (ULong)(_c);        \
+           st->guest_RDX = (ULong)(_d);        \
+      } while (0)
+
+   switch (0xFFFFFFFF & st->guest_RAX) {
+      case 0x00000000:
+         /* RAX = 1 matches "cpuid level : 1" above.  Read as
+            little-endian ASCII, RBX,RDX,RCX spell "AuthenticAMD". */
+         SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
+         break;
+      case 0x00000001:
+         /* RAX = 0xf5a: family 0xf, model 5, stepping 0xa, as listed
+            above.  RCX is zero, consistent with the claim of neither
+            sse3 nor cx16. */
+         SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
+         break;
+      case 0x80000000:
+         /* Same vendor bytes as leaf 0, with RAX = 0x80000018. */
+         SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
+         break;
+      case 0x80000001:
+         SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
+         break;
+      case 0x80000002:
+         /* ASCII bytes "AMD Opteron (tm)" */
+         SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
+         break;
+      case 0x80000003:
+         /* ASCII bytes " Processor 848", then two NULs */
+         SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
+         break;
+      case 0x80000004:
+         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+         break;
+      case 0x80000005:
+         SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
+         break;
+      case 0x80000006:
+         SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
+         break;
+      case 0x80000007:
+         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
+         break;
+      case 0x80000008:
+         /* RAX low bytes 0x28 and 0x30 are 40 and 48, matching the
+            "address sizes" line above. */
+         SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
+         break;
+      default:
+         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+         break;
+   }
+#  undef SET_ABCD
+}
+
+
+/* Claim to be the following CPU (2 x ...), which is sse3
and cx16 + capable. + + vendor_id : GenuineIntel + cpu family : 6 + model : 15 + model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz + stepping : 6 + cpu MHz : 2394.000 + cache size : 4096 KB + physical id : 0 + siblings : 2 + core id : 0 + cpu cores : 2 + fpu : yes + fpu_exception : yes + cpuid level : 10 + wp : yes + flags : fpu vme de pse tsc msr pae mce cx8 apic sep + mtrr pge mca cmov pat pse36 clflush dts acpi + mmx fxsr sse sse2 ss ht tm syscall nx lm + constant_tsc pni monitor ds_cpl vmx est tm2 + cx16 xtpr lahf_lm + bogomips : 4798.78 + clflush size : 64 + cache_alignment : 64 + address sizes : 36 bits physical, 48 bits virtual + power management: + */ + void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st ) + { + # define SET_ABCD(_a,_b,_c,_d) \ + do { st->guest_RAX = (ULong)(_a); \ + st->guest_RBX = (ULong)(_b); \ + st->guest_RCX = (ULong)(_c); \ + st->guest_RDX = (ULong)(_d); \ + } while (0) + + switch (0xFFFFFFFF & st->guest_RAX) { + case 0x00000000: + SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); + break; + case 0x00000001: + SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); + break; + case 0x00000002: + SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); + break; + case 0x00000003: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000004: { + switch (0xFFFFFFFF & st->guest_RCX) { + case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, + 0x0000003f, 0x00000001); break; + case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, + 0x0000003f, 0x00000001); break; + case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, + 0x00000fff, 0x00000001); break; + default: SET_ABCD(0x00000000, 0x00000000, + 0x00000000, 0x00000000); break; + } + break; + } + case 0x00000005: + SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); + break; + case 0x00000006: + SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); + break; + case 0x00000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + 
break; + case 0x00000008: + SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000009: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x0000000a: + unhandled_eax_value: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000000: + SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000001: + SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); + break; + case 0x80000002: + SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); + break; + case 0x80000003: + SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); + break; + case 0x80000004: + SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); + break; + case 0x80000005: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000006: + SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); + break; + case 0x80000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000008: + SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); + break; + default: + goto unhandled_eax_value; + } + # undef SET_ABCD + } + + + ULong amd64g_calculate_RCR ( ULong arg, + ULong rot_amt, + ULong rflags_in, + Long szIN ) + { + Bool wantRflags = toBool(szIN < 0); + ULong sz = wantRflags ? (-szIN) : szIN; + ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); + ULong cf=0, of=0, tempcf; + + switch (sz) { + case 8: + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + of = ((arg >> 63) ^ cf) & 1; + while (tempCOUNT > 0) { + tempcf = arg & 1; + arg = (arg >> 1) | (cf << 63); + cf = tempcf; + tempCOUNT--; + } + break; + case 4: + while (tempCOUNT >= 33) tempCOUNT -= 33; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + of = ((arg >> 31) ^ cf) & 1; + while (tempCOUNT > 0) { + tempcf = arg & 1; + arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31); + cf = tempcf; + tempCOUNT--; + } + break; + case 2: + while (tempCOUNT >= 17) tempCOUNT -= 17; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + of = ((arg >> 15) ^ cf) & 1; + while (tempCOUNT > 0) { + tempcf = arg & 1; + arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15); + cf = tempcf; + tempCOUNT--; + } + break; + case 1: + while (tempCOUNT >= 9) tempCOUNT -= 9; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + of = ((arg >> 7) ^ cf) & 1; + while (tempCOUNT > 0) { + tempcf = arg & 1; + arg = ((arg >> 1) & 0x7FULL) | (cf << 7); + cf = tempcf; + tempCOUNT--; + } + break; + default: + vpanic("calculate_RCR(amd64g): invalid size"); + } + + cf &= 1; + of &= 1; + rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); + rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); + + /* caller can ask to have back either the resulting flags or + resulting value, but not both */ + return wantRflags ? rflags_in : arg; + } + + ULong amd64g_calculate_RCL ( ULong arg, + ULong rot_amt, + ULong rflags_in, + Long szIN ) + { + Bool wantRflags = toBool(szIN < 0); + ULong sz = wantRflags ? (-szIN) : szIN; + ULong tempCOUNT = rot_amt & (sz == 8 ? 
0x3F : 0x1F); + ULong cf=0, of=0, tempcf; + + switch (sz) { + case 8: + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + while (tempCOUNT > 0) { + tempcf = (arg >> 63) & 1; + arg = (arg << 1) | (cf & 1); + cf = tempcf; + tempCOUNT--; + } + of = ((arg >> 63) ^ cf) & 1; + break; + case 4: + while (tempCOUNT >= 33) tempCOUNT -= 33; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + while (tempCOUNT > 0) { + tempcf = (arg >> 31) & 1; + arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1)); + cf = tempcf; + tempCOUNT--; + } + of = ((arg >> 31) ^ cf) & 1; + break; + case 2: + while (tempCOUNT >= 17) tempCOUNT -= 17; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + while (tempCOUNT > 0) { + tempcf = (arg >> 15) & 1; + arg = 0xFFFFULL & ((arg << 1) | (cf & 1)); + cf = tempcf; + tempCOUNT--; + } + of = ((arg >> 15) ^ cf) & 1; + break; + case 1: + while (tempCOUNT >= 9) tempCOUNT -= 9; + cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1; + while (tempCOUNT > 0) { + tempcf = (arg >> 7) & 1; + arg = 0xFFULL & ((arg << 1) | (cf & 1)); + cf = tempcf; + tempCOUNT--; + } + of = ((arg >> 7) ^ cf) & 1; + break; + default: + vpanic("calculate_RCL(amd64g): invalid size"); + } + + cf &= 1; + of &= 1; + rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O); + rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O); + + return wantRflags ? rflags_in : arg; + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-amd64 platforms, return 1. */ + ULong amd64g_dirtyhelper_RDTSC ( void ) + { + # if defined(__x86_64__) + UInt eax, edx; + __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx)); + return (((ULong)edx) << 32) | ((ULong)eax); + # else + return 1ULL; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-amd64 platforms, return 0. 
*/ + ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ) + { + # if defined(__x86_64__) + ULong r = 0; + portno &= 0xFFFF; + switch (sz) { + case 4: + __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0" + : "=a" (r) : "Nd" (portno)); + break; + case 2: + __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0" + : "=a" (r) : "Nd" (portno)); + break; + case 1: + __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0" + : "=a" (r) : "Nd" (portno)); + break; + default: + break; /* note: no 64-bit version of insn exists */ + } + return r; + # else + return 0; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-amd64 platforms, do nothing. */ + void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ ) + { + # if defined(__x86_64__) + portno &= 0xFFFF; + switch (sz) { + case 4: + __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1" + : : "a" (data), "Nd" (portno)); + break; + case 2: + __asm__ __volatile__("outw %w0, %w1" + : : "a" (data), "Nd" (portno)); + break; + case 1: + __asm__ __volatile__("outb %b0, %w1" + : : "a" (data), "Nd" (portno)); + break; + default: + break; /* note: no 64-bit version of insn exists */ + } + # else + /* do nothing */ + # endif + } + + + /*---------------------------------------------------------------*/ + /*--- Helpers for MMX/SSE/SSE2. ---*/ + /*---------------------------------------------------------------*/ + + static inline UChar abdU8 ( UChar xx, UChar yy ) { + return toUChar(xx>yy ? 
xx-yy : yy-xx); + } + + static inline ULong mk32x2 ( UInt w1, UInt w0 ) { + return (((ULong)w1) << 32) | ((ULong)w0); + } + + static inline UShort sel16x4_3 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32 >> 16); + } + static inline UShort sel16x4_2 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32); + } + static inline UShort sel16x4_1 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32 >> 16); + } + static inline UShort sel16x4_0 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32); + } + + static inline UChar sel8x8_7 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 24); + } + static inline UChar sel8x8_6 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 16); + } + static inline UChar sel8x8_5 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 8); + } + static inline UChar sel8x8_4 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 0); + } + static inline UChar sel8x8_3 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 24); + } + static inline UChar sel8x8_2 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 16); + } + static inline UChar sel8x8_1 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 8); + } + static inline UChar sel8x8_0 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 0); + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) + { + return + mk32x2( + (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) + + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), + (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) + + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) + ); + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong amd64g_calculate_mmx_pmovmskb ( ULong xx ) + { + ULong r = 0; + if (xx & (1ULL << (64-1))) 
r |= (1<<7); + if (xx & (1ULL << (56-1))) r |= (1<<6); + if (xx & (1ULL << (48-1))) r |= (1<<5); + if (xx & (1ULL << (40-1))) r |= (1<<4); + if (xx & (1ULL << (32-1))) r |= (1<<3); + if (xx & (1ULL << (24-1))) r |= (1<<2); + if (xx & (1ULL << (16-1))) r |= (1<<1); + if (xx & (1ULL << ( 8-1))) r |= (1<<0); + return r; + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) + { + UInt t = 0; + t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); + t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); + t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); + t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); + t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); + t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); + t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); + t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); + t &= 0xFFFF; + return (ULong)t; + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) + { + ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi ); + ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo ); + return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); + } + + + /*---------------------------------------------------------------*/ + /*--- Helpers for dealing with, and describing, ---*/ + /*--- guest state as a whole. ---*/ + /*---------------------------------------------------------------*/ + + /* Initialise the entire amd64 guest state. 
*/ + /* VISIBLE TO LIBVEX CLIENT. Zeroes the integer registers, the flags thunk, the %fs offset, RIP and all XMM registers, resets the simulated FPU via amd64g_dirtyhelper_FINIT, and selects Irrm_NEAREST as the SSE rounding mode. */ + void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) + { + vex_state->guest_RAX = 0; + vex_state->guest_RCX = 0; + vex_state->guest_RDX = 0; + vex_state->guest_RBX = 0; + vex_state->guest_RSP = 0; + vex_state->guest_RBP = 0; + vex_state->guest_RSI = 0; + vex_state->guest_RDI = 0; + vex_state->guest_R8 = 0; + vex_state->guest_R9 = 0; + vex_state->guest_R10 = 0; + vex_state->guest_R11 = 0; + vex_state->guest_R12 = 0; + vex_state->guest_R13 = 0; + vex_state->guest_R14 = 0; + vex_state->guest_R15 = 0; + + vex_state->guest_CC_OP = AMD64G_CC_OP_COPY; + vex_state->guest_CC_DEP1 = 0; + vex_state->guest_CC_DEP2 = 0; + vex_state->guest_CC_NDEP = 0; + + vex_state->guest_DFLAG = 1; /* forwards */ + vex_state->guest_IDFLAG = 0; + + /* HACK: represent the offset associated with %fs==0. This + assumes that %fs is only ever zero. */ + vex_state->guest_FS_ZERO = 0; + + vex_state->guest_RIP = 0; + + /* Initialise the simulated FPU */ + amd64g_dirtyhelper_FINIT( vex_state ); + + /* Initialise the SSE state. */ + # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; + + vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST; + SSEZERO(vex_state->guest_XMM0); + SSEZERO(vex_state->guest_XMM1); + SSEZERO(vex_state->guest_XMM2); + SSEZERO(vex_state->guest_XMM3); + SSEZERO(vex_state->guest_XMM4); + SSEZERO(vex_state->guest_XMM5); + SSEZERO(vex_state->guest_XMM6); + SSEZERO(vex_state->guest_XMM7); + SSEZERO(vex_state->guest_XMM8); + SSEZERO(vex_state->guest_XMM9); + SSEZERO(vex_state->guest_XMM10); + SSEZERO(vex_state->guest_XMM11); + SSEZERO(vex_state->guest_XMM12); + SSEZERO(vex_state->guest_XMM13); + SSEZERO(vex_state->guest_XMM14); + SSEZERO(vex_state->guest_XMM15); + + # undef SSEZERO + + vex_state->guest_EMWARN = EmWarn_NONE; + + /* These should not ever be either read or written, but we + initialise them anyway.
*/ + vex_state->guest_TISTART = 0; + vex_state->guest_TILEN = 0; + + vex_state->guest_NRADDR = 0; + vex_state->guest_SC_CLASS = 0; + vex_state->guest_GS_0x60 = 0; + + vex_state->guest_IP_AT_SYSCALL = 0; + /* vex_state->padding = 0; */ + } + + + /* Figure out if any part of the guest state contained in minoff + .. maxoff requires precise memory exceptions. If in doubt return + True (but this generates significantly slower code). + + By default we enforce precise exns for guest %RSP, %RBP and %RIP + only. These are the minimum needed to extract correct stack + backtraces from amd64 code. + + Both minoff and maxoff are treated as inclusive byte offsets + into VexGuestAMD64State. + */ + Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff, + Int maxoff) + { + Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP); + Int rbp_max = rbp_min + 8 - 1; /* offset of the register's last byte (inclusive) */ + Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP); + Int rsp_max = rsp_min + 8 - 1; + Int rip_min = offsetof(VexGuestAMD64State, guest_RIP); + Int rip_max = rip_min + 8 - 1; + + if (maxoff < rbp_min || minoff > rbp_max) { + /* no overlap with rbp */ + } else { + return True; + } + + if (maxoff < rsp_min || minoff > rsp_max) { + /* no overlap with rsp */ + } else { + return True; + } + + if (maxoff < rip_min || minoff > rip_max) { + /* no overlap with rip */ + } else { + return True; + } + + return False; + } + + + #define ALWAYSDEFD(field) \ + { offsetof(VexGuestAMD64State, field), \ + (sizeof ((VexGuestAMD64State*)0)->field) } + + VexGuestLayout + amd64guest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestAMD64State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestAMD64State,guest_RSP), + .sizeof_SP = 8, + + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestAMD64State,guest_RBP), + .sizeof_FP = 8, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestAMD64State,guest_RIP), + .sizeof_IP = 8, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'.
*/ + .n_alwaysDefd = 16, + + /* flags thunk: OP and NDEP are always defd, whereas DEP1 + and DEP2 have to be tracked. See detailed comment in + gdefs.h on meaning of thunk fields. */ + .alwaysDefd + = { /* 0 */ ALWAYSDEFD(guest_CC_OP), + /* 1 */ ALWAYSDEFD(guest_CC_NDEP), + /* 2 */ ALWAYSDEFD(guest_DFLAG), + /* 3 */ ALWAYSDEFD(guest_IDFLAG), + /* 4 */ ALWAYSDEFD(guest_RIP), + /* 5 */ ALWAYSDEFD(guest_FS_ZERO), + /* 6 */ ALWAYSDEFD(guest_FTOP), + /* 7 */ ALWAYSDEFD(guest_FPTAG), + /* 8 */ ALWAYSDEFD(guest_FPROUND), + /* 9 */ ALWAYSDEFD(guest_FC3210), + // /* */ ALWAYSDEFD(guest_CS), + // /* */ ALWAYSDEFD(guest_DS), + // /* */ ALWAYSDEFD(guest_ES), + // /* */ ALWAYSDEFD(guest_FS), + // /* */ ALWAYSDEFD(guest_GS), + // /* */ ALWAYSDEFD(guest_SS), + // /* */ ALWAYSDEFD(guest_LDT), + // /* */ ALWAYSDEFD(guest_GDT), + /* 10 */ ALWAYSDEFD(guest_EMWARN), + /* 11 */ ALWAYSDEFD(guest_SSEROUND), + /* 12 */ ALWAYSDEFD(guest_TISTART), + /* 13 */ ALWAYSDEFD(guest_TILEN), + /* 14 */ ALWAYSDEFD(guest_SC_CLASS), + /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) + } + }; + + + /*---------------------------------------------------------------*/ + /*--- end guest_amd64_helpers.c ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_amd64_toIR.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_amd64_toIR.c Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,16168 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_amd64_toIR.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. 
+ + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Translates AMD64 code to IR. */ + + /* TODO: + + All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked + to ensure a 64-bit value is being written. 
+ + x87 FP Limitations: + + * all arithmetic done at 64 bits + + * no FP exceptions, except for handling stack over/underflow + + * FP rounding mode observed only for float->int conversions and + int->float conversions which could lose accuracy, and for + float-to-float rounding. For all other operations, + round-to-nearest is used, regardless. + + * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the + simulation claims the argument is in-range (-2^63 <= arg <= 2^63) + even when it isn't. + + * some of the FCOM cases could do with testing -- not convinced + that the args are the right way round. + + * FSAVE does not re-initialise the FPU; it should do + + * FINIT not only initialises the FPU environment, it also zeroes + all the FP registers. It should leave the registers unchanged. + + RDTSC returns zero, always. + + SAHF should cause eflags[1] == 1, and in fact it produces 0. As + per Intel docs this bit has no meaning anyway. Since PUSHF is the + only way to observe eflags[1], a proper fix would be to make that + bit be set by PUSHF. + + This module uses global variables and so is not MT-safe (if that + should ever become relevant). + */ + + /* Notes re address size overrides (0x67). + + According to the AMD documentation (24594 Rev 3.09, Sept 2003, + "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose + and System Instructions"), Section 1.2.3 ("Address-Size Override + Prefix"): + + 0x67 applies to all explicit memory references, causing the top + 32 bits of the effective address to become zero. + + 0x67 has no effect on stack references (push/pop); these always + use a 64-bit address. + + 0x67 changes the interpretation of instructions which implicitly + reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used + instead. 
These are: + + cmp{s,sb,sw,sd,sq} + in{s,sb,sw,sd} + jcxz, jecxz, jrcxz + lod{s,sb,sw,sd,sq} + loop{,e,bz,be,z} + mov{s,sb,sw,sd,sq} + out{s,sb,sw,sd} + rep{,e,ne,nz} + sca{s,sb,sw,sd,sq} + sto{s,sb,sw,sd,sq} + xlat{,b} */ + + /* "Special" instructions. + + This instruction decoder can decode three special instructions + which mean nothing natively (are no-ops as far as regs/mem are + concerned) but have meaning for supporting Valgrind. A special + instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D + 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq + $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi). + Following that, one of the following 3 are allowed (standard + interpretation in parentheses): + + 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX ) + 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR + 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX + + Any other bytes following the 16-byte preamble are illegal and + constitute a failure in instruction decoding. This all assumes + that the preamble will never occur except in specific code + fragments designed for Valgrind to catch. + + No prefixes may precede a "Special" instruction. + */ + + /* casLE (implementation of lock-prefixed insns) and rep-prefixed + insns: the side-exit back to the start of the insn is done with + Ijk_Boring. This is quite wrong, it should be done with + Ijk_NoRedir, since otherwise the side exit, which is intended to + restart the instruction for whatever reason, could go somewhere + entirely else. Doing it right (with Ijk_NoRedir jumps) would make + no-redir jumps performance critical, at least for rep-prefixed + instructions, since all iterations thereof would involve such a + jump. It's not such a big deal with casLE since the side exit is + only taken if the CAS fails, that is, the location is contended, + which is relatively unlikely. 
+ + Note also, the test for CAS success vs failure is done using + Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary + Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it + shouldn't definedness-check these comparisons. See + COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for + background/rationale. + */ + + /* LOCK prefixed instructions. These are translated using IR-level + CAS statements (IRCAS) and are believed to preserve atomicity, even + from the point of view of some other process racing against a + simulated one (presumably they communicate via a shared memory + segment). + + Handlers which are aware of LOCK prefixes are: + dis_op2_G_E (add, or, adc, sbb, and, sub, xor) + dis_cmpxchg_G_E (cmpxchg) + dis_Grp1 (add, or, adc, sbb, and, sub, xor) + dis_Grp3 (not, neg) + dis_Grp4 (inc, dec) + dis_Grp5 (inc, dec) + dis_Grp8_Imm (bts, btc, btr) + dis_bt_G_E (bts, btc, btr) + dis_xadd_G_E (xadd) + */ + + + #include "libvex_basictypes.h" + #include "libvex_ir.h" + #include "libvex.h" + #include "libvex_guest_amd64.h" + + #include "main_util.h" + #include "main_globals.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_generic_x87.h" + #include "guest_amd64_defs.h" + + + /*------------------------------------------------------------*/ + /*--- Globals ---*/ + /*------------------------------------------------------------*/ + + /* These are set at the start of the translation of an insn, right + down in disInstr_AMD64, so that we don't have to pass them around + endlessly. They are all constant during the translation of any + given insn. */ + + /* These are set at the start of the translation of a BB, so + that we don't have to pass them around endlessly. */ + + /* We need to know this to do sub-register accesses correctly. */ + static Bool host_is_bigendian; + + /* Pointer to the guest code area (points to start of BB, not to the + insn being processed). 
*/ + static UChar* guest_code; + + /* The guest address corresponding to guest_code[0]. */ + static Addr64 guest_RIP_bbstart; + + /* The guest address for the instruction currently being + translated. */ + static Addr64 guest_RIP_curr_instr; + + /* The IRSB* into which we're generating code. */ + static IRSB* irsb; + + /* For ensuring that %rip-relative addressing is done right. A read + of %rip generates the address of the next instruction. It may be + that we don't conveniently know that inside disAMode(). For sanity + checking, if the next insn %rip is needed, we make a guess at what + it is, record that guess here, and set the accompanying Bool to + indicate that -- after this insn's decode is finished -- that guess + needs to be checked. */ + + /* At the start of each insn decode, is set to (0, False). + After the decode, if _mustcheck is now True, _assumed is + checked. */ + + static Addr64 guest_RIP_next_assumed; + static Bool guest_RIP_next_mustcheck; + + + /*------------------------------------------------------------*/ + /*--- Helpers for constructing IR. ---*/ + /*------------------------------------------------------------*/ + + /* Generate a new temporary of the given type. */ + static IRTemp newTemp ( IRType ty ) + { + vassert(isPlausibleIRType(ty)); + return newIRTemp( irsb->tyenv, ty ); + } + + /* Add a statement to the list held by "irsb". */ + static void stmt ( IRStmt* st ) + { + addStmtToIRSB( irsb, st ); + } + + /* Generate a statement "dst := e". 
*/ + static void assign ( IRTemp dst, IRExpr* e ) + { + stmt( IRStmt_WrTmp(dst, e) ); + } + + static IRExpr* unop ( IROp op, IRExpr* a ) + { + return IRExpr_Unop(op, a); + } + + static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) + { + return IRExpr_Binop(op, a1, a2); + } + + static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) + { + return IRExpr_Triop(op, a1, a2, a3); + } + + static IRExpr* mkexpr ( IRTemp tmp ) + { + return IRExpr_RdTmp(tmp); + } + + static IRExpr* mkU8 ( ULong i ) + { + vassert(i < 256); + return IRExpr_Const(IRConst_U8( (UChar)i )); + } + + static IRExpr* mkU16 ( ULong i ) + { + vassert(i < 0x10000ULL); + return IRExpr_Const(IRConst_U16( (UShort)i )); + } + + static IRExpr* mkU32 ( ULong i ) + { + vassert(i < 0x100000000ULL); + return IRExpr_Const(IRConst_U32( (UInt)i )); + } + + static IRExpr* mkU64 ( ULong i ) + { + return IRExpr_Const(IRConst_U64(i)); + } + + static IRExpr* mkU ( IRType ty, ULong i ) + { + switch (ty) { + case Ity_I8: return mkU8(i); + case Ity_I16: return mkU16(i); + case Ity_I32: return mkU32(i); + case Ity_I64: return mkU64(i); + default: vpanic("mkU(amd64)"); + } + } + + static void storeLE ( IRExpr* addr, IRExpr* data ) + { + stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) ); + } + + static IRExpr* loadLE ( IRType ty, IRExpr* data ) + { + return IRExpr_Load(False, Iend_LE, ty, data); + } + + static IROp mkSizedOp ( IRType ty, IROp op8 ) + { + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 + || op8 == Iop_Mul8 + || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 + || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 + || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 + || op8 == Iop_CasCmpNE8 + || op8 == Iop_Not8 ); + switch (ty) { + case Ity_I8: return 0 +op8; + case Ity_I16: return 1 +op8; + case Ity_I32: return 2 +op8; + case Ity_I64: return 3 +op8; + default: vpanic("mkSizedOp(amd64)"); + } + } + + static + IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) + { + 
if (szSmall == 1 && szBig == 4) { + return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); + } + if (szSmall == 1 && szBig == 2) { + return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); + } + if (szSmall == 2 && szBig == 4) { + return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); + } + if (szSmall == 1 && szBig == 8 && !signd) { + return unop(Iop_8Uto64, src); + } + if (szSmall == 1 && szBig == 8 && signd) { + return unop(Iop_8Sto64, src); + } + if (szSmall == 2 && szBig == 8 && !signd) { + return unop(Iop_16Uto64, src); + } + if (szSmall == 2 && szBig == 8 && signd) { + return unop(Iop_16Sto64, src); + } + vpanic("doScalarWidening(amd64)"); + } + + + + /*------------------------------------------------------------*/ + /*--- Debugging output ---*/ + /*------------------------------------------------------------*/ + + /* Bomb out if we can't handle something. */ + __attribute__ ((noreturn)) + static void unimplemented ( HChar* str ) + { + vex_printf("amd64toIR: unimplemented feature\n"); + vpanic(str); + } + + #define DIP(format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + + #define DIS(buf, format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ## args) + + + /*------------------------------------------------------------*/ + /*--- Offsets of various parts of the amd64 guest state. 
---*/ + /*------------------------------------------------------------*/ + + #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX) + #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX) + #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX) + #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX) + #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP) + #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP) + #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI) + #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI) + #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8) + #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9) + #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10) + #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11) + #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12) + #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13) + #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14) + #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15) + + #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP) + + #define OFFB_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO) + #define OFFB_GS_0x60 offsetof(VexGuestAMD64State,guest_GS_0x60) + + #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP) + #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1) + #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2) + #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP) + + #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0]) + #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0]) + #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) + #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) + #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) + #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) + #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND) + //.. + //.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS) + //.. 
#define OFFB_DS offsetof(VexGuestX86State,guest_DS) + //.. #define OFFB_ES offsetof(VexGuestX86State,guest_ES) + //.. #define OFFB_FS offsetof(VexGuestX86State,guest_FS) + //.. #define OFFB_GS offsetof(VexGuestX86State,guest_GS) + //.. #define OFFB_SS offsetof(VexGuestX86State,guest_SS) + //.. #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT) + //.. #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT) + + #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND) + #define OFFB_XMM0 offsetof(VexGuestAMD64State,guest_XMM0) + #define OFFB_XMM1 offsetof(VexGuestAMD64State,guest_XMM1) + #define OFFB_XMM2 offsetof(VexGuestAMD64State,guest_XMM2) + #define OFFB_XMM3 offsetof(VexGuestAMD64State,guest_XMM3) + #define OFFB_XMM4 offsetof(VexGuestAMD64State,guest_XMM4) + #define OFFB_XMM5 offsetof(VexGuestAMD64State,guest_XMM5) + #define OFFB_XMM6 offsetof(VexGuestAMD64State,guest_XMM6) + #define OFFB_XMM7 offsetof(VexGuestAMD64State,guest_XMM7) + #define OFFB_XMM8 offsetof(VexGuestAMD64State,guest_XMM8) + #define OFFB_XMM9 offsetof(VexGuestAMD64State,guest_XMM9) + #define OFFB_XMM10 offsetof(VexGuestAMD64State,guest_XMM10) + #define OFFB_XMM11 offsetof(VexGuestAMD64State,guest_XMM11) + #define OFFB_XMM12 offsetof(VexGuestAMD64State,guest_XMM12) + #define OFFB_XMM13 offsetof(VexGuestAMD64State,guest_XMM13) + #define OFFB_XMM14 offsetof(VexGuestAMD64State,guest_XMM14) + #define OFFB_XMM15 offsetof(VexGuestAMD64State,guest_XMM15) + + #define OFFB_EMWARN offsetof(VexGuestAMD64State,guest_EMWARN) + #define OFFB_TISTART offsetof(VexGuestAMD64State,guest_TISTART) + #define OFFB_TILEN offsetof(VexGuestAMD64State,guest_TILEN) + + #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) + + + /*------------------------------------------------------------*/ + /*--- Helper bits and pieces for deconstructing the ---*/ + /*--- amd64 insn stream. 
---*/ + /*------------------------------------------------------------*/ + + /* This is the AMD64 register encoding -- integer regs. */ + #define R_RAX 0 + #define R_RCX 1 + #define R_RDX 2 + #define R_RBX 3 + #define R_RSP 4 + #define R_RBP 5 + #define R_RSI 6 + #define R_RDI 7 + #define R_R8 8 + #define R_R9 9 + #define R_R10 10 + #define R_R11 11 + #define R_R12 12 + #define R_R13 13 + #define R_R14 14 + #define R_R15 15 + + //.. #define R_AL (0+R_EAX) + //.. #define R_AH (4+R_EAX) + + /* This is the Intel register encoding -- segment regs. */ + #define R_ES 0 + #define R_CS 1 + #define R_SS 2 + #define R_DS 3 + #define R_FS 4 + #define R_GS 5 + + + /* Various simple conversions */ + + static ULong extend_s_8to64 ( UChar x ) + { + return (ULong)((((Long)x) << 56) >> 56); + } + + static ULong extend_s_16to64 ( UShort x ) + { + return (ULong)((((Long)x) << 48) >> 48); + } + + static ULong extend_s_32to64 ( UInt x ) + { + return (ULong)((((Long)x) << 32) >> 32); + } + + /* Figure out whether the mod and rm parts of a modRM byte refer to a + register or memory. If so, the byte will have the form 11XXXYYY, + where YYY is the register number. */ + inline + static Bool epartIsReg ( UChar mod_reg_rm ) + { + return toBool(0xC0 == (mod_reg_rm & 0xC0)); + } + + /* Extract the 'g' field from a modRM byte. This only produces 3 + bits, which is not a complete register number. You should avoid + this function if at all possible. */ + inline + static Int gregLO3ofRM ( UChar mod_reg_rm ) + { + return (Int)( (mod_reg_rm >> 3) & 7 ); + } + + /* Ditto the 'e' field of a modRM byte. */ + inline + static Int eregLO3ofRM ( UChar mod_reg_rm ) + { + return (Int)(mod_reg_rm & 0x7); + } + + /* Get a 8/16/32-bit unsigned value out of the insn stream. 
*/ + + static UChar getUChar ( Long delta ) + { + UChar v = guest_code[delta+0]; + return v; + } + + static UInt getUDisp16 ( Long delta ) + { + UInt v = guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v & 0xFFFF; + } + + //.. static UInt getUDisp ( Int size, Long delta ) + //.. { + //.. switch (size) { + //.. case 4: return getUDisp32(delta); + //.. case 2: return getUDisp16(delta); + //.. case 1: return getUChar(delta); + //.. default: vpanic("getUDisp(x86)"); + //.. } + //.. return 0; /*notreached*/ + //.. } + + + /* Get a byte value out of the insn stream and sign-extend to 64 + bits. */ + static Long getSDisp8 ( Long delta ) + { + return extend_s_8to64( guest_code[delta] ); + } + + /* Get a 16-bit value out of the insn stream and sign-extend to 64 + bits. */ + static Long getSDisp16 ( Long delta ) + { + UInt v = guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return extend_s_16to64( (UShort)v ); + } + + /* Get a 32-bit value out of the insn stream and sign-extend to 64 + bits. */ + static Long getSDisp32 ( Long delta ) + { + UInt v = guest_code[delta+3]; v <<= 8; + v |= guest_code[delta+2]; v <<= 8; + v |= guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return extend_s_32to64( v ); + } + + /* Get a 64-bit value out of the insn stream. */ + static Long getDisp64 ( Long delta ) + { + ULong v = 0; + v |= guest_code[delta+7]; v <<= 8; + v |= guest_code[delta+6]; v <<= 8; + v |= guest_code[delta+5]; v <<= 8; + v |= guest_code[delta+4]; v <<= 8; + v |= guest_code[delta+3]; v <<= 8; + v |= guest_code[delta+2]; v <<= 8; + v |= guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v; + } + + /* Note: because AMD64 doesn't allow 64-bit literals, it is an error + if this is called with size==8. Should not happen. 
*/ + static Long getSDisp ( Int size, Long delta ) + { + switch (size) { + case 4: return getSDisp32(delta); + case 2: return getSDisp16(delta); + case 1: return getSDisp8(delta); + default: vpanic("getSDisp(amd64)"); + } + } + + static ULong mkSizeMask ( Int sz ) + { + switch (sz) { + case 1: return 0x00000000000000FFULL; + case 2: return 0x000000000000FFFFULL; + case 4: return 0x00000000FFFFFFFFULL; + case 8: return 0xFFFFFFFFFFFFFFFFULL; + default: vpanic("mkSzMask(amd64)"); + } + } + + static Int imin ( Int a, Int b ) + { + return (a < b) ? a : b; + } + + static IRType szToITy ( Int n ) + { + switch (n) { + case 1: return Ity_I8; + case 2: return Ity_I16; + case 4: return Ity_I32; + case 8: return Ity_I64; + default: vex_printf("\nszToITy(%d)\n", n); + vpanic("szToITy(amd64)"); + } + } + + + /*------------------------------------------------------------*/ + /*--- For dealing with prefixes. ---*/ + /*------------------------------------------------------------*/ + + /* The idea is to pass around an int holding a bitmask summarising + info from the prefixes seen on the current instruction, including + info from the REX byte. This info is used in various places, but + most especially when making sense of register fields in + instructions. + + The top 16 bits of the prefix are 0x3141, just as a hacky way + to ensure it really is a valid prefix. + + Things you can safely assume about a well-formed prefix: + * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. + * if REX is not present then REXW,REXR,REXX,REXB will read + as zero. + * F2 and F3 will not both be 1. 
+ */ + + typedef UInt Prefix; + + #define PFX_ASO (1<<0) /* address-size override present (0x67) */ + #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */ + #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */ + #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */ + #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */ + #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */ + #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */ + #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */ + #define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */ + #define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */ + #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */ + #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */ + #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */ + #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */ + #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */ + #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */ + + #define PFX_EMPTY 0x31410000 + + static Bool IS_VALID_PFX ( Prefix pfx ) { + return toBool((pfx & 0xFFFF0000) == PFX_EMPTY); + } + + static Bool haveREX ( Prefix pfx ) { + return toBool(pfx & PFX_REX); + } + + static Int getRexW ( Prefix pfx ) { + return (pfx & PFX_REXW) ? 1 : 0; + } + /* Apparently unused. + static Int getRexR ( Prefix pfx ) { + return (pfx & PFX_REXR) ? 1 : 0; + } + */ + static Int getRexX ( Prefix pfx ) { + return (pfx & PFX_REXX) ? 1 : 0; + } + static Int getRexB ( Prefix pfx ) { + return (pfx & PFX_REXB) ? 1 : 0; + } + + /* Check a prefix doesn't have F2 or F3 set in it, since usually that + completely changes what instruction it really is.
*/ + static Bool haveF2orF3 ( Prefix pfx ) { + return toBool((pfx & (PFX_F2|PFX_F3)) > 0); + } + static Bool haveF2 ( Prefix pfx ) { + return toBool((pfx & PFX_F2) > 0); + } + static Bool haveF3 ( Prefix pfx ) { + return toBool((pfx & PFX_F3) > 0); + } + + static Bool have66 ( Prefix pfx ) { + return toBool((pfx & PFX_66) > 0); + } + static Bool haveASO ( Prefix pfx ) { + return toBool((pfx & PFX_ASO) > 0); + } + + /* Return True iff pfx has 66 set and F2 and F3 clear */ + static Bool have66noF2noF3 ( Prefix pfx ) + { + return + toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); + } + + /* Return True iff pfx has F2 set and 66 and F3 clear */ + static Bool haveF2no66noF3 ( Prefix pfx ) + { + return + toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2); + } + + /* Return True iff pfx has F3 set and 66 and F2 clear */ + static Bool haveF3no66noF2 ( Prefix pfx ) + { + return + toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); + } + + /* Return True iff pfx has 66, F2 and F3 clear */ + static Bool haveNo66noF2noF3 ( Prefix pfx ) + { + return + toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); + } + + /* Return True iff pfx has any of 66, F2 and F3 set */ + static Bool have66orF2orF3 ( Prefix pfx ) + { + return toBool( ! haveNo66noF2noF3(pfx) ); + } + + /* Return True iff pfx has 66 or F2 set */ + static Bool have66orF2 ( Prefix pfx ) + { + return toBool((pfx & (PFX_66|PFX_F2)) > 0); + } + + /* Clear all the segment-override bits in a prefix. */ + static Prefix clearSegBits ( Prefix p ) + { + return + p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); + } + + + /*------------------------------------------------------------*/ + /*--- For dealing with integer registers ---*/ + /*------------------------------------------------------------*/ + + /* This is somewhat complex. The rules are: + + For 64, 32 and 16 bit register references, the e or g fields in the + modrm bytes supply the low 3 bits of the register number. 
The + fourth (most-significant) bit of the register number is supplied by + the REX byte, if it is present; else that bit is taken to be zero. + + The REX.R bit supplies the high bit corresponding to the g register + field, and the REX.B bit supplies the high bit corresponding to the + e register field (when the mod part of modrm indicates that modrm's + e component refers to a register and not to memory). + + The REX.X bit supplies a high register bit for certain registers + in SIB address modes, and is generally rarely used. + + For 8 bit register references, the presence of the REX byte itself + has significance. If there is no REX present, then the 3-bit + number extracted from the modrm e or g field is treated as an index + into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the + old x86 encoding scheme. + + But if there is a REX present, the register reference is + interpreted in the same way as for 64/32/16-bit references: a high + bit is extracted from REX, giving a 4-bit number, and the denoted + register is the lowest 8 bits of the 16 integer registers denoted + by the number. In particular, values 4 through 7 of this sequence + do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of + %rsp %rbp %rsi %rdi. + + The REX.W bit has no bearing at all on register numbers. Instead + its presence indicates that the operand size is to be overridden + from its default value (32 bits) to 64 bits instead. This is in + the same fashion that an 0x66 prefix indicates the operand size is + to be overridden from 32 bits down to 16 bits. When both REX.W and + 0x66 are present there is a conflict, and REX.W takes precedence. + + Rather than try to handle this complexity using a single huge + function, several smaller ones are provided. The aim is to make it + as difficult as possible to screw up register decoding in a subtle + and hard-to-track-down way.
+ + Because these routines fish around in the host's memory (that is, + in the guest state area) for sub-parts of guest registers, their + correctness depends on the host's endianness. So far these + routines only work for little-endian hosts. Those for which + endianness is important have assertions to ensure sanity. + */ + + + /* About the simplest question you can ask: where do the 64-bit + integer registers live (in the guest state) ? */ + + static Int integerGuestReg64Offset ( UInt reg ) + { + switch (reg) { + case R_RAX: return OFFB_RAX; + case R_RCX: return OFFB_RCX; + case R_RDX: return OFFB_RDX; + case R_RBX: return OFFB_RBX; + case R_RSP: return OFFB_RSP; + case R_RBP: return OFFB_RBP; + case R_RSI: return OFFB_RSI; + case R_RDI: return OFFB_RDI; + case R_R8: return OFFB_R8; + case R_R9: return OFFB_R9; + case R_R10: return OFFB_R10; + case R_R11: return OFFB_R11; + case R_R12: return OFFB_R12; + case R_R13: return OFFB_R13; + case R_R14: return OFFB_R14; + case R_R15: return OFFB_R15; + default: vpanic("integerGuestReg64Offset(amd64)"); + } + } + + + /* Produce the name of an integer register, for printing purposes. + reg is a number in the range 0 .. 15 that has been generated from a + 3-bit reg-field number and a REX extension bit. irregular denotes + the case where sz==1 and no REX byte is present. 
*/ + + static + HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) + { + static HChar* ireg64_names[16] + = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; + static HChar* ireg32_names[16] + = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; + static HChar* ireg16_names[16] + = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; + static HChar* ireg8_names[16] + = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; + static HChar* ireg8_irregular[8] + = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; + + vassert(reg < 16); + if (sz == 1) { + if (irregular) + vassert(reg < 8); + } else { + vassert(irregular == False); + } + + switch (sz) { + case 8: return ireg64_names[reg]; + case 4: return ireg32_names[reg]; + case 2: return ireg16_names[reg]; + case 1: if (irregular) { + return ireg8_irregular[reg]; + } else { + return ireg8_names[reg]; + } + default: vpanic("nameIReg(amd64)"); + } + } + + /* Using the same argument conventions as nameIReg, produce the + guest state offset of an integer register. */ + + static + Int offsetIReg ( Int sz, UInt reg, Bool irregular ) + { + vassert(reg < 16); + if (sz == 1) { + if (irregular) + vassert(reg < 8); + } else { + vassert(irregular == False); + } + + /* Deal with irregular case -- sz==1 and no REX present */ + if (sz == 1 && irregular) { + switch (reg) { + case R_RSP: return 1+ OFFB_RAX; + case R_RBP: return 1+ OFFB_RCX; + case R_RSI: return 1+ OFFB_RDX; + case R_RDI: return 1+ OFFB_RBX; + default: break; /* use the normal case */ + } + } + + /* Normal case */ + return integerGuestReg64Offset(reg); + } + + + /* Read the %CL register :: Ity_I8, for shift/rotate operations. 
*/ + + static IRExpr* getIRegCL ( void ) + { + vassert(!host_is_bigendian); + return IRExpr_Get( OFFB_RCX, Ity_I8 ); + } + + + /* Write to the %AH register. */ + + static void putIRegAH ( IRExpr* e ) + { + vassert(!host_is_bigendian); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + stmt( IRStmt_Put( OFFB_RAX+1, e ) ); + } + + + /* Read/write various widths of %RAX, as it has various + special-purpose uses. */ + + static HChar* nameIRegRAX ( Int sz ) + { + switch (sz) { + case 1: return "%al"; + case 2: return "%ax"; + case 4: return "%eax"; + case 8: return "%rax"; + default: vpanic("nameIRegRAX(amd64)"); + } + } + + static IRExpr* getIRegRAX ( Int sz ) + { + vassert(!host_is_bigendian); + switch (sz) { + case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 ); + case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 ); + case 4: return IRExpr_Get( OFFB_RAX, Ity_I32 ); + case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 ); + default: vpanic("getIRegRAX(amd64)"); + } + } + + static void putIRegRAX ( Int sz, IRExpr* e ) + { + IRType ty = typeOfIRExpr(irsb->tyenv, e); + vassert(!host_is_bigendian); + switch (sz) { + case 8: vassert(ty == Ity_I64); + stmt( IRStmt_Put( OFFB_RAX, e )); + break; + case 4: vassert(ty == Ity_I32); + stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) )); + break; + case 2: vassert(ty == Ity_I16); + stmt( IRStmt_Put( OFFB_RAX, e )); + break; + case 1: vassert(ty == Ity_I8); + stmt( IRStmt_Put( OFFB_RAX, e )); + break; + default: vpanic("putIRegRAX(amd64)"); + } + } + + + /* Read/write various widths of %RDX, as it has various + special-purpose uses. 
*/ + + static HChar* nameIRegRDX ( Int sz ) + { + switch (sz) { + case 1: return "%dl"; + case 2: return "%dx"; + case 4: return "%edx"; + case 8: return "%rdx"; + default: vpanic("nameIRegRDX(amd64)"); + } + } + + static IRExpr* getIRegRDX ( Int sz ) + { + vassert(!host_is_bigendian); + switch (sz) { + case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 ); + case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 ); + case 4: return IRExpr_Get( OFFB_RDX, Ity_I32 ); + case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 ); + default: vpanic("getIRegRDX(amd64)"); + } + } + + static void putIRegRDX ( Int sz, IRExpr* e ) + { + vassert(!host_is_bigendian); + vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); + switch (sz) { + case 8: stmt( IRStmt_Put( OFFB_RDX, e )); + break; + case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) )); + break; + case 2: stmt( IRStmt_Put( OFFB_RDX, e )); + break; + case 1: stmt( IRStmt_Put( OFFB_RDX, e )); + break; + default: vpanic("putIRegRDX(amd64)"); + } + } + + + /* Simplistic functions to deal with the integer registers as a + straightforward bank of 16 64-bit regs. */ + + static IRExpr* getIReg64 ( UInt regno ) + { + return IRExpr_Get( integerGuestReg64Offset(regno), + Ity_I64 ); + } + + static void putIReg64 ( UInt regno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); + stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) ); + } + + static HChar* nameIReg64 ( UInt regno ) + { + return nameIReg( 8, regno, False ); + } + + + /* Simplistic functions to deal with the lower halves of integer + registers as a straightforward bank of 16 32-bit regs. 
*/ + + static IRExpr* getIReg32 ( UInt regno ) + { + vassert(!host_is_bigendian); + return IRExpr_Get( integerGuestReg64Offset(regno), + Ity_I32 ); + } + + static void putIReg32 ( UInt regno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); + stmt( IRStmt_Put( integerGuestReg64Offset(regno), + unop(Iop_32Uto64,e) ) ); + } + + static HChar* nameIReg32 ( UInt regno ) + { + return nameIReg( 4, regno, False ); + } + + + /* Simplistic functions to deal with the lower quarters of integer + registers as a straightforward bank of 16 16-bit regs. */ + + static IRExpr* getIReg16 ( UInt regno ) + { + vassert(!host_is_bigendian); + return IRExpr_Get( integerGuestReg64Offset(regno), + Ity_I16 ); + } + + static HChar* nameIReg16 ( UInt regno ) + { + return nameIReg( 2, regno, False ); + } + + + /* Sometimes what we know is a 3-bit register number, a REX byte, and + which field of the REX byte is to be used to extend to a 4-bit + number. These functions cater for that situation. + */ + static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits ) + { + vassert(lo3bits < 8); + vassert(IS_VALID_PFX(pfx)); + return getIReg64( lo3bits | (getRexX(pfx) << 3) ); + } + + static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits ) + { + vassert(lo3bits < 8); + vassert(IS_VALID_PFX(pfx)); + return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False ); + } + + static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) + { + vassert(lo3bits < 8); + vassert(IS_VALID_PFX(pfx)); + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + return nameIReg( sz, lo3bits | (getRexB(pfx) << 3), + toBool(sz==1 && !haveREX(pfx)) ); + } + + static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) + { + vassert(lo3bits < 8); + vassert(IS_VALID_PFX(pfx)); + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + return IRExpr_Get( + offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), + toBool(sz==1 && !haveREX(pfx)) ), + szToITy(sz) + ); + } + + static void putIRegRexB ( Int sz, Prefix pfx, UInt 
lo3bits, IRExpr* e ) + { + vassert(lo3bits < 8); + vassert(IS_VALID_PFX(pfx)); + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); + stmt( IRStmt_Put( + offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), + toBool(sz==1 && !haveREX(pfx)) ), + sz==4 ? unop(Iop_32Uto64,e) : e + )); + } + + + /* Functions for getting register numbers from modrm bytes and REX + when we don't have to consider the complexities of integer subreg + accesses. + */ + /* Extract the g reg field from a modRM byte, and augment it using the + REX.R bit from the supplied REX byte. The R bit usually is + associated with the g register field. + */ + static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) + { + Int reg = (Int)( (mod_reg_rm >> 3) & 7 ); + reg += (pfx & PFX_REXR) ? 8 : 0; + return reg; + } + + /* Extract the e reg field from a modRM byte, and augment it using the + REX.B bit from the supplied REX byte. The B bit usually is + associated with the e register field (when modrm indicates e is a + register, that is). + */ + static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) + { + Int rm; + vassert(epartIsReg(mod_reg_rm)); + rm = (Int)(mod_reg_rm & 0x7); + rm += (pfx & PFX_REXB) ? 8 : 0; + return rm; + } + + + /* General functions for dealing with integer register access. */ + + /* Produce the guest state offset for a reference to the 'g' register + field in a modrm byte, taking into account REX (or its absence), + and the size of the access. 
+ */ + static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + UInt reg; + vassert(!host_is_bigendian); + vassert(IS_VALID_PFX(pfx)); + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + reg = gregOfRexRM( pfx, mod_reg_rm ); + return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); + } + + static + IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), + szToITy(sz) ); + } + + static + void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); + if (sz == 4) { + e = unop(Iop_32Uto64,e); + } + stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) ); + } + + static + HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), + toBool(sz==1 && !haveREX(pfx)) ); + } + + + /* Produce the guest state offset for a reference to the 'e' register + field in a modrm byte, taking into account REX (or its absence), + and the size of the access. eregOfRexRM will assert if mod_reg_rm + denotes a memory access rather than a register access. 
+ */ + static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + UInt reg; + vassert(!host_is_bigendian); + vassert(IS_VALID_PFX(pfx)); + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + reg = eregOfRexRM( pfx, mod_reg_rm ); + return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); + } + + static + IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), + szToITy(sz) ); + } + + static + void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); + if (sz == 4) { + e = unop(Iop_32Uto64,e); + } + stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) ); + } + + static + HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) + { + return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), + toBool(sz==1 && !haveREX(pfx)) ); + } + + + /*------------------------------------------------------------*/ + /*--- For dealing with XMM registers ---*/ + /*------------------------------------------------------------*/ + + //.. static Int segmentGuestRegOffset ( UInt sreg ) + //.. { + //.. switch (sreg) { + //.. case R_ES: return OFFB_ES; + //.. case R_CS: return OFFB_CS; + //.. case R_SS: return OFFB_SS; + //.. case R_DS: return OFFB_DS; + //.. case R_FS: return OFFB_FS; + //.. case R_GS: return OFFB_GS; + //.. default: vpanic("segmentGuestRegOffset(x86)"); + //.. } + //.. 
} + + static Int xmmGuestRegOffset ( UInt xmmreg ) + { + switch (xmmreg) { + case 0: return OFFB_XMM0; + case 1: return OFFB_XMM1; + case 2: return OFFB_XMM2; + case 3: return OFFB_XMM3; + case 4: return OFFB_XMM4; + case 5: return OFFB_XMM5; + case 6: return OFFB_XMM6; + case 7: return OFFB_XMM7; + case 8: return OFFB_XMM8; + case 9: return OFFB_XMM9; + case 10: return OFFB_XMM10; + case 11: return OFFB_XMM11; + case 12: return OFFB_XMM12; + case 13: return OFFB_XMM13; + case 14: return OFFB_XMM14; + case 15: return OFFB_XMM15; + default: vpanic("xmmGuestRegOffset(amd64)"); + } + } + + /* Lanes of vector registers are always numbered from zero being the + least significant lane (rightmost in the register). */ + + static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. */ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 8); + return xmmGuestRegOffset( xmmreg ) + 2 * laneno; + } + + static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. */ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 4); + return xmmGuestRegOffset( xmmreg ) + 4 * laneno; + } + + static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. */ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 2); + return xmmGuestRegOffset( xmmreg ) + 8 * laneno; + } + + //.. static IRExpr* getSReg ( UInt sreg ) + //.. { + //.. return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 ); + //.. } + //.. + //.. static void putSReg ( UInt sreg, IRExpr* e ) + //.. { + //.. vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); + //.. stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) ); + //.. 
} + + static IRExpr* getXMMReg ( UInt xmmreg ) + { + return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); + } + + static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); + } + + static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); + } + + static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); + } + + static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); + } + + static void putXMMReg ( UInt xmmreg, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); + stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); + } + + static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); + stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); + stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); + stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); + stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); + stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) ); + } + + static IRExpr* mkV128 ( UShort mask ) + { + return IRExpr_Const(IRConst_V128(mask)); + } + + static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) + { + 
vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); + vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); + return unop(Iop_64to1, + binop(Iop_And64, + unop(Iop_1Uto64,x), + unop(Iop_1Uto64,y))); + } + + /* Generate a compare-and-swap operation, operating on memory at + 'addr'. The expected value is 'expVal' and the new value is + 'newVal'. If the operation fails, then transfer control (with a + no-redir jump (XXX no -- see comment at top of this file)) to + 'restart_point', which is presumably the address of the guest + instruction again -- retrying, essentially. */ + static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, + Addr64 restart_point ) + { + IRCAS* cas; + IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); + IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); + IRTemp oldTmp = newTemp(tyE); + IRTemp expTmp = newTemp(tyE); + vassert(tyE == tyN); + vassert(tyE == Ity_I64 || tyE == Ity_I32 + || tyE == Ity_I16 || tyE == Ity_I8); + assign(expTmp, expVal); + cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, + NULL, mkexpr(expTmp), NULL, newVal ); + stmt( IRStmt_CAS(cas) ); + stmt( IRStmt_Exit( + binop( mkSizedOp(tyE,Iop_CasCmpNE8), + mkexpr(oldTmp), mkexpr(expTmp) ), + Ijk_Boring, /*Ijk_NoRedir*/ + IRConst_U64( restart_point ) + )); + } + + + /*------------------------------------------------------------*/ + /*--- Helpers for %rflags. ---*/ + /*------------------------------------------------------------*/ + + /* -------------- Evaluating the flags-thunk. -------------- */ + + /* Build IR to calculate all the eflags from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: + Ity_I64. 
*/ + static IRExpr* mk_amd64g_calculate_rflags_all ( void ) + { + IRExpr** args + = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), + IRExpr_Get(OFFB_CC_DEP1, Ity_I64), + IRExpr_Get(OFFB_CC_DEP2, Ity_I64), + IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); + IRExpr* call + = mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all, + args + ); + /* Exclude OP and NDEP from definedness checking. We're only + interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; + } + + /* Build IR to calculate some particular condition from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: + Ity_I1 (the 64-bit helper result narrowed with Iop_64to1). */ + static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond ) + { + IRExpr** args + = mkIRExprVec_5( mkU64(cond), + IRExpr_Get(OFFB_CC_OP, Ity_I64), + IRExpr_Get(OFFB_CC_DEP1, Ity_I64), + IRExpr_Get(OFFB_CC_DEP2, Ity_I64), + IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); + IRExpr* call + = mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "amd64g_calculate_condition", &amd64g_calculate_condition, + args + ); + /* Exclude the requested condition, OP and NDEP from definedness + checking. We're only interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4); + return unop(Iop_64to1, call); + } + + /* Build IR to calculate just the carry flag from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */ + static IRExpr* mk_amd64g_calculate_rflags_c ( void ) + { + IRExpr** args + = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), + IRExpr_Get(OFFB_CC_DEP1, Ity_I64), + IRExpr_Get(OFFB_CC_DEP2, Ity_I64), + IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); + IRExpr* call + = mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c, + args + ); + /* Exclude OP and NDEP from definedness checking. We're only + interested in DEP1 and DEP2.
*/ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; + } + + + /* -------------- Building the flags-thunk. -------------- */ + + /* The machinery in this section builds the flag-thunk following a + flag-setting operation. Hence the various setFlags_* functions. + */ + + static Bool isAddSub ( IROp op8 ) + { + return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8); + } + + static Bool isLogic ( IROp op8 ) + { + return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8); + } + + /* U-widen 8/16/32/64 bit int expr to 64. */ + static IRExpr* widenUto64 ( IRExpr* e ) + { + switch (typeOfIRExpr(irsb->tyenv,e)) { + case Ity_I64: return e; + case Ity_I32: return unop(Iop_32Uto64, e); + case Ity_I16: return unop(Iop_16Uto64, e); + case Ity_I8: return unop(Iop_8Uto64, e); + default: vpanic("widenUto64"); + } + } + + /* S-widen 8/16/32/64 bit int expr to 32. */ + static IRExpr* widenSto64 ( IRExpr* e ) + { + switch (typeOfIRExpr(irsb->tyenv,e)) { + case Ity_I64: return e; + case Ity_I32: return unop(Iop_32Sto64, e); + case Ity_I16: return unop(Iop_16Sto64, e); + case Ity_I8: return unop(Iop_8Sto64, e); + default: vpanic("widenSto64"); + } + } + + /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some + of these combinations make sense. 
*/
+static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
+{
+   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
+
+   /* Same width: nothing to do. */
+   if (src_ty == dst_ty)
+      return e;
+
+   /* Supported narrowings: 32 -> 16/8 and 64 -> 32/16/8. */
+   switch (src_ty) {
+      case Ity_I32:
+         if (dst_ty == Ity_I16) return unop(Iop_32to16, e);
+         if (dst_ty == Ity_I8)  return unop(Iop_32to8, e);
+         break;
+      case Ity_I64:
+         if (dst_ty == Ity_I32) return unop(Iop_64to32, e);
+         if (dst_ty == Ity_I16) return unop(Iop_64to16, e);
+         if (dst_ty == Ity_I8)  return unop(Iop_64to8, e);
+         break;
+      default:
+         break;
+   }
+
+   /* Anything else is a caller error: report the types and give up. */
+   vex_printf("\nsrc, dst tys are: ");
+   ppIRType(src_ty);
+   vex_printf(", ");
+   ppIRType(dst_ty);
+   vex_printf("\n");
+   vpanic("narrowTo(amd64)");
+}
+
+
+/* Write the OP, DEP1 and DEP2 fields of the flags thunk for an add
+   or subtract.  op8 is the 8-bit primop (Iop_Add8 or Iop_Sub8); the
+   thunk operation actually stored is the ADDB/SUBB base plus 0..3
+   according to whether ty is I8/I16/I32/I64.  dep1 and dep2 are
+   zero-extended to 64 bits before being stored. */
+
+static 
+void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
+{
+   Int szIx    = 0;   /* 0..3 for I8..I64 */
+   Int thunkOp = 0;
+
+   switch (ty) {
+      case Ity_I8:  szIx = 0; break;
+      case Ity_I16: szIx = 1; break;
+      case Ity_I32: szIx = 2; break;
+      case Ity_I64: szIx = 3; break;
+      default: vassert(0);
+   }
+
+   if (op8 == Iop_Add8) {
+      thunkOp = szIx + AMD64G_CC_OP_ADDB;
+   } else if (op8 == Iop_Sub8) {
+      thunkOp = szIx + AMD64G_CC_OP_SUBB;
+   } else {
+      ppIROp(op8);
+      vpanic("setFlags_DEP1_DEP2(amd64)");
+   }
+
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
+}
+
+
+/* Set the OP and DEP1 fields only, and write zero to DEP2.
*/ + + static + void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) + { + Int ccOp = 0; + switch (ty) { + case Ity_I8: ccOp = 0; break; + case Ity_I16: ccOp = 1; break; + case Ity_I32: ccOp = 2; break; + case Ity_I64: ccOp = 3; break; + default: vassert(0); + } + switch (op8) { + case Iop_Or8: + case Iop_And8: + case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break; + default: ppIROp(op8); + vpanic("setFlags_DEP1(amd64)"); + } + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); + } + + + /* For shift operations, we put in the result and the undershifted + result. Except if the shift amount is zero, the thunk is left + unchanged. */ + + static void setFlags_DEP1_DEP2_shift ( IROp op64, + IRTemp res, + IRTemp resUS, + IRType ty, + IRTemp guard ) + { + Int ccOp = 0; + switch (ty) { + case Ity_I8: ccOp = 0; break; + case Ity_I16: ccOp = 1; break; + case Ity_I32: ccOp = 2; break; + case Ity_I64: ccOp = 3; break; + default: vassert(0); + } + + vassert(guard); + + /* Both kinds of right shifts are handled by the same thunk + operation. */ + switch (op64) { + case Iop_Shr64: + case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break; + case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break; + default: ppIROp(op64); + vpanic("setFlags_DEP1_DEP2_shift(amd64)"); + } + + /* DEP1 contains the result, DEP2 contains the undershifted value. */ + stmt( IRStmt_Put( OFFB_CC_OP, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_OP,Ity_I64), + mkU64(ccOp))) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_DEP1,Ity_I64), + widenUto64(mkexpr(res)))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_DEP2,Ity_I64), + widenUto64(mkexpr(resUS)))) ); + } + + + /* For the inc/dec case, we store in DEP1 the result value and in NDEP + the former value of the carry flag, which unfortunately we have to + compute. 
*/
+
+static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
+{
+   Int thunkOp;
+
+   /* Start from the byte-sized INC or DEC thunk op ... */
+   if (inc)
+      thunkOp = AMD64G_CC_OP_INCB;
+   else
+      thunkOp = AMD64G_CC_OP_DECB;
+
+   /* ... and step up to the variant matching the operand size. */
+   switch (ty) {
+      case Ity_I8:                break;
+      case Ity_I16: thunkOp += 1; break;
+      case Ity_I32: thunkOp += 2; break;
+      case Ity_I64: thunkOp += 3; break;
+      default: vassert(0);
+   }
+
+   /* NDEP must be written before the other fields: computing the old
+      carry flag may require reading all four thunk fields, so they
+      must still hold their previous values at that point. */
+   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
+}
+
+
+/* Multiplies are handled much like add and sub: DEP1 and DEP2 receive
+   the two (zero-extended) arguments.  base_op is the byte-sized thunk
+   operation; 0..3 is added to it for I8/I16/I32/I64.  Any other type
+   panics before anything is written. */
+
+static
+void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
+{
+   ULong szIx = 0;   /* 0..3 for I8..I64 */
+
+   switch (ty) {
+      case Ity_I8:  szIx = 0; break;
+      case Ity_I16: szIx = 1; break;
+      case Ity_I32: szIx = 2; break;
+      case Ity_I64: szIx = 3; break;
+      default:
+         vpanic("setFlags_MUL(amd64)");
+   }
+
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(base_op+szIx) ) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
+}
+
+
+/* -------------- Condition codes. -------------- */
+
+/* Condition codes, using the AMD encoding.
*/
+
+/* Printable mnemonic suffix for a condition code.  Where two
+   spellings exist, the one used is shown and the alternative is
+   noted alongside.  Unknown values panic. */
+static HChar* name_AMD64Condcode ( AMD64Condcode cond )
+{
+   HChar* txt = NULL;
+   switch (cond) {
+      case AMD64CondO:      txt = "o";      break;
+      case AMD64CondNO:     txt = "no";     break;
+      case AMD64CondB:      txt = "b";      break;
+      case AMD64CondNB:     txt = "ae";     break; /*"nb";*/
+      case AMD64CondZ:      txt = "e";      break; /*"z";*/
+      case AMD64CondNZ:     txt = "ne";     break; /*"nz";*/
+      case AMD64CondBE:     txt = "be";     break;
+      case AMD64CondNBE:    txt = "a";      break; /*"nbe";*/
+      case AMD64CondS:      txt = "s";      break;
+      case AMD64CondNS:     txt = "ns";     break;
+      case AMD64CondP:      txt = "p";      break;
+      case AMD64CondNP:     txt = "np";     break;
+      case AMD64CondL:      txt = "l";      break;
+      case AMD64CondNL:     txt = "ge";     break; /*"nl";*/
+      case AMD64CondLE:     txt = "le";     break;
+      case AMD64CondNLE:    txt = "g";      break; /*"nle";*/
+      case AMD64CondAlways: txt = "ALWAYS"; break;
+      default: vpanic("name_AMD64Condcode");
+   }
+   return txt;
+}
+
+/* Reduce a condition to its even-numbered form.  If 'cond' has its
+   low bit set, cond-1 is returned and *needInvert is set to True;
+   otherwise 'cond' itself is returned and *needInvert is set to
+   False.  'cond' must lie in the range AMD64CondO .. AMD64CondNLE. */
+static 
+AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
+                                          /*OUT*/Bool*   needInvert )
+{
+   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
+   *needInvert = toBool((cond & 1) != 0);
+   if (*needInvert)
+      return cond-1;
+   return cond;
+}
+
+
+/* -------------- Helpers for ADD/SUB with carry. -------------- */
+
+/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
+   appropriately.
+
+   Optionally, generate a store for the 'tres' value.  This can either
+   be a normal store, or it can be a cas-with-possible-failure style
+   store:
+
+   if taddr is IRTemp_INVALID, then no store is generated.
+
+   if taddr is not IRTemp_INVALID, then a store (using taddr as
+   the address) is generated:
+
+     if texpVal is IRTemp_INVALID then a normal store is
+     generated, and restart_point must be zero (it is irrelevant).
+
+     if texpVal is not IRTemp_INVALID then a cas-style store is
+     generated.  texpVal is the expected value, restart_point
+     is the restart point if the store fails, and texpVal must
+     have the same type as tres.
+
+   restart_point is a full 64-bit guest address: it is handed on
+   unchanged to casLE, and callers pass the current guest %rip for
+   it.  It is therefore typed Addr64 so it is never truncated.
+*/
+static void helper_ADC ( Int sz,
+                         IRTemp tres, IRTemp ta1, IRTemp ta2,
+                         /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
+{
+   UInt    thunkOp;
+   IRType  ty    = szToITy(sz);
+   IRTemp  oldc  = newTemp(Ity_I64);
+   IRTemp  oldcn = newTemp(ty);
+   IROp    plus  = mkSizedOp(ty, Iop_Add8);
+   IROp    xor   = mkSizedOp(ty, Iop_Xor8);
+
+   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
+   switch (sz) {
+      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
+      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
+      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
+      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
+      default: vassert(0);
+   }
+
+   /* oldc = old carry flag, 0 or 1 */
+   assign( oldc,  binop(Iop_And64,
+                        mk_amd64g_calculate_rflags_c(),
+                        mkU64(1)) );
+
+   /* oldcn = the same carry bit, narrowed to the operand width */
+   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+   /* tres = (ta1 + ta2) + carry-in */
+   assign( tres, binop(plus,
+                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
+                       mkexpr(oldcn)) );
+
+   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
+      start of this function. */
+   if (taddr != IRTemp_INVALID) {
+      if (texpVal == IRTemp_INVALID) {
+         vassert(restart_point == 0);
+         storeLE( mkexpr(taddr), mkexpr(tres) );
+      } else {
+         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+         /* .. and hence 'texpVal' has the same type as 'tres'. */
+         casLE( mkexpr(taddr),
+                mkexpr(texpVal), mkexpr(tres), restart_point );
+      }
+   }
+
+   /* The thunk is written only after the (possibly failing) store:
+      DEP1 = ta1, DEP2 = ta2 ^ carry-in, NDEP = carry-in. */
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1))  ));
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 
+                                                         mkexpr(oldcn)) )) );
+   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
+
+
+/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
+   appropriately.  As with helper_ADC, possibly generate a store of
+   the result -- see comments on helper_ADC for details.
+
+   restart_point is a full 64-bit guest address: it is handed on
+   unchanged to casLE, and callers pass the current guest %rip for
+   it.  It is therefore typed Addr64 so it is never truncated.
+*/
+static void helper_SBB ( Int sz,
+                         IRTemp tres, IRTemp ta1, IRTemp ta2,
+                         /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
+{
+   UInt    thunkOp;
+   IRType  ty    = szToITy(sz);
+   IRTemp  oldc  = newTemp(Ity_I64);
+   IRTemp  oldcn = newTemp(ty);
+   IROp    minus = mkSizedOp(ty, Iop_Sub8);
+   IROp    xor   = mkSizedOp(ty, Iop_Xor8);
+
+   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
+   switch (sz) {
+      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
+      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
+      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
+      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
+      default: vassert(0);
+   }
+
+   /* oldc = old carry flag, 0 or 1 */
+   assign( oldc, binop(Iop_And64,
+                       mk_amd64g_calculate_rflags_c(),
+                       mkU64(1)) );
+
+   /* oldcn = the same carry bit, narrowed to the operand width */
+   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+   /* tres = (ta1 - ta2) - borrow-in */
+   assign( tres, binop(minus,
+                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
+                       mkexpr(oldcn)) );
+
+   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
+      start of this function. */
+   if (taddr != IRTemp_INVALID) {
+      if (texpVal == IRTemp_INVALID) {
+         vassert(restart_point == 0);
+         storeLE( mkexpr(taddr), mkexpr(tres) );
+      } else {
+         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+         /* .. and hence 'texpVal' has the same type as 'tres'. */
+         casLE( mkexpr(taddr),
+                mkexpr(texpVal), mkexpr(tres), restart_point );
+      }
+   }
+
+   /* The thunk is written only after the (possibly failing) store:
+      DEP1 = ta1, DEP2 = ta2 ^ borrow-in, NDEP = borrow-in. */
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 
+                                                         mkexpr(oldcn)) )) );
+   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
+
+
+/* -------------- Helpers for disassembly printing.
-------------- */ + + static HChar* nameGrp1 ( Int opc_aux ) + { + static HChar* grp1_names[8] + = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; + if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)"); + return grp1_names[opc_aux]; + } + + static HChar* nameGrp2 ( Int opc_aux ) + { + static HChar* grp2_names[8] + = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; + if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)"); + return grp2_names[opc_aux]; + } + + static HChar* nameGrp4 ( Int opc_aux ) + { + static HChar* grp4_names[8] + = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; + if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)"); + return grp4_names[opc_aux]; + } + + static HChar* nameGrp5 ( Int opc_aux ) + { + static HChar* grp5_names[8] + = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; + if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)"); + return grp5_names[opc_aux]; + } + + static HChar* nameGrp8 ( Int opc_aux ) + { + static HChar* grp8_names[8] + = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; + if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)"); + return grp8_names[opc_aux]; + } + + //.. static HChar* nameSReg ( UInt sreg ) + //.. { + //.. switch (sreg) { + //.. case R_ES: return "%es"; + //.. case R_CS: return "%cs"; + //.. case R_SS: return "%ss"; + //.. case R_DS: return "%ds"; + //.. case R_FS: return "%fs"; + //.. case R_GS: return "%gs"; + //.. default: vpanic("nameSReg(x86)"); + //.. } + //.. 
} + + static HChar* nameMMXReg ( Int mmxreg ) + { + static HChar* mmx_names[8] + = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; + if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)"); + return mmx_names[mmxreg]; + } + + static HChar* nameXMMReg ( Int xmmreg ) + { + static HChar* xmm_names[16] + = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; + if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)"); + return xmm_names[xmmreg]; + } + + static HChar* nameMMXGran ( Int gran ) + { + switch (gran) { + case 0: return "b"; + case 1: return "w"; + case 2: return "d"; + case 3: return "q"; + default: vpanic("nameMMXGran(amd64,guest)"); + } + } + + static HChar nameISize ( Int size ) + { + switch (size) { + case 8: return 'q'; + case 4: return 'l'; + case 2: return 'w'; + case 1: return 'b'; + default: vpanic("nameISize(amd64)"); + } + } + + + /*------------------------------------------------------------*/ + /*--- JMP helpers ---*/ + /*------------------------------------------------------------*/ + + static void jmp_lit( IRJumpKind kind, Addr64 d64 ) + { + irsb->next = mkU64(d64); + irsb->jumpkind = kind; + } + + static void jmp_treg( IRJumpKind kind, IRTemp t ) + { + irsb->next = mkexpr(t); + irsb->jumpkind = kind; + } + + static + void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) + { + Bool invert; + AMD64Condcode condPos; + condPos = positiveIse_AMD64Condcode ( cond, &invert ); + if (invert) { + stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), + Ijk_Boring, + IRConst_U64(d64_false) ) ); + irsb->next = mkU64(d64_true); + irsb->jumpkind = Ijk_Boring; + } else { + stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), + Ijk_Boring, + IRConst_U64(d64_true) ) ); + irsb->next = mkU64(d64_false); + irsb->jumpkind = Ijk_Boring; + } + } + + /* Let new_rsp be the %rsp value after a call/return. 
Let nia be the
+   guest address of the next instruction to be executed.
+
+   This function generates an AbiHint to say that -128(%rsp)
+   .. -1(%rsp) should now be regarded as uninitialised.
+
+   The redzone size is read from vbi->guest_stack_redzone_size and is
+   asserted to be exactly 128.  'who' is only used by the (disabled)
+   debug printout.  Both new_rsp and nia must be I64 temps.
+*/
+static 
+void make_redzone_AbiHint ( VexAbiInfo* vbi, 
+                            IRTemp new_rsp, IRTemp nia, HChar* who )
+{
+   Int szB = vbi->guest_stack_redzone_size;
+   vassert(szB >= 0);
+
+   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
+      for is ELF.  So just check it's the expected 128 value
+      (paranoia). */
+   vassert(szB == 128);
+
+   if (0) vex_printf("AbiHint: %s\n", who);
+   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
+   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
+   /* Given the assertion above this test always succeeds. */
+   if (szB > 0)
+      /* The hinted area starts at new_rsp - szB and is szB bytes
+         long. */
+      stmt( IRStmt_AbiHint( 
+               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 
+               szB,
+               mkexpr(nia)
+            ));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling addressing modes                       ---*/
+/*------------------------------------------------------------*/
+
+/* Text of the segment-override prefix recorded in pfx, for use in
+   disassembly printing, or the empty string if there is none.  If
+   more than one override bit is set, the first match in the order
+   tested below wins. */
+static 
+HChar* segRegTxt ( Prefix pfx )
+{
+   if (pfx & PFX_CS) return "%cs:";
+   if (pfx & PFX_DS) return "%ds:";
+   if (pfx & PFX_ES) return "%es:";
+   if (pfx & PFX_FS) return "%fs:";
+   if (pfx & PFX_GS) return "%gs:";
+   if (pfx & PFX_SS) return "%ss:";
+   return ""; /* no override */
+}
+
+
+/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
+   linear address by adding any required segment override as indicated
+   by pfx, and also dealing with any address size override
+   present.
+
+   Only %fs and %gs overrides have any effect, and each is supported
+   only when the corresponding vbi assumption flag is set; otherwise
+   unimplemented() is called.  Returns the adjusted expression. */
+static
+IRExpr* handleAddrOverrides ( VexAbiInfo* vbi, 
+                              Prefix pfx, IRExpr* virtual )
+{
+   /* --- segment overrides --- */
+   if (pfx & PFX_FS) {
+      if (vbi->guest_amd64_assume_fs_is_zero) {
+         /* Note that this is a linux-kernel specific hack that relies
+            on the assumption that %fs is always zero. */
+         /* return virtual + guest_FS_ZERO. */
+         virtual = binop(Iop_Add64, virtual,
+                                    IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
+      } else {
+         unimplemented("amd64 %fs segment override");
+      }
+   }
+
+   if (pfx & PFX_GS) {
+      if (vbi->guest_amd64_assume_gs_is_0x60) {
+         /* Note that this is a darwin-kernel specific hack that relies
+            on the assumption that %gs is always 0x60. */
+         /* return virtual + guest_GS_0x60. */
+         virtual = binop(Iop_Add64, virtual,
+                                    IRExpr_Get(OFFB_GS_0x60, Ity_I64));
+      } else {
+         unimplemented("amd64 %gs segment override");
+      }
+   }
+
+   /* cs, ds, es and ss are simply ignored in 64-bit mode. */
+
+   /* --- address size override --- */
+   /* With an address-size override only the low 32 bits of the
+      address are kept; they are then zero-extended back to 64. */
+   if (haveASO(pfx))
+      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
+
+   return virtual;
+}
+
+/* What follows is a commented-out function body that refers to x86
+   names (sorb, x86g_use_seg_selector); it is not compiled. */
+//.. {
+//..    Int    sreg;
+//..    IRType hWordTy;
+//..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
+//.. 
+//..    if (sorb == 0)
+//..       /* the common case - no override */
+//..       return virtual;
+//.. 
+//..    switch (sorb) {
+//..       case 0x3E: sreg = R_DS; break;
+//..       case 0x26: sreg = R_ES; break;
+//..       case 0x64: sreg = R_FS; break;
+//..       case 0x65: sreg = R_GS; break;
+//..       default: vpanic("handleAddrOverrides(x86,guest)");
+//..    }
+//.. 
+//..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
+//.. 
+//..    seg_selector = newTemp(Ity_I32);
+//..    ldt_ptr      = newTemp(hWordTy);
+//..    gdt_ptr      = newTemp(hWordTy);
+//..    r64          = newTemp(Ity_I64);
+//.. 
+//..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
+//..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
+//..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
+//.. 
+//..    /*
+//..    Call this to do the translation and limit checks: 
+//..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+//..                                  UInt seg_selector, UInt virtual_addr )
+//..    */
+//..    assign( 
+//..       r64, 
+//..       mkIRExprCCall( 
+//..          Ity_I64, 
+//..          0/*regparms*/, 
+//..          "x86g_use_seg_selector", 
+//..          &x86g_use_seg_selector, 
+//..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 
+//..                         mkexpr(seg_selector), virtual)
+//..
) + //.. ); + //.. + //.. /* If the high 32 of the result are non-zero, there was a + //.. failure in address translation. In which case, make a + //.. quick exit. + //.. */ + //.. stmt( + //.. IRStmt_Exit( + //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), + //.. Ijk_MapFail, + //.. IRConst_U32( guest_eip_curr_instr ) + //.. ) + //.. ); + //.. + //.. /* otherwise, here's the translated result. */ + //.. return unop(Iop_64to32, mkexpr(r64)); + //.. } + + + /* Generate IR to calculate an address indicated by a ModRM and + following SIB bytes. The expression, and the number of bytes in + the address mode, are returned (the latter in *len). Note that + this fn should not be called if the R/M part of the address denotes + a register instead of memory. If print_codegen is true, text of + the addressing mode is placed in buf. + + The computed address is stored in a new tempreg, and the + identity of the tempreg is returned. + + extra_bytes holds the number of bytes after the amode, as supplied + by the caller. This is needed to make sense of %rip-relative + addresses. Note that the value that *len is set to is only the + length of the amode itself and does not include the value supplied + in extra_bytes. + */ + + static IRTemp disAMode_copy2tmp ( IRExpr* addr64 ) + { + IRTemp tmp = newTemp(Ity_I64); + assign( tmp, addr64 ); + return tmp; + } + + static + IRTemp disAMode ( /*OUT*/Int* len, + VexAbiInfo* vbi, Prefix pfx, Long delta, + /*OUT*/HChar* buf, Int extra_bytes ) + { + UChar mod_reg_rm = getUChar(delta); + delta++; + + buf[0] = (UChar)0; + vassert(extra_bytes >= 0 && extra_bytes < 10); + + /* squeeze out the reg field from mod_reg_rm, since a 256-entry + jump table seems a bit excessive. + */ + mod_reg_rm &= 0xC7; /* is now XX000YYY */ + mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); + /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 
+ REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). + */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + { UChar rm = toUChar(mod_reg_rm & 7); + DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); + *len = 1; + return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm))); + } + + /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) + REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) + */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + { UChar rm = toUChar(mod_reg_rm & 7); + Long d = getSDisp8(delta); + if (d == 0) { + DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); + } else { + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); + } + *len = 2; + return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, + binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); + } + + /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) + REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) + */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + { UChar rm = toUChar(mod_reg_rm & 7); + Long d = getSDisp32(delta); + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); + *len = 5; + return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, + binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); + } + + /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ + /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + vpanic("disAMode(amd64): not an addr!"); + + /* RIP + disp32. This assumes that guest_RIP_curr_instr is set + correctly at the start of handling each instruction. */ + case 0x05: + { Long d = getSDisp32(delta); + *len = 5; + DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); + /* We need to know the next instruction's start address. 
+ Try and figure out what it is, record the guess, and ask + the top-level driver logic (bbToIR_AMD64) to check we + guessed right, after the instruction is completely + decoded. */ + guest_RIP_next_mustcheck = True; + guest_RIP_next_assumed = guest_RIP_bbstart + + delta+4 + extra_bytes; + return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, + binop(Iop_Add64, mkU64(guest_RIP_next_assumed), + mkU64(d)))); + } + + case 0x04: { + /* SIB, with no displacement. Special cases: + -- %rsp cannot act as an index value. + If index_r indicates %rsp, zero is used for the index. + -- when mod is zero and base indicates RBP or R13, base is + instead a 32-bit sign-extended literal. + It's all madness, I tell you. Extract %index, %base and + scale from the SIB byte. The value denoted is then: + | %index == %RSP && (%base == %RBP || %base == %R13) + = d32 following SIB byte + | %index == %RSP && !(%base == %RBP || %base == %R13) + = %base + | %index != %RSP && (%base == %RBP || %base == %R13) + = d32 following SIB byte + (%index << scale) + | %index != %RSP && !(%base == %RBP || %base == %R13) + = %base + (%index << scale) + */ + UChar sib = getUChar(delta); + UChar scale = toUChar((sib >> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + /* correct since #(R13) == 8 + #(RBP) */ + Bool base_is_BPor13 = toBool(base_r == R_RBP); + Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx)); + delta++; + + if ((!index_is_SP) && (!base_is_BPor13)) { + if (scale == 0) { + DIS(buf, "%s(%s,%s)", segRegTxt(pfx), + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r)); + } else { + DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r), 1<> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + Long d = getSDisp8(delta+1); + + if (index_r == R_RSP && 0==getRexX(pfx)) { + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), + d, nameIRegRexB(8,pfx,base_r)); + *len = 3; + 
return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, + binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); + } else { + if (scale == 0) { + DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r)); + } else { + DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r), 1<> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + Long d = getSDisp32(delta+1); + + if (index_r == R_RSP && 0==getRexX(pfx)) { + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), + d, nameIRegRexB(8,pfx,base_r)); + *len = 6; + return disAMode_copy2tmp( + handleAddrOverrides(vbi, pfx, + binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); + } else { + if (scale == 0) { + DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r)); + } else { + DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, + nameIRegRexB(8,pfx,base_r), + nameIReg64rexX(pfx,index_r), 1<> 3)); + /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). + REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). + */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + return 1; + + /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) + REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) + */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + return 2; + + /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) + REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) + */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + return 5; + + /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ + /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ + /* Not an address, but still handled. 
*/
+      case 0x18: case 0x19: case 0x1A: case 0x1B:
+      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+         return 1;
+
+      /* RIP + disp32. */
+      case 0x05: 
+         return 5;
+
+      case 0x04: {
+         /* SIB, with no displacement. */
+         UChar sib     = getUChar(delta);
+         UChar base_r  = toUChar(sib & 7);
+         /* correct since #(R13) == 8 + #(RBP) */
+         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
+
+         if (base_is_BPor13) {
+            return 6;
+         } else {
+            return 2;
+         }
+      }
+
+      /* SIB, with 8-bit displacement. */
+      case 0x0C:
+         return 3;
+
+      /* SIB, with 32-bit displacement. */
+      case 0x14:
+         return 6;
+
+      default:
+         vpanic("lengthAMode(amd64)");
+         return 0; /*notreached*/
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling common idioms                          ---*/
+/*------------------------------------------------------------*/
+
+/* Handle binary integer instructions of the form
+      op E, G  meaning
+      op reg-or-mem, reg
+   Is passed a ptr to the modRM byte, the actual operation, and the
+   data size.  Returns the address advanced completely over this
+   instruction.
+
+   E(src) is reg-or-mem
+   G(dst) is reg.
+
+   If E is reg, -->    GET %G,  tmp
+                       OP %E,   tmp
+                       PUT tmp, %G
+ 
+   If E is mem and OP is not reversible, 
+                -->    (getAddr E) -> tmpa
+                       LD (tmpa), tmpa
+                       GET %G, tmp2
+                       OP tmpa, tmp2
+                       PUT tmp2, %G
+
+   If E is mem and OP is reversible
+                -->    (getAddr E) -> tmpa
+                       LD (tmpa), tmpa
+                       OP %G, tmpa
+                       PUT tmpa, %G
+
+   Parameters:
+      addSubCarry  True for add-with-carry / subtract-with-borrow;
+                   op8 must then be Iop_Add8 or Iop_Sub8 and keep
+                   must be True.
+      op8          the 8-bit form of the operation; it is sized up
+                   to 'size' bytes here.
+      keep         if False the result is computed (and flags set)
+                   but not written back to G.
+      delta0       offset of the modRM byte.
+      t_amd64opc   mnemonic, used only for disassembly printing.
+
+   The returned value is delta0 plus the length of the modRM byte
+   and any addressing-mode bytes.
+*/
+static
+ULong dis_op2_E_G ( VexAbiInfo* vbi,
+                    Prefix      pfx,
+                    Bool        addSubCarry,
+                    IROp        op8, 
+                    Bool        keep,
+                    Int         size, 
+                    Long        delta0,
+                    HChar*      t_amd64opc )
+{
+   HChar   dis_buf[50];
+   Int     len;
+   IRType  ty   = szToITy(size);
+   IRTemp  dst1 = newTemp(ty);   /* result */
+   IRTemp  src  = newTemp(ty);   /* E operand */
+   IRTemp  dst0 = newTemp(ty);   /* original value of G */
+   UChar   rm   = getUChar(delta0);
+   IRTemp  addr = IRTemp_INVALID;
+
+   /* addSubCarry == True indicates the intended operation is
+      add-with-carry or subtract-with-borrow. */
+   if (addSubCarry) {
+      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+      vassert(keep);
+   }
+
+   if (epartIsReg(rm)) {
+      /* Specially handle XOR reg,reg, because that doesn't really
+         depend on reg, and doing the obvious thing potentially
+         generates a spurious value check failure due to the bogus
+         dependency.  The same is done for SBB reg,reg.  The register
+         is zeroed first, so the operation below reads a defined
+         value. */
+      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
+          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
+         if (False && op8 == Iop_Sub8)
+            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
+         putIRegG(size,pfx,rm, mkU(ty,0));
+      }
+
+      assign( dst0, getIRegG(size,pfx,rm) );
+      assign( src,  getIRegE(size,pfx,rm) );
+
+      if (addSubCarry && op8 == Iop_Add8) {
+         /* ADC: the helper computes dst1 and sets the flags. */
+         helper_ADC( size, dst1, dst0, src,
+                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+         putIRegG(size, pfx, rm, mkexpr(dst1));
+      } else
+      if (addSubCarry && op8 == Iop_Sub8) {
+         /* SBB: likewise. */
+         helper_SBB( size, dst1, dst0, src,
+                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+         putIRegG(size, pfx, rm, mkexpr(dst1));
+      } else {
+         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+         /* Add/sub record both operands in the thunk; the logic ops
+            record only the result. */
+         if (isAddSub(op8))
+            setFlags_DEP1_DEP2(op8, dst0, src, ty);
+         else
+            setFlags_DEP1(op8, dst1, ty);
+         if (keep)
+            putIRegG(size, pfx, rm, mkexpr(dst1));
+      }
+
+      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 
+                          nameIRegE(size,pfx,rm),
+                          nameIRegG(size,pfx,rm));
+      /* Only the modRM byte was consumed. */
+      return 1+delta0;
+   } else {
+      /* E refers to memory */
+      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+      assign( dst0, getIRegG(size,pfx,rm) );
+      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );
+
+      if (addSubCarry && op8 == Iop_Add8) {
+         /* The destination is a register, so the helper is asked
+            not to generate a store. */
+         helper_ADC( size, dst1, dst0, src,
+                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+         putIRegG(size, pfx, rm, mkexpr(dst1));
+      } else
+      if (addSubCarry && op8 == Iop_Sub8) {
+         helper_SBB( size, dst1, dst0, src,
+                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+         putIRegG(size, pfx, rm, mkexpr(dst1));
+      } else {
+         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+         if (isAddSub(op8))
+            setFlags_DEP1_DEP2(op8, dst0, src, ty);
+         else
+            setFlags_DEP1(op8, dst1, ty);
+         if (keep)
+            putIRegG(size, pfx, rm, mkexpr(dst1));
+      }
+
+      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 
+                          dis_buf, nameIRegG(size, pfx, rm));
+      /* len is the amode length as reported by disAMode. */
+      return len+delta0;
+   }
+}
+
+
+
+/* Handle binary integer instructions of the form
+      op G, E  meaning
+      op reg, reg-or-mem
+   Is passed a ptr to the modRM byte, the actual operation, and the
+   data size.  Returns the address advanced completely over this
+   instruction.
+
+   G(src) is reg.
+   E(dst) is reg-or-mem
+
+   If E is reg, -->    GET %E,  tmp
+                       OP %G,   tmp
+                       PUT tmp, %E
+ 
+   If E is mem, -->    (getAddr E) -> tmpa
+                       LD (tmpa), tmpv
+                       OP %G, tmpv
+                       ST tmpv, (tmpa)
+*/
+static
+ULong dis_op2_G_E ( VexAbiInfo* vbi,
+                    Prefix      pfx,
+                    Bool        addSubCarry,
+                    IROp        op8, 
+                    Bool        keep,
+                    Int         size, 
+                    Long        delta0,
+                    HChar*      t_amd64opc )
+{
+   HChar   dis_buf[50];
+   Int     len;
+   IRType  ty   = szToITy(size);
+   IRTemp  dst1 = newTemp(ty);
+   IRTemp  src  = newTemp(ty);
+   IRTemp  dst0 = newTemp(ty);
+   UChar   rm   = getUChar(delta0);
+   IRTemp  addr = IRTemp_INVALID;
+
+   /* addSubCarry == True indicates the intended operation is
+      add-with-carry or subtract-with-borrow. */
+   if (addSubCarry) {
+      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+      vassert(keep);
+   }
+
+   if (epartIsReg(rm)) {
+      /* Specially handle XOR reg,reg, because that doesn't really
+         depend on reg, and doing the obvious thing potentially
+         generates a spurious value check failure due to the bogus
+         dependency.  Ditto SBB reg,reg.
*/ + if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) + && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { + putIRegE(size,pfx,rm, mkU(ty,0)); + } + + assign(dst0, getIRegE(size,pfx,rm)); + assign(src, getIRegG(size,pfx,rm)); + + if (addSubCarry && op8 == Iop_Add8) { + helper_ADC( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIRegE(size, pfx, rm, mkexpr(dst1)); + } else + if (addSubCarry && op8 == Iop_Sub8) { + helper_SBB( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIRegE(size, pfx, rm, mkexpr(dst1)); + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + if (keep) + putIRegE(size, pfx, rm, mkexpr(dst1)); + } + + DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), + nameIRegG(size,pfx,rm), + nameIRegE(size,pfx,rm)); + return 1+delta0; + } + + /* E refers to memory */ + { + addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + assign(dst0, loadLE(ty,mkexpr(addr))); + assign(src, getIRegG(size,pfx,rm)); + + if (addSubCarry && op8 == Iop_Add8) { + if (pfx & PFX_LOCK) { + /* cas-style store */ + helper_ADC( size, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); + } else { + /* normal store */ + helper_ADC( size, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else + if (addSubCarry && op8 == Iop_Sub8) { + if (pfx & PFX_LOCK) { + /* cas-style store */ + helper_SBB( size, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); + } else { + /* normal store */ + helper_SBB( size, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (keep) { + if (pfx & PFX_LOCK) { + if (0) vex_printf("locked case\n" ); + casLE( mkexpr(addr), + mkexpr(dst0)/*expval*/, + mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); + } else { + if 
(0) vex_printf("nonlocked case\n"); + storeLE(mkexpr(addr), mkexpr(dst1)); + } + } + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), + nameIRegG(size,pfx,rm), dis_buf); + return len+delta0; + } + } + + + /* Handle move instructions of the form + mov E, G meaning + mov reg-or-mem, reg + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %E, tmpv + PUT tmpv, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmpb + PUT tmpb, %G + */ + static + ULong dis_mov_E_G ( VexAbiInfo* vbi, + Prefix pfx, + Int size, + Long delta0 ) + { + Int len; + UChar rm = getUChar(delta0); + HChar dis_buf[50]; + + if (epartIsReg(rm)) { + putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); + DIP("mov%c %s,%s\n", nameISize(size), + nameIRegE(size,pfx,rm), + nameIRegG(size,pfx,rm)); + return 1+delta0; + } + + /* E refers to memory */ + { + IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); + DIP("mov%c %s,%s\n", nameISize(size), + dis_buf, + nameIRegG(size,pfx,rm)); + return delta0+len; + } + } + + + /* Handle move instructions of the form + mov G, E meaning + mov reg, reg-or-mem + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + G(src) is reg. 
+ E(dst) is reg-or-mem
+
+ If E is reg, --> GET %G, tmp
+ PUT tmp, %E
+
+ If E is mem, --> (getAddr E) -> tmpa
+ GET %G, tmpv
+ ST tmpv, (tmpa)
+ */
+ static
+ ULong dis_mov_G_E ( VexAbiInfo* vbi,
+                     Prefix pfx,
+                     Int size,
+                     Long delta0 )
+ {
+    Int len;
+    UChar rm = getUChar(delta0);
+    HChar dis_buf[50];
+    /* E is a register: plain register-to-register copy of G into E. */
+    if (epartIsReg(rm)) {
+       putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
+       DIP("mov%c %s,%s\n", nameISize(size),
+                            nameIRegG(size,pfx,rm),
+                            nameIRegE(size,pfx,rm));
+       return 1+delta0;
+    }
+
+    /* E refers to memory */
+    {
+       IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+       storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
+       DIP("mov%c %s,%s\n", nameISize(size),
+                            nameIRegG(size,pfx,rm),
+                            dis_buf);
+       return len+delta0;   /* len is the amode length from disAMode */
+    }
+ }
+
+
+ /* op $immediate, AL/AX/EAX/RAX.
+    The immediate occupies imin(size,4) bytes at delta; it is fetched
+    with getSDisp and masked to the operand size.  carrying == True
+    selects ADC (op8 == Iop_Add8) or SBB (op8 == Iop_Sub8), generated
+    by helper_ADC / helper_SBB; a logic op must not be combined with
+    carrying (asserted), and an op8 matching none of the cases below
+    panics.  The result is written back to the accumulator only when
+    keep is set.  t_amd64opc is the mnemonic printed by DIP.  Returns
+    delta+size4, i.e. delta advanced over the immediate. */
+ static
+ ULong dis_op_imm_A ( Int size,
+                      Bool carrying,
+                      IROp op8,
+                      Bool keep,
+                      Long delta,
+                      HChar* t_amd64opc )
+ {
+    Int size4 = imin(size,4);   /* length of the immediate in the insn stream */
+    IRType ty = szToITy(size);
+    IRTemp dst0 = newTemp(ty);
+    IRTemp src = newTemp(ty);
+    IRTemp dst1 = newTemp(ty);
+    Long lit = getSDisp(size4,delta);
+    assign(dst0, getIRegRAX(size));
+    assign(src, mkU(ty,lit & mkSizeMask(size)));
+
+    if (isAddSub(op8) && !carrying) {
+       assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+       setFlags_DEP1_DEP2(op8, dst0, src, ty);
+    }
+    else
+    if (isLogic(op8)) {
+       vassert(!carrying);
+       assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+       setFlags_DEP1(op8, dst1, ty);
+    }
+    else
+    if (op8 == Iop_Add8 && carrying) {
+       helper_ADC( size, dst1, dst0, src,
+                   /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+    }
+    else
+    if (op8 == Iop_Sub8 && carrying) {
+       helper_SBB( size, dst1, dst0, src,
+                   /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+    }
+    else
+       vpanic("dis_op_imm_A(amd64,guest)");
+
+    if (keep)
+       putIRegRAX(size, mkexpr(dst1));
+
+    DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
+                            lit, nameIRegRAX(size));
+    return delta+size4;
+ }
+
+
+ /* Sign- and Zero-extending moves.
+    Reads a value of size szs from E (register or memory), widens it
+    to size szd with doScalarWidening -- sign-extending if sign_extend
+    is set, as reflected by the 's'/'z' in the trace output -- and
+    writes the widened value to G.  Returns 1+delta when E is a
+    register and len+delta when E is memory, len being the length
+    reported by disAMode. */
+ static
+ ULong dis_movx_E_G ( VexAbiInfo* vbi,
+                      Prefix pfx,
+                      Long delta, Int szs, Int szd, Bool sign_extend )
+ {
+    UChar rm = getUChar(delta);
+    if (epartIsReg(rm)) {
+       putIRegG(szd, pfx, rm,
+                     doScalarWidening(
+                        szs,szd,sign_extend,
+                        getIRegE(szs,pfx,rm)));
+       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+                                nameISize(szs),
+                                nameISize(szd),
+                                nameIRegE(szs,pfx,rm),
+                                nameIRegG(szd,pfx,rm));
+       return 1+delta;
+    }
+
+    /* E refers to memory */
+    {
+       Int len;
+       HChar dis_buf[50];
+       IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+       putIRegG(szd, pfx, rm,
+                     doScalarWidening(
+                        szs,szd,sign_extend,
+                        loadLE(szToITy(szs),mkexpr(addr))));
+       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+                                nameISize(szs),
+                                nameISize(szd),
+                                dis_buf,
+                                nameIRegG(szd,pfx,rm));
+       return len+delta;
+    }
+ }
+
+
+ /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
+    the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
+ static
+ void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
+ {
+    /* special-case the 64-bit case */
+    if (sz == 8) {
+       IROp op = signed_divide ? Iop_DivModS128to64
+                               : Iop_DivModU128to64;
+       IRTemp src128 = newTemp(Ity_I128);
+       IRTemp dst128 = newTemp(Ity_I128);
+       assign( src128, binop(Iop_64HLto128,
+                             getIReg64(R_RDX),
+                             getIReg64(R_RAX)) );
+       assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
+       putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );   /* low half of the result -> RAX */
+       putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); /* high half of the result -> RDX */
+    } else {
+       /* All narrower sizes share the 64-by-32 DivMod op chosen here. */
+       IROp op = signed_divide ? Iop_DivModS64to32
+                               : Iop_DivModU64to32;
+       IRTemp src64 = newTemp(Ity_I64);
+       IRTemp dst64 = newTemp(Ity_I64);
+       switch (sz) {
+          case 4:
+             assign( src64,
+                     binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
+             assign( dst64,
+                     binop(op, mkexpr(src64), mkexpr(t)) );
+             putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
+             putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
+             break;
+          case 2: {
+             IROp widen3264 = signed_divide ?
Iop_32Sto64 : Iop_32Uto64; + IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; + assign( src64, unop(widen3264, + binop(Iop_16HLto32, + getIRegRDX(2), + getIRegRAX(2))) ); + assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); + putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); + putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); + break; + } + case 1: { + IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; + IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; + IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; + assign( src64, unop(widen3264, + unop(widen1632, getIRegRAX(2))) ); + assign( dst64, + binop(op, mkexpr(src64), + unop(widen1632, unop(widen816, mkexpr(t)))) ); + putIRegRAX( 1, unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64to32,mkexpr(dst64)))) ); + putIRegAH( unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64HIto32,mkexpr(dst64)))) ); + break; + } + default: + vpanic("codegen_div(amd64)"); + } + } + } + + static + ULong dis_Grp1 ( VexAbiInfo* vbi, + Prefix pfx, + Long delta, UChar modrm, + Int am_sz, Int d_sz, Int sz, Long d64 ) + { + Int len; + HChar dis_buf[50]; + IRType ty = szToITy(sz); + IRTemp dst1 = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dst0 = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + IROp op8 = Iop_INVALID; + ULong mask = mkSizeMask(sz); + + switch (gregLO3ofRM(modrm)) { + case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; + case 2: break; // ADC + case 3: break; // SBB + case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; + case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; + /*NOTREACHED*/ + default: vpanic("dis_Grp1(amd64): unhandled case"); + } + + if (epartIsReg(modrm)) { + vassert(am_sz == 1); + + assign(dst0, getIRegE(sz,pfx,modrm)); + assign(src, mkU(ty,d64 & mask)); + + if (gregLO3ofRM(modrm) == 2 /* ADC */) { + helper_ADC( sz, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } else + if 
(gregLO3ofRM(modrm) == 3 /* SBB */) { + helper_SBB( sz, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + if (gregLO3ofRM(modrm) < 7) + putIRegE(sz, pfx, modrm, mkexpr(dst1)); + + delta += (am_sz + d_sz); + DIP("%s%c $%lld, %s\n", + nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, + nameIRegE(sz,pfx,modrm)); + } else { + addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); + + assign(dst0, loadLE(ty,mkexpr(addr))); + assign(src, mkU(ty,d64 & mask)); + + if (gregLO3ofRM(modrm) == 2 /* ADC */) { + if (pfx & PFX_LOCK) { + /* cas-style store */ + helper_ADC( sz, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); + } else { + /* normal store */ + helper_ADC( sz, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else + if (gregLO3ofRM(modrm) == 3 /* SBB */) { + if (pfx & PFX_LOCK) { + /* cas-style store */ + helper_SBB( sz, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); + } else { + /* normal store */ + helper_SBB( sz, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (gregLO3ofRM(modrm) < 7) { + if (pfx & PFX_LOCK) { + casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, + mkexpr(dst1)/*newVal*/, + guest_RIP_curr_instr ); + } else { + storeLE(mkexpr(addr), mkexpr(dst1)); + } + } + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + delta += (len+d_sz); + DIP("%s%c $%lld, %s\n", + nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), + d64, dis_buf); + } + return delta; + } + + + /* Group 2 extended opcodes. shift_expr must be an 8-bit typed + expression. 
*/ + + static + ULong dis_Grp2 ( VexAbiInfo* vbi, + Prefix pfx, + Long delta, UChar modrm, + Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, + HChar* shift_expr_txt, Bool* decode_OK ) + { + /* delta on entry points at the modrm byte. */ + HChar dis_buf[50]; + Int len; + Bool isShift, isRotate, isRotateC; + IRType ty = szToITy(sz); + IRTemp dst0 = newTemp(ty); + IRTemp dst1 = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + + *decode_OK = True; + + vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); + + /* Put value to shift/rotate in dst0. */ + if (epartIsReg(modrm)) { + assign(dst0, getIRegE(sz, pfx, modrm)); + delta += (am_sz + d_sz); + } else { + addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); + assign(dst0, loadLE(ty,mkexpr(addr))); + delta += len + d_sz; + } + + isShift = False; + switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; } + + isRotate = False; + switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } + + isRotateC = False; + switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } + + if (gregLO3ofRM(modrm) == 6) { + *decode_OK = False; + return delta; + } + + if (!isShift && !isRotate && !isRotateC) { + /*NOTREACHED*/ + vpanic("dis_Grp2(Reg): unhandled case(amd64)"); + } + + if (isRotateC) { + /* Call a helper; this insn is so ridiculous it does not deserve + better. One problem is, the helper has to calculate both the + new value and the new flags. This is more than 64 bits, and + there is no way to return more than 64 bits from the helper. + Hence the crude and obvious solution is to call it twice, + using the sign of the sz field to indicate whether it is the + value or rflags result we want. 
+ */ + Bool left = toBool(gregLO3ofRM(modrm) == 2); + IRExpr** argsVALUE; + IRExpr** argsRFLAGS; + + IRTemp new_value = newTemp(Ity_I64); + IRTemp new_rflags = newTemp(Ity_I64); + IRTemp old_rflags = newTemp(Ity_I64); + + assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); + + argsVALUE + = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ + widenUto64(shift_expr), /* rotate amount */ + mkexpr(old_rflags), + mkU64(sz) ); + assign( new_value, + mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", + left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, + argsVALUE + ) + ); + + argsRFLAGS + = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ + widenUto64(shift_expr), /* rotate amount */ + mkexpr(old_rflags), + mkU64(-sz) ); + assign( new_rflags, + mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", + left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, + argsRFLAGS + ) + ); + + assign( dst1, narrowTo(ty, mkexpr(new_value)) ); + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + } + + else + if (isShift) { + + IRTemp pre64 = newTemp(Ity_I64); + IRTemp res64 = newTemp(Ity_I64); + IRTemp res64ss = newTemp(Ity_I64); + IRTemp shift_amt = newTemp(Ity_I8); + UChar mask = toUChar(sz==8 ? 63 : 31); + IROp op64; + + switch (gregLO3ofRM(modrm)) { + case 4: op64 = Iop_Shl64; break; + case 5: op64 = Iop_Shr64; break; + case 7: op64 = Iop_Sar64; break; + /*NOTREACHED*/ + default: vpanic("dis_Grp2:shift"); break; + } + + /* Widen the value to be shifted to 64 bits, do the shift, and + narrow back down. 
This seems surprisingly long-winded, but + unfortunately the AMD semantics requires that 8/16/32-bit + shifts give defined results for shift values all the way up + to 32, and this seems the simplest way to do it. It has the + advantage that the only IR level shifts generated are of 64 + bit values, and the shift amount is guaranteed to be in the + range 0 .. 63, thereby observing the IR semantics requiring + all shift values to be in the range 0 .. 2^word_size-1. + + Therefore the shift amount is masked with 63 for 64-bit shifts + and 31 for all others. + */ + /* shift_amt = shift_expr & MASK, regardless of operation size */ + assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); + + /* suitably widen the value to be shifted to 64 bits. */ + assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) + : widenUto64(mkexpr(dst0)) ); + + /* res64 = pre64 `shift` shift_amt */ + assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); + + /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ + assign( res64ss, + binop(op64, + mkexpr(pre64), + binop(Iop_And8, + binop(Iop_Sub8, + mkexpr(shift_amt), mkU8(1)), + mkU8(mask))) ); + + /* Build the flags thunk. */ + setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); + + /* Narrow the result back down. */ + assign( dst1, narrowTo(ty, mkexpr(res64)) ); + + } /* if (isShift) */ + + else + if (isRotate) { + Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 + : (ty==Ity_I32 ? 2 : 3)); + Bool left = toBool(gregLO3ofRM(modrm) == 0); + IRTemp rot_amt = newTemp(Ity_I8); + IRTemp rot_amt64 = newTemp(Ity_I8); + IRTemp oldFlags = newTemp(Ity_I64); + UChar mask = toUChar(sz==8 ? 63 : 31); + + /* rot_amt = shift_expr & mask */ + /* By masking the rotate amount thusly, the IR-level Shl/Shr + expressions never shift beyond the word size and thus remain + well defined. 
*/ + assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); + + if (ty == Ity_I64) + assign(rot_amt, mkexpr(rot_amt64)); + else + assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); + + if (left) { + + /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ + assign(dst1, + binop( mkSizedOp(ty,Iop_Or8), + binop( mkSizedOp(ty,Iop_Shl8), + mkexpr(dst0), + mkexpr(rot_amt) + ), + binop( mkSizedOp(ty,Iop_Shr8), + mkexpr(dst0), + binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) + ) + ) + ); + ccOp += AMD64G_CC_OP_ROLB; + + } else { /* right */ + + /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ + assign(dst1, + binop( mkSizedOp(ty,Iop_Or8), + binop( mkSizedOp(ty,Iop_Shr8), + mkexpr(dst0), + mkexpr(rot_amt) + ), + binop( mkSizedOp(ty,Iop_Shl8), + mkexpr(dst0), + binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) + ) + ) + ); + ccOp += AMD64G_CC_OP_RORB; + + } + + /* dst1 now holds the rotated value. Build flag thunk. We + need the resulting value for this, and the previous flags. + Except don't set it if the rotate count is zero. */ + + assign(oldFlags, mk_amd64g_calculate_rflags_all()); + + /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ + stmt( IRStmt_Put( OFFB_CC_OP, + IRExpr_Mux0X( mkexpr(rot_amt64), + IRExpr_Get(OFFB_CC_OP,Ity_I64), + mkU64(ccOp))) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(rot_amt64), + IRExpr_Get(OFFB_CC_DEP1,Ity_I64), + widenUto64(mkexpr(dst1)))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, + IRExpr_Mux0X( mkexpr(rot_amt64), + IRExpr_Get(OFFB_CC_DEP2,Ity_I64), + mkU64(0))) ); + stmt( IRStmt_Put( OFFB_CC_NDEP, + IRExpr_Mux0X( mkexpr(rot_amt64), + IRExpr_Get(OFFB_CC_NDEP,Ity_I64), + mkexpr(oldFlags))) ); + } /* if (isRotate) */ + + /* Save result, and finish up. 
*/
+    if (epartIsReg(modrm)) {
+       putIRegE(sz, pfx, modrm, mkexpr(dst1));
+       if (vex_traceflags & VEX_TRACE_FE) {
+          vex_printf("%s%c ",
+                     nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
+          if (shift_expr_txt)
+             vex_printf("%s", shift_expr_txt);
+          else
+             ppIRExpr(shift_expr);
+          vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
+       }
+    } else {
+       storeLE(mkexpr(addr), mkexpr(dst1));
+       if (vex_traceflags & VEX_TRACE_FE) {
+          vex_printf("%s%c ",
+                     nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
+          if (shift_expr_txt)
+             vex_printf("%s", shift_expr_txt);
+          else
+             ppIRExpr(shift_expr);
+          vex_printf(", %s\n", dis_buf);
+       }
+    }
+    return delta;
+ }
+
+
+ /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only).
+    sz must be 2, 4 or 8 and the reg field of modrm must be 4 .. 7;
+    otherwise *decode_OK is set to False and delta is returned
+    unchanged.  On success the tested value is E (register or memory),
+    the bit index is src_val reduced modulo the operand width, and the
+    returned delta has been advanced over the modrm/amode bytes plus
+    the one-byte immediate.  For a memory operand with PFX_LOCK set in
+    pfx, the modified value is written back with casLE instead of a
+    plain store. */
+ static
+ ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
+                      Prefix pfx,
+                      Long delta, UChar modrm,
+                      Int am_sz, Int sz, ULong src_val,
+                      Bool* decode_OK )
+ {
+    /* src_val denotes a d8.
+       And delta on entry points at the modrm byte. */
+
+    IRType ty = szToITy(sz);
+    IRTemp t2 = newTemp(Ity_I64);    /* original value, widened to 64 bits */
+    IRTemp t2m = newTemp(Ity_I64);   /* modified value; only assigned for BTS/BTR/BTC */
+    IRTemp t_addr = IRTemp_INVALID;
+    HChar dis_buf[50];
+    ULong mask;
+
+    /* we're optimists :-) */
+    *decode_OK = True;
+
+    /* Limit src_val -- the bit offset -- to something within a word.
+       The Intel docs say that literal offsets larger than a word are
+       masked in this way. */
+    switch (sz) {
+       case 2: src_val &= 15; break;
+       case 4: src_val &= 31; break;
+       case 8: src_val &= 63; break;
+       default: *decode_OK = False; return delta;
+    }
+
+    /* Invent a mask suitable for the operation.  It is combined with
+       the value further down using OR (BTS), AND (BTR) or XOR (BTC). */
+    switch (gregLO3ofRM(modrm)) {
+       case 4: /* BT */ mask = 0; break;
+       case 5: /* BTS */ mask = 1ULL << src_val; break;
+       case 6: /* BTR */ mask = ~(1ULL << src_val); break;
+       case 7: /* BTC */ mask = 1ULL << src_val; break;
+          /* If this needs to be extended, probably simplest to make a
+             new function to handle the other cases (0 .. 3).  The
+             Intel docs do however not indicate any use for 0 .. 3, so
+             we don't expect this to happen. */
+       default: *decode_OK = False; return delta;
+    }
+
+    /* Fetch the value to be tested and modified into t2, which is
+       64-bits wide regardless of sz. */
+    if (epartIsReg(modrm)) {
+       vassert(am_sz == 1);
+       assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
+       delta += (am_sz + 1);
+       DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
+                                 nameISize(sz),
+                                 src_val, nameIRegE(sz,pfx,modrm));
+    } else {
+       Int len;
+       t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
+       delta += (len+1);
+       assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
+       DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
+                                 nameISize(sz),
+                                 src_val, dis_buf);
+    }
+
+    /* Compute the new value into t2m, if non-BT. */
+    switch (gregLO3ofRM(modrm)) {
+       case 4: /* BT */
+          break;
+       case 5: /* BTS */
+          assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
+          break;
+       case 6: /* BTR */
+          assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
+          break;
+       case 7: /* BTC */
+          assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
+          break;
+       default:
+          /*NOTREACHED*/ /*the previous switch guards this*/
+          vassert(0);
+    }
+
+    /* Write the result back, if non-BT. */
+    if (gregLO3ofRM(modrm) != 4 /* BT */) {
+       if (epartIsReg(modrm)) {
+          putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
+       } else {
+          if (pfx & PFX_LOCK) {
+             casLE( mkexpr(t_addr),
+                    narrowTo(ty, mkexpr(t2))/*expd*/,
+                    narrowTo(ty, mkexpr(t2m))/*new*/,
+                    guest_RIP_curr_instr );
+          } else {
+             storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+          }
+       }
+    }
+
+    /* Copy relevant bit from t2 into the carry flag. */
+    /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+    stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+    stmt( IRStmt_Put(
+             OFFB_CC_DEP1,
+             binop(Iop_And64,
+                   binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
+                   mkU64(1))
+        ));
+    /* Set NDEP even though it isn't used. This makes redundant-PUT
+    elimination of previous stores to this field work better.
*/
+    stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+    return delta;
+ }
+
+
+ /* Signed/unsigned widening multiply.  Generate IR to multiply the
+    value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
+    RDX:RAX/EDX:EAX/DX:AX/AX.
+
+    syned selects the signed (Iop_MullS*) rather than the unsigned
+    (Iop_MullU*) multiply, the SMULB rather than the UMULB base for
+    the flags thunk, and "imul" rather than "mul" in the trace
+    output.  tmp_txt is the operand text printed by DIP.  An operand
+    type other than I64/I32/I16/I8 panics.
+ */
+ static void codegen_mulL_A_D ( Int sz, Bool syned,
+                                IRTemp tmp, HChar* tmp_txt )
+ {
+    IRType ty = szToITy(sz);
+    IRTemp t1 = newTemp(ty);   /* the accumulator operand */
+
+    assign( t1, getIRegRAX(sz) );
+
+    switch (ty) {
+       case Ity_I64: {
+          IRTemp res128 = newTemp(Ity_I128);
+          IRTemp resHi = newTemp(Ity_I64);
+          IRTemp resLo = newTemp(Ity_I64);
+          IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
+          UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+          setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
+          assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+          assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
+          assign( resLo, unop(Iop_128to64,mkexpr(res128)));
+          putIReg64(R_RDX, mkexpr(resHi));
+          putIReg64(R_RAX, mkexpr(resLo));
+          break;
+       }
+       case Ity_I32: {
+          IRTemp res64 = newTemp(Ity_I64);
+          IRTemp resHi = newTemp(Ity_I32);
+          IRTemp resLo = newTemp(Ity_I32);
+          IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
+          UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+          setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
+          assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+          assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
+          assign( resLo, unop(Iop_64to32,mkexpr(res64)));
+          putIRegRDX(4, mkexpr(resHi));
+          putIRegRAX(4, mkexpr(resLo));
+          break;
+       }
+       case Ity_I16: {
+          IRTemp res32 = newTemp(Ity_I32);
+          IRTemp resHi = newTemp(Ity_I16);
+          IRTemp resLo = newTemp(Ity_I16);
+          IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
+          UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+          setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
+          assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+          assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
+          assign( resLo, unop(Iop_32to16,mkexpr(res32)));
+          putIRegRDX(2, mkexpr(resHi));
+          putIRegRAX(2, mkexpr(resLo));
+          break;
+       }
+       case Ity_I8: {
+          IRTemp res16 = newTemp(Ity_I16);
+          IRTemp resHi = newTemp(Ity_I8);
+          IRTemp resLo = newTemp(Ity_I8);
+          IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
+          UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+          setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
+          assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+          assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
+          assign( resLo, unop(Iop_16to8,mkexpr(res16)));
+          putIRegRAX(2, mkexpr(res16));   /* the whole 16-bit product goes to AX; resHi/resLo are not read */
+          break;
+       }
+       default:
+          ppIRType(ty);
+          vpanic("codegen_mulL_A_D(amd64)");
+    }
+    DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
+ }
+
+
+ /* Group 3 extended opcodes. */
+ static
+ ULong dis_Grp3 ( VexAbiInfo* vbi,
+                  Prefix pfx, Int sz, Long delta, Bool* decode_OK )
+ {
+    Long d64;
+    UChar modrm;
+    HChar dis_buf[50];
+    Int len;
+    IRTemp addr;
+    IRType ty = szToITy(sz);
+    IRTemp t1 = newTemp(ty);
+    IRTemp dst1, src, dst0;
+    *decode_OK = True;
+    modrm = getUChar(delta);
+    if (epartIsReg(modrm)) {
+       switch (gregLO3ofRM(modrm)) {
+          case 0: { /* TEST */
+             delta++;
+             d64 = getSDisp(imin(4,sz), delta);
+             delta += imin(4,sz);
+             dst1 = newTemp(ty);
+             assign(dst1, binop(mkSizedOp(ty,Iop_And8),
+                                getIRegE(sz,pfx,modrm),
+                                mkU(ty, d64 & mkSizeMask(sz))));
+             setFlags_DEP1( Iop_And8, dst1, ty );
+             DIP("test%c $%lld, %s\n",
+                 nameISize(sz), d64,
+                 nameIRegE(sz, pfx, modrm));
+             break;
+          }
+          case 1:
+             *decode_OK = False;
+             return delta;
+          case 2: /* NOT */
+             delta++;
+             putIRegE(sz, pfx, modrm,
+                      unop(mkSizedOp(ty,Iop_Not8),
+                           getIRegE(sz, pfx, modrm)));
+             DIP("not%c %s\n", nameISize(sz),
+                 nameIRegE(sz, pfx, modrm));
+             break;
+          case 3: /* NEG */
+             delta++;
+             dst0 = newTemp(ty);
+             src
= newTemp(ty); + dst1 = newTemp(ty); + assign(dst0, mkU(ty,0)); + assign(src, getIRegE(sz, pfx, modrm)); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), + mkexpr(src))); + setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); + putIRegE(sz, pfx, modrm, mkexpr(dst1)); + DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); + break; + case 4: /* MUL (unsigned widening) */ + delta++; + src = newTemp(ty); + assign(src, getIRegE(sz,pfx,modrm)); + codegen_mulL_A_D ( sz, False, src, + nameIRegE(sz,pfx,modrm) ); + break; + case 5: /* IMUL (signed widening) */ + delta++; + src = newTemp(ty); + assign(src, getIRegE(sz,pfx,modrm)); + codegen_mulL_A_D ( sz, True, src, + nameIRegE(sz,pfx,modrm) ); + break; + case 6: /* DIV */ + delta++; + assign( t1, getIRegE(sz, pfx, modrm) ); + codegen_div ( sz, t1, False ); + DIP("div%c %s\n", nameISize(sz), + nameIRegE(sz, pfx, modrm)); + break; + case 7: /* IDIV */ + delta++; + assign( t1, getIRegE(sz, pfx, modrm) ); + codegen_div ( sz, t1, True ); + DIP("idiv%c %s\n", nameISize(sz), + nameIRegE(sz, pfx, modrm)); + break; + default: + /*NOTREACHED*/ + vpanic("Grp3(amd64,R)"); + } + } else { + addr = disAMode ( &len, vbi, pfx, delta, dis_buf, + /* we have to inform disAMode of any immediate + bytes used */ + gregLO3ofRM(modrm)==0/*TEST*/ + ? 
imin(4,sz) + : 0 + ); + t1 = newTemp(ty); + delta += len; + assign(t1, loadLE(ty,mkexpr(addr))); + switch (gregLO3ofRM(modrm)) { + case 0: { /* TEST */ + d64 = getSDisp(imin(4,sz), delta); + delta += imin(4,sz); + dst1 = newTemp(ty); + assign(dst1, binop(mkSizedOp(ty,Iop_And8), + mkexpr(t1), + mkU(ty, d64 & mkSizeMask(sz)))); + setFlags_DEP1( Iop_And8, dst1, ty ); + DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); + break; + } + case 1: + *decode_OK = False; + return delta; + case 2: /* NOT */ + dst1 = newTemp(ty); + assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); + if (pfx & PFX_LOCK) { + casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, + guest_RIP_curr_instr ); + } else { + storeLE( mkexpr(addr), mkexpr(dst1) ); + } + DIP("not%c %s\n", nameISize(sz), dis_buf); + break; + case 3: /* NEG */ + dst0 = newTemp(ty); + src = newTemp(ty); + dst1 = newTemp(ty); + assign(dst0, mkU(ty,0)); + assign(src, mkexpr(t1)); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), + mkexpr(src))); + if (pfx & PFX_LOCK) { + casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, + guest_RIP_curr_instr ); + } else { + storeLE( mkexpr(addr), mkexpr(dst1) ); + } + setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); + DIP("neg%c %s\n", nameISize(sz), dis_buf); + break; + case 4: /* MUL (unsigned widening) */ + codegen_mulL_A_D ( sz, False, t1, dis_buf ); + break; + case 5: /* IMUL */ + codegen_mulL_A_D ( sz, True, t1, dis_buf ); + break; + case 6: /* DIV */ + codegen_div ( sz, t1, False ); + DIP("div%c %s\n", nameISize(sz), dis_buf); + break; + case 7: /* IDIV */ + codegen_div ( sz, t1, True ); + DIP("idiv%c %s\n", nameISize(sz), dis_buf); + break; + default: + /*NOTREACHED*/ + vpanic("Grp3(amd64,M)"); + } + } + return delta; + } + + + /* Group 4 extended opcodes. 
*/
+ /* Decodes the byte-sized INC (reg field 0) and DEC (reg field 1)
+    forms; delta points at the modrm byte.  Any other reg field sets
+    *decode_OK to False and returns delta unchanged.  On success the
+    returned delta is advanced by 1 for a register operand or by the
+    amode length reported by disAMode for a memory operand.  For a
+    memory operand with PFX_LOCK set in pfx, the write-back is done
+    with casLE, using the loaded value as the expected value. */
+ static
+ ULong dis_Grp4 ( VexAbiInfo* vbi,
+                  Prefix pfx, Long delta, Bool* decode_OK )
+ {
+    HChar  dis_buf[50];
+    Int    amodeLen;
+    UChar  modrm;
+    Bool   isInc, isDec;
+    IRType ty       = Ity_I8;
+    IRTemp oldValue = newTemp(ty);
+    IRTemp newValue = newTemp(ty);
+    IRTemp ea       = IRTemp_INVALID;
+
+    *decode_OK = True;
+
+    modrm = getUChar(delta);
+    isInc = toBool(gregLO3ofRM(modrm) == 0);
+    isDec = toBool(gregLO3ofRM(modrm) == 1);
+
+    if (epartIsReg(modrm)) {
+       /* Register operand. */
+       assign(oldValue, getIRegE(1, pfx, modrm));
+       if (!isInc && !isDec) {
+          *decode_OK = False;
+          return delta;
+       }
+       assign(newValue, binop(isInc ? Iop_Add8 : Iop_Sub8,
+                              mkexpr(oldValue), mkU8(1)));
+       putIRegE(1, pfx, modrm, mkexpr(newValue));
+       setFlags_INC_DEC( isInc, newValue, ty );
+       delta++;
+       DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
+                       nameIRegE(1, pfx, modrm));
+       return delta;
+    }
+
+    /* Memory operand: the load is emitted before the sub-opcode is
+       checked. */
+    ea = disAMode ( &amodeLen, vbi, pfx, delta, dis_buf, 0 );
+    assign( oldValue, loadLE(ty, mkexpr(ea)) );
+    if (!isInc && !isDec) {
+       *decode_OK = False;
+       return delta;
+    }
+    assign(newValue, binop(isInc ? Iop_Add8 : Iop_Sub8,
+                           mkexpr(oldValue), mkU8(1)));
+    if (pfx & PFX_LOCK) {
+       casLE( mkexpr(ea), mkexpr(oldValue)/*expd*/, mkexpr(newValue)/*new*/,
+              guest_RIP_curr_instr );
+    } else {
+       storeLE( mkexpr(ea), mkexpr(newValue) );
+    }
+    setFlags_INC_DEC( isInc, newValue, ty );
+    delta += amodeLen;
+    DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
+    return delta;
+ }
+
+
+ /* Group 5 extended opcodes.
*/ /* dis_Grp5: 'delta' addresses the modrm byte, whose reg field selects
+     INC Ev (0), DEC Ev (1), call Ev (2), jmp Ev (4) or -- for memory
+     operands only -- PUSH Ev (6).  On success *decode_OK is True and
+     the returned delta points past the modrm/amode bytes; otherwise
+     *decode_OK is False and delta is returned unchanged.  call and jmp
+     additionally set dres->whatNext to Dis_StopHere. */
+static
+ULong dis_Grp5 ( VexAbiInfo* vbi,
+                 Prefix pfx, Int sz, Long delta,
+                 DisResult* dres, Bool* decode_OK )
+{
+   Int     len;
+   UChar   modrm;
+   HChar   dis_buf[50];
+   IRTemp  addr = IRTemp_INVALID;
+   IRType  ty = szToITy(sz);
+   IRTemp  t1 = newTemp(ty);        /* operand value as read (INC/DEC) */
+   IRTemp  t2 = IRTemp_INVALID;     /* INC/DEC result, or new RSP for call/push */
+   IRTemp  t3 = IRTemp_INVALID;     /* branch target, or value to push */
+   Bool    showSz = True;           /* cleared for call/jmp: no size suffix printed */
+
+   *decode_OK = True;
+
+   modrm = getUChar(delta);
+   if (epartIsReg(modrm)) {
+      assign(t1, getIRegE(sz,pfx,modrm));
+      switch (gregLO3ofRM(modrm)) {
+         case 0: /* INC */
+            t2 = newTemp(ty);
+            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+                             mkexpr(t1), mkU(ty,1)));
+            setFlags_INC_DEC( True, t2, ty );
+            putIRegE(sz,pfx,modrm, mkexpr(t2));
+            break;
+         case 1: /* DEC */
+            t2 = newTemp(ty);
+            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+                             mkexpr(t1), mkU(ty,1)));
+            setFlags_INC_DEC( False, t2, ty );
+            putIRegE(sz,pfx,modrm, mkexpr(t2));
+            break;
+         case 2: /* call Ev */
+            /* Operate as if sz==8.  Note that only sz of 4 or 8 is
+               accepted here; any other sz is rejected as undecodable. */
+            if (!(sz == 4 || sz == 8)) goto unhandled;
+            sz = 8;
+            t3 = newTemp(Ity_I64);
+            assign(t3, getIRegE(sz,pfx,modrm));
+            t2 = newTemp(Ity_I64);
+            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+            putIReg64(R_RSP, mkexpr(t2));
+            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); /* return address: just past the modrm byte */
+            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
+            jmp_treg(Ijk_Call,t3);
+            dres->whatNext = Dis_StopHere;
+            showSz = False;
+            break;
+         case 4: /* jmp Ev */
+            /* Operate as if sz==8.  Note that only sz of 4 or 8 is
+               accepted here; any other sz is rejected as undecodable. */
+            if (!(sz == 4 || sz == 8)) goto unhandled;
+            sz = 8;
+            t3 = newTemp(Ity_I64);
+            assign(t3, getIRegE(sz,pfx,modrm));
+            jmp_treg(Ijk_Boring,t3);
+            dres->whatNext = Dis_StopHere;
+            showSz = False;
+            break;
+         default:
+            *decode_OK = False;
+            return delta;
+      }
+      delta++;
+      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
+                       showSz ? nameISize(sz) : ' ',
+                       nameIRegE(sz, pfx, modrm));
+   } else {
+      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+      if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
+                                  && gregLO3ofRM(modrm) != 6) { /* call/jmp/push do their own 64-bit load below */
+         assign(t1, loadLE(ty,mkexpr(addr)));
+      }
+      switch (gregLO3ofRM(modrm)) {
+         case 0: /* INC */
+            t2 = newTemp(ty);
+            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+                             mkexpr(t1), mkU(ty,1)));
+            if (pfx & PFX_LOCK) { /* LOCKed: compare-and-swap against the value loaded */
+               casLE( mkexpr(addr),
+                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+            } else {
+               storeLE(mkexpr(addr),mkexpr(t2));
+            }
+            setFlags_INC_DEC( True, t2, ty );
+            break;
+         case 1: /* DEC */
+            t2 = newTemp(ty);
+            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+                             mkexpr(t1), mkU(ty,1)));
+            if (pfx & PFX_LOCK) { /* LOCKed: compare-and-swap against the value loaded */
+               casLE( mkexpr(addr),
+                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+            } else {
+               storeLE(mkexpr(addr),mkexpr(t2));
+            }
+            setFlags_INC_DEC( False, t2, ty );
+            break;
+         case 2: /* call Ev */
+            /* Operate as if sz==8.  Note that only sz of 4 or 8 is
+               accepted here; any other sz is rejected as undecodable. */
+            if (!(sz == 4 || sz == 8)) goto unhandled;
+            sz = 8;
+            t3 = newTemp(Ity_I64);
+            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+            t2 = newTemp(Ity_I64);
+            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+            putIReg64(R_RSP, mkexpr(t2));
+            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); /* return address: just past the amode bytes */
+            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
+            jmp_treg(Ijk_Call,t3);
+            dres->whatNext = Dis_StopHere;
+            showSz = False;
+            break;
+         case 4: /* JMP Ev */
+            /* Operate as if sz==8.  Note that only sz of 4 or 8 is
+               accepted here; any other sz is rejected as undecodable. */
+            if (!(sz == 4 || sz == 8)) goto unhandled;
+            sz = 8;
+            t3 = newTemp(Ity_I64);
+            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+            jmp_treg(Ijk_Boring,t3);
+            dres->whatNext = Dis_StopHere;
+            showSz = False;
+            break;
+         case 6: /* PUSH Ev */
+            /* There is no encoding for 32-bit operand size; hence ... */
+            if (sz == 4) sz = 8;
+            if (!(sz == 8 || sz == 2)) goto unhandled;
+            if (sz == 8) {
+               t3 = newTemp(Ity_I64);
+               assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+               t2 = newTemp(Ity_I64);
+               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
+               putIReg64(R_RSP, mkexpr(t2) );
+               storeLE( mkexpr(t2), mkexpr(t3) );
+               break;
+            } else {
+               goto unhandled; /* awaiting test case */
+            }
+         default:
+         unhandled: /* also the target of the gotos in the register-operand switch above */
+            *decode_OK = False;
+            return delta;
+      }
+      delta += len;
+      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
+                       showSz ? nameISize(sz) : ' ',
+                       dis_buf);
+   }
+   return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling string ops (including REP prefixes)    ---*/
+/*------------------------------------------------------------*/
+
+/* Code shared by all the string ops.  Assigns to t_inc the per-
+   iteration pointer step: the 64-bit value read from OFFB_DFLAG,
+   shifted left by 1, 2 or 3 for sz of 2, 4 or 8, and used unshifted
+   for any other sz.  (Presumably DFLAG holds +1 or -1 -- TODO
+   confirm against the guest state definition.) */
+static
+void dis_string_op_increment ( Int sz, IRTemp t_inc )
+{
+   UChar logSz;
+   if (sz == 8 || sz == 4 || sz == 2) {
+      logSz = 1;
+      if (sz == 4) logSz = 2;
+      if (sz == 8) logSz = 3;
+      assign( t_inc,
+              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
+                               mkU8(logSz) ) );
+   } else {
+      assign( t_inc,
+              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
+   }
+}
+
+static /* Emit one un-prefixed string op: compute the step, then let dis_OP emit the body. */
+void dis_string_op( void (*dis_OP)( Int, IRTemp ),
+                    Int sz, HChar* name, Prefix pfx )
+{
+   IRTemp t_inc = newTemp(Ity_I64);
+   /* Really we ought to inspect the override prefixes, but we don't.
+      The following assertion catches any resulting silliness. */
+   vassert(pfx == clearSegBits(pfx));
+   dis_string_op_increment(sz, t_inc);
+   dis_OP( sz, t_inc );
+   DIP("%s%c\n", name, nameISize(sz));
+}
+
+static /* MOVS: store the sz-sized value loaded from [RSI] to [RDI], then step both pointers by t_inc. */
+void dis_MOVS ( Int sz, IRTemp t_inc )
+{
+   IRType ty = szToITy(sz);
+   IRTemp td = newTemp(Ity_I64);   /* RDI */
+   IRTemp ts = newTemp(Ity_I64);   /* RSI */
+
+   assign( td, getIReg64(R_RDI) );
+   assign( ts, getIReg64(R_RSI) );
+
+   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
+
+   putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+   putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static /* LODS: load the sz-sized value at [RSI] into rAX, then step RSI by t_inc. */
+void dis_LODS ( Int sz, IRTemp t_inc )
+{
+   IRType ty = szToITy(sz);
+   IRTemp ts = newTemp(Ity_I64);   /* RSI */
+
+   assign( ts, getIReg64(R_RSI) );
+
+   putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
+
+   putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static /* STOS: store the sz-sized rAX value to [RDI], then step RDI by t_inc. */
+void dis_STOS ( Int sz, IRTemp t_inc )
+{
+   IRType ty = szToITy(sz);
+   IRTemp ta = newTemp(ty);        /* rAX */
+   IRTemp td = newTemp(Ity_I64);   /* RDI */
+
+   assign( ta, getIRegRAX(sz) );
+
+   assign( td, getIReg64(R_RDI) );
+
+   storeLE( mkexpr(td), mkexpr(ta) );
+
+   putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+}
+
+static /* CMPS: set flags as for the subtraction (RSI) - (RDI), then step both pointers by t_inc. */
+void dis_CMPS ( Int sz, IRTemp t_inc )
+{
+   IRType ty  = szToITy(sz);
+   IRTemp tdv = newTemp(ty);        /* (RDI) */
+   IRTemp tsv = newTemp(ty);        /* (RSI) */
+   IRTemp td  = newTemp(Ity_I64);   /*  RDI  */
+   IRTemp ts  = newTemp(Ity_I64);   /*  RSI  */
+
+   assign( td, getIReg64(R_RDI) );
+
+   assign( ts, getIReg64(R_RSI) );
+
+   assign( tdv, loadLE(ty,mkexpr(td)) );
+
+   assign( tsv, loadLE(ty,mkexpr(ts)) );
+
+   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
+
+   putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+
+   putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static /* SCAS: set flags as for the subtraction rAX - (RDI), then step RDI by t_inc. */
+void dis_SCAS ( Int sz, IRTemp t_inc )
+{
+   IRType ty  = szToITy(sz);
+   IRTemp ta  = newTemp(ty);        /*  rAX  */
+   IRTemp td  = newTemp(Ity_I64);   /*  RDI  */
+   IRTemp tdv = newTemp(ty);        /* (RDI) */
+
+   assign( ta, getIRegRAX(sz) );
+
+   assign( td, getIReg64(R_RDI) );
+
+   assign( tdv, loadLE(ty,mkexpr(td)) );
+
+   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
+
+   putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+}
+
+
+/* Wrap the appropriate string op inside a REP/REPE/REPNE.  We assume
+   the insn is the last one in the basic block, and so emit a jump to
+   the next insn, rather than just falling through.
+   One iteration is emitted per execution: exit to rip_next if RCX is
+   zero, otherwise decrement RCX, run dis_OP once, and then either
+   jump back to rip unconditionally (cond == AMD64CondAlways) or jump
+   back to rip only if 'cond' holds, else continue at rip_next. */
+static
+void dis_REP_op ( AMD64Condcode cond,
+                  void (*dis_OP)(Int, IRTemp),
+                  Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
+                  Prefix pfx )
+{
+   IRTemp t_inc = newTemp(Ity_I64);
+   IRTemp tc    = newTemp(Ity_I64);  /*  RCX  */
+
+   /* Really we ought to inspect the override prefixes, but we don't.
+      The following assertion catches any resulting silliness. */
+   vassert(pfx == clearSegBits(pfx));
+
+   assign( tc, getIReg64(R_RCX) );
+
+   stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)),
+                      Ijk_Boring,
+                      IRConst_U64(rip_next) ) );
+
+   putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
+
+   dis_string_op_increment(sz, t_inc);
+   dis_OP (sz, t_inc);
+
+   if (cond == AMD64CondAlways) {
+      jmp_lit(Ijk_Boring,rip);
+   } else {
+      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
+                         Ijk_Boring,
+                         IRConst_U64(rip) ) );
+      jmp_lit(Ijk_Boring,rip_next);
+   }
+   DIP("%s%c\n", name, nameISize(sz));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Arithmetic, etc.                                     ---*/
+/*------------------------------------------------------------*/
+
+/* IMUL E, G.  Supplied eip points to the modR/M byte.
*/ /* dis_mul_E_G: delta0 addresses the modrm byte.  Emits
+     G = E * G at operand width 'size', where E is a register or a
+     memory operand, sets the flags via setFlags_MUL, and returns the
+     delta just past the modrm/amode bytes. */
+static
+ULong dis_mul_E_G ( VexAbiInfo* vbi,
+                    Prefix      pfx,
+                    Int         size,
+                    Long        delta0 )
+{
+   Int    alen;
+   HChar  dis_buf[50];
+   UChar  rm = getUChar(delta0);
+   IRType ty = szToITy(size);
+   IRTemp te = newTemp(ty);      /* E operand */
+   IRTemp tg = newTemp(ty);      /* G operand */
+   IRTemp resLo = newTemp(ty);   /* product, at operand width */
+
+   assign( tg, getIRegG(size, pfx, rm) );
+   if (epartIsReg(rm)) {
+      assign( te, getIRegE(size, pfx, rm) );
+   } else {
+      IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
+      assign( te, loadLE(ty,mkexpr(addr)) );
+   }
+
+   setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
+
+   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
+
+   putIRegG(size, pfx, rm, mkexpr(resLo) );
+
+   if (epartIsReg(rm)) {
+      DIP("imul%c %s, %s\n", nameISize(size),
+                             nameIRegE(size,pfx,rm),
+                             nameIRegG(size,pfx,rm));
+      return 1+delta0;
+   } else {
+      DIP("imul%c %s, %s\n", nameISize(size),
+                             dis_buf,
+                             nameIRegG(size,pfx,rm));
+      return alen+delta0;
+   }
+}
+
+
+/* IMUL I * E -> G.  Supplied rip points to the modR/M byte.
+   ('delta' is what is actually supplied.)  The immediate is
+   imin(4,litsize) bytes long and follows the modrm/amode bytes; it is
+   read with getSDisp and masked to the operand size.  Returns the
+   delta just past the immediate. */
+static
+ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
+                       Prefix      pfx,
+                       Int         size,
+                       Long        delta,
+                       Int         litsize )
+{
+   Long   d64;
+   Int    alen;
+   HChar  dis_buf[50];
+   UChar  rm = getUChar(delta);
+   IRType ty = szToITy(size);
+   IRTemp te = newTemp(ty);      /* E operand */
+   IRTemp tl = newTemp(ty);      /* literal operand */
+   IRTemp resLo = newTemp(ty);   /* product, at operand width */
+
+   vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
+
+   if (epartIsReg(rm)) {
+      assign(te, getIRegE(size, pfx, rm));
+      delta++;
+   } else {
+      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
+                                     imin(4,litsize) ); /* last arg: size of the immediate that follows the amode */
+      assign(te, loadLE(ty, mkexpr(addr)));
+      delta += alen;
+   }
+   d64 = getSDisp(imin(4,litsize),delta);
+   delta += imin(4,litsize);
+
+   d64 &= mkSizeMask(size);
+   assign(tl, mkU(ty,d64));
+
+   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
+
+   setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
+
+   putIRegG(size, pfx, rm, mkexpr(resLo));
+
+   DIP("imul%c $%lld, %s, %s\n",
+       nameISize(size), d64,
+       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
+       nameIRegG(size,pfx,rm) );
+   return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
+/*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+
+/* --- Helper functions for dealing with the register stack. --- */
+
+/* --- Set the emulation-warning pseudo-register. --- */
+
+static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   stmt( IRStmt_Put( OFFB_EMWARN, e ) );
+}
+
+/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
+
+static IRExpr* mkQNaN64 ( void )
+{
+  /* QNaN is 0 2047 1 0(51times)
+     == 0b 11111111111b 1 0(51times)
+     == 0x7FF8 0000 0000 0000
+     i.e. sign 0, exponent all ones (2047), top fraction bit set.
+   */
+   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
+}
+
+/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
+
+static IRExpr* get_ftop ( void )
+{
+   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
+}
+
+static void put_ftop ( IRExpr* e )
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   stmt( IRStmt_Put( OFFB_FTOP, e ) );
+}
+
+/* --------- Get/put the C3210 bits. --------- */
+
+static IRExpr*  /* :: Ity_I64 */ get_C3210 ( void )
+{
+   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
+}
+
+static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
+   stmt( IRStmt_Put( OFFB_FC3210, e ) );
+}
+
+/* --------- Get/put the FPU rounding mode. ---------
+   The guest state slot is read and written as Ity_I64; these two
+   helpers present it as Ity_I32 by narrowing on read and
+   zero-extending on write. */
+static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
+{
+   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
+}
+
+static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
+{
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
+}
+
+
+/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
+/* Produces a value in 0 .. 3, which is encoded as per the type
+   IRRoundingMode.
Since the guest_FPROUND value is also encoded as + per IRRoundingMode, we merely need to get it and mask it for + safety. + */ + static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) + { + return binop( Iop_And32, get_fpround(), mkU32(3) ); + } + + static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) + { + return mkU32(Irrm_NEAREST); + } + + + /* --------- Get/set FP register tag bytes. --------- */ + + /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ + + static void put_ST_TAG ( Int i, IRExpr* value ) + { + IRRegArray* descr; + vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); + descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); + } + + /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be + zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ + + static IRExpr* get_ST_TAG ( Int i ) + { + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + return IRExpr_GetI( descr, get_ftop(), i ); + } + + + /* --------- Get/set FP registers. --------- */ + + /* Given i, and some expression e, emit 'ST(i) = e' and set the + register's tag to indicate the register is full. The previous + state of the register is not checked. */ + + static void put_ST_UNCHECKED ( Int i, IRExpr* value ) + { + IRRegArray* descr; + vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); + descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); + stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); + /* Mark the register as in-use. */ + put_ST_TAG(i, mkU8(1)); + } + + /* Given i, and some expression e, emit + ST(i) = is_full(i) ? NaN : e + and set the tag accordingly. + */ + + static void put_ST ( Int i, IRExpr* value ) + { + put_ST_UNCHECKED( i, + IRExpr_Mux0X( get_ST_TAG(i), + /* 0 means empty */ + value, + /* non-0 means full */ + mkQNaN64() + ) + ); + } + + + /* Given i, generate an expression yielding 'ST(i)'. 
*/ + + static IRExpr* get_ST_UNCHECKED ( Int i ) + { + IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); + return IRExpr_GetI( descr, get_ftop(), i ); + } + + + /* Given i, generate an expression yielding + is_full(i) ? ST(i) : NaN + */ + + static IRExpr* get_ST ( Int i ) + { + return + IRExpr_Mux0X( get_ST_TAG(i), + /* 0 means empty */ + mkQNaN64(), + /* non-0 means full */ + get_ST_UNCHECKED(i)); + } + + + /* Adjust FTOP downwards by one register. */ + + static void fp_push ( void ) + { + put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); + } + + /* Adjust FTOP upwards by one register, and mark the vacated register + as empty. */ + + static void fp_pop ( void ) + { + put_ST_TAG(0, mkU8(0)); + put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); + } + + /* Clear the C2 bit of the FPU status register, for + sin/cos/tan/sincos. */ + + static void clear_C2 ( void ) + { + put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); + } + + /* Invent a plausible-looking FPU status word value: + ((ftop & 7) << 11) | (c3210 & 0x4700) + */ + static IRExpr* get_FPU_sw ( void ) + { + return + unop(Iop_32to16, + binop(Iop_Or32, + binop(Iop_Shl32, + binop(Iop_And32, get_ftop(), mkU32(7)), + mkU8(11)), + binop(Iop_And32, unop(Iop_64to32, get_C3210()), + mkU32(0x4700)) + )); + } + + + /* ------------------------------------------------------- */ + /* Given all that stack-mangling junk, we can now go ahead + and describe FP instructions. + */ + + /* ST(0) = ST(0) `op` mem64/32(addr) + Need to check ST(0)'s tag on read, but not on write. 
+ */ + static + void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, + IROp op, Bool dbl ) + { + DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); + if (dbl) { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + loadLE(Ity_F64,mkexpr(addr)) + )); + } else { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) + )); + } + } + + + /* ST(0) = mem64/32(addr) `op` ST(0) + Need to check ST(0)'s tag on read, but not on write. + */ + static + void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, + IROp op, Bool dbl ) + { + DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); + if (dbl) { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + loadLE(Ity_F64,mkexpr(addr)), + get_ST(0) + )); + } else { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), + get_ST(0) + )); + } + } + + + /* ST(dst) = ST(dst) `op` ST(src). + Check dst and src tags when reading but not on write. + */ + static + void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, + Bool pop_after ) + { + DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); + put_ST_UNCHECKED( + st_dst, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(st_dst), + get_ST(st_src) ) + ); + if (pop_after) + fp_pop(); + } + + /* ST(dst) = ST(src) `op` ST(dst). + Check dst and src tags when reading but not on write. 
+*/
+static
+void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+                         Bool pop_after )
+{
+   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
+   put_ST_UNCHECKED( 
+      st_dst, 
+      triop( op, 
+             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+             get_ST(st_src), 
+             get_ST(st_dst) ) 
+   );
+   if (pop_after)
+      fp_pop();
+}
+
+/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ), optionally followed by a
+   pop of the register stack when pop_after is set. */
+static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
+{
+   DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
+   /* This is a bit of a hack (and isn't really right).  It sets
+      Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
+      documentation implies A and S are unchanged. 
+   */
+   /* It's also fishy in that it is used both for COMIP and
+      UCOMIP, and they aren't the same (although similar). */
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
+   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+   stmt( IRStmt_Put( 
+            OFFB_CC_DEP1,
+            binop( Iop_And64,
+                   unop( Iop_32Uto64,
+                         binop(Iop_CmpF64, get_ST(0), get_ST(i))),
+                   mkU64(0x45) /* keep only bits 0, 2 and 6 of the comparison result */
+        )));
+   if (pop_after)
+      fp_pop();
+}
+
+
+/* returns 
+   32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
+   The range test is done as a single unsigned comparison:
+   zero-extend (e32 + 32768) to 64 bits and check it is below 65536.
+   Mux0X yields its second argument (0x8000) when that check is false
+   and its third (the truncated value) when it is true.
+*/
+static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
+{
+   IRTemp t32 = newTemp(Ity_I32);
+   assign( t32, e32 );
+   return
+      IRExpr_Mux0X( 
+         unop(Iop_1Uto8, 
+              binop(Iop_CmpLT64U, 
+                    unop(Iop_32Uto64, 
+                         binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 
+                    mkU64(65536))),
+         mkU16( 0x8000 ),
+         unop(Iop_32to16, mkexpr(t32)));
+}
+
+
+static
+ULong dis_FPU ( /*OUT*/Bool* decode_ok, 
+                VexAbiInfo* vbi, Prefix pfx, Long delta )
+{
+   Int    len;
+   UInt   r_src, r_dst;
+   HChar  dis_buf[50];
+   IRTemp t1, t2;
+
+   /* On entry, delta points at the second byte of the insn (the modrm
+      byte).*/
+   UChar first_opcode = getUChar(delta-1);
+   UChar modrm        = getUChar(delta+0);
+
+   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
+
+   if (first_opcode == 0xD8) {
+      if (modrm < 0xC0) {
+
+         /* bits 5,4,3 are an opcode
extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FADD single-real */ + fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); + break; + + case 1: /* FMUL single-real */ + fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); + break; + + //.. case 2: /* FCOM single-real */ + //.. DIP("fcoms %s\n", dis_buf); + //.. /* This forces C1 to zero, which isn't right. */ + //.. put_C3210( + //.. binop( Iop_And32, + //.. binop(Iop_Shl32, + //.. binop(Iop_CmpF64, + //.. get_ST(0), + //.. unop(Iop_F32toF64, + //.. loadLE(Ity_F32,mkexpr(addr)))), + //.. mkU8(8)), + //.. mkU32(0x4500) + //.. )); + //.. break; + //.. + //.. case 3: /* FCOMP single-real */ + //.. DIP("fcomps %s\n", dis_buf); + //.. /* This forces C1 to zero, which isn't right. */ + //.. put_C3210( + //.. binop( Iop_And32, + //.. binop(Iop_Shl32, + //.. binop(Iop_CmpF64, + //.. get_ST(0), + //.. unop(Iop_F32toF64, + //.. loadLE(Ity_F32,mkexpr(addr)))), + //.. mkU8(8)), + //.. mkU32(0x4500) + //.. )); + //.. fp_pop(); + //.. break; + + case 4: /* FSUB single-real */ + fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); + break; + + case 5: /* FSUBR single-real */ + fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); + break; + + case 6: /* FDIV single-real */ + fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); + break; + + case 7: /* FDIVR single-real */ + fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xD8\n"); + goto decode_fail; + } + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); + break; + + case 0xC8 ... 
0xCF: /* FMUL %st(?),%st(0) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); + break; + + /* Dunno if this is right */ + case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ + r_dst = (UInt)modrm - 0xD0; + DIP("fcom %%st(0),%%st(%d)\n", r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + ))); + break; + + /* Dunno if this is right */ + case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ + r_dst = (UInt)modrm - 0xD8; + DIP("fcomp %%st(0),%%st(%d)\n", r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + ))); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ + fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); + break; + + case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); + break; + + case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); + break; + + case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xD9) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FLD single-real */ + DIP("flds %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_F32toF64, + loadLE(Ity_F32, mkexpr(addr)))); + break; + + case 2: /* FST single-real */ + DIP("fsts %s\n", dis_buf); + storeLE(mkexpr(addr), + binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); + break; + + case 3: /* FSTP single-real */ + DIP("fstps %s\n", dis_buf); + storeLE(mkexpr(addr), + binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); + fp_pop(); + break; + + case 4: { /* FLDENV m28 */ + /* Uses dirty helper: + VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ + IRTemp ew = newTemp(Ity_I32); + IRTemp w64 = newTemp(Ity_I64); + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FLDENV", + &amd64g_dirtyhelper_FLDENV, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + d->tmp = w64; + /* declare we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 28; + + /* declare we're writing guest state */ + d->nFxState = 4; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPTAGS; + d->fxState[1].size = 8 * sizeof(UChar); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPROUND; + d->fxState[2].size = sizeof(ULong); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FC3210; + d->fxState[3].size = sizeof(ULong); + + stmt( IRStmt_Dirty(d) ); + + /* ew contains any emulation warning we may need to + issue. If needed, side-exit to the next insn, + reporting the warning, so that Valgrind's dispatcher + sees the warning. 
*/ + assign(ew, unop(Iop_64to32,mkexpr(w64)) ); + put_emwarn( mkexpr(ew) ); + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U64( guest_RIP_bbstart+delta ) + ) + ); + + DIP("fldenv %s\n", dis_buf); + break; + } + + case 5: {/* FLDCW */ + /* The only thing we observe in the control word is the + rounding mode. Therefore, pass the 16-bit value + (x87 native-format control word) to a clean helper, + getting back a 64-bit value, the lower half of which + is the FPROUND value to store, and the upper half of + which is the emulation-warning token which may be + generated. + */ + /* ULong amd64h_check_fldcw ( ULong ); */ + IRTemp t64 = newTemp(Ity_I64); + IRTemp ew = newTemp(Ity_I32); + DIP("fldcw %s\n", dis_buf); + assign( t64, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_check_fldcw", + &amd64g_check_fldcw, + mkIRExprVec_1( + unop( Iop_16Uto64, + loadLE(Ity_I16, mkexpr(addr))) + ) + ) + ); + + put_fpround( unop(Iop_64to32, mkexpr(t64)) ); + assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); + put_emwarn( mkexpr(ew) ); + /* Finally, if an emulation warning was reported, + side-exit to the next insn, reporting the warning, + so that Valgrind's dispatcher sees the warning. 
*/ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U64( guest_RIP_bbstart+delta ) + ) + ); + break; + } + + case 6: { /* FNSTENV m28 */ + /* Uses dirty helper: + void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FSTENV", + &amd64g_dirtyhelper_FSTENV, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 28; + + /* declare we're reading guest state */ + d->nFxState = 4; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPTAGS; + d->fxState[1].size = 8 * sizeof(UChar); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPROUND; + d->fxState[2].size = sizeof(ULong); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FC3210; + d->fxState[3].size = sizeof(ULong); + + stmt( IRStmt_Dirty(d) ); + + DIP("fnstenv %s\n", dis_buf); + break; + } + + case 7: /* FNSTCW */ + /* Fake up a native x87 FPU control word. The only + thing it depends on is FPROUND[1:0], so call a clean + helper to cook it up. */ + /* ULong amd64g_create_fpucw ( ULong fpround ) */ + DIP("fnstcw %s\n", dis_buf); + storeLE( + mkexpr(addr), + unop( Iop_64to16, + mkIRExprCCall( + Ity_I64, 0/*regp*/, + "amd64g_create_fpucw", &amd64g_create_fpucw, + mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) + ) + ) + ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xD9\n"); + goto decode_fail; + } + + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FLD %st(?) */ + r_src = (UInt)modrm - 0xC0; + DIP("fld %%st(%u)\n", r_src); + t1 = newTemp(Ity_F64); + assign(t1, get_ST(r_src)); + fp_push(); + put_ST(0, mkexpr(t1)); + break; + + case 0xC8 ... 0xCF: /* FXCH %st(?) 
*/ + r_src = (UInt)modrm - 0xC8; + DIP("fxch %%st(%u)\n", r_src); + t1 = newTemp(Ity_F64); + t2 = newTemp(Ity_F64); + assign(t1, get_ST(0)); + assign(t2, get_ST(r_src)); + put_ST_UNCHECKED(0, mkexpr(t2)); + put_ST_UNCHECKED(r_src, mkexpr(t1)); + break; + + case 0xE0: /* FCHS */ + DIP("fchs\n"); + put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); + break; + + case 0xE1: /* FABS */ + DIP("fabs\n"); + put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); + break; + + case 0xE5: { /* FXAM */ + /* This is an interesting one. It examines %st(0), + regardless of whether the tag says it's empty or not. + Here, just pass both the tag (in our format) and the + value (as a double, actually a ULong) to a helper + function. */ + IRExpr** args + = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), + unop(Iop_ReinterpF64asI64, + get_ST_UNCHECKED(0)) ); + put_C3210(mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, + args + )); + DIP("fxam\n"); + break; + } + + case 0xE8: /* FLD1 */ + DIP("fld1\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); + break; + + case 0xE9: /* FLDL2T */ + DIP("fldl2t\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); + break; + + case 0xEA: /* FLDL2E */ + DIP("fldl2e\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); + break; + + case 0xEB: /* FLDPI */ + DIP("fldpi\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); + break; + + case 0xEC: /* FLDLG2 */ + DIP("fldlg2\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); + break; + + case 0xED: /* 
FLDLN2 */ + DIP("fldln2\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); + break; + + case 0xEE: /* FLDZ */ + DIP("fldz\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); + break; + + case 0xF0: /* F2XM1 */ + DIP("f2xm1\n"); + put_ST_UNCHECKED(0, + binop(Iop_2xm1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + break; + + case 0xF1: /* FYL2X */ + DIP("fyl2x\n"); + put_ST_UNCHECKED(1, + triop(Iop_Yl2xF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 0xF2: /* FPTAN */ + DIP("ftan\n"); + put_ST_UNCHECKED(0, + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + fp_push(); + put_ST(0, IRExpr_Const(IRConst_F64(1.0))); + clear_C2(); /* HACK */ + break; + + case 0xF3: /* FPATAN */ + DIP("fpatan\n"); + put_ST_UNCHECKED(1, + triop(Iop_AtanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 0xF4: { /* FXTRACT */ + IRTemp argF = newTemp(Ity_F64); + IRTemp sigF = newTemp(Ity_F64); + IRTemp expF = newTemp(Ity_F64); + IRTemp argI = newTemp(Ity_I64); + IRTemp sigI = newTemp(Ity_I64); + IRTemp expI = newTemp(Ity_I64); + DIP("fxtract\n"); + assign( argF, get_ST(0) ); + assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); + assign( sigI, + mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86amd64g_calculate_FXTRACT", + &x86amd64g_calculate_FXTRACT, + mkIRExprVec_2( mkexpr(argI), + mkIRExpr_HWord(0)/*sig*/ )) + ); + assign( expI, + mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86amd64g_calculate_FXTRACT", + &x86amd64g_calculate_FXTRACT, + mkIRExprVec_2( mkexpr(argI), + mkIRExpr_HWord(1)/*exp*/ )) + ); + assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); + assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); + /* 
exponent */ + put_ST_UNCHECKED(0, mkexpr(expF) ); + fp_push(); + /* significand */ + put_ST(0, mkexpr(sigF) ); + break; + } + + case 0xF5: { /* FPREM1 -- IEEE compliant */ + IRTemp a1 = newTemp(Ity_F64); + IRTemp a2 = newTemp(Ity_F64); + DIP("fprem1\n"); + /* Do FPREM1 twice, once to get the remainder, and once + to get the C3210 flag values. */ + assign( a1, get_ST(0) ); + assign( a2, get_ST(1) ); + put_ST_UNCHECKED(0, + triop(Iop_PRem1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2))); + put_C3210( + unop(Iop_32Uto64, + triop(Iop_PRem1C3210F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2)) )); + break; + } + + case 0xF7: /* FINCSTP */ + DIP("fincstp\n"); + put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); + break; + + case 0xF8: { /* FPREM -- not IEEE compliant */ + IRTemp a1 = newTemp(Ity_F64); + IRTemp a2 = newTemp(Ity_F64); + DIP("fprem\n"); + /* Do FPREM twice, once to get the remainder, and once + to get the C3210 flag values. 
*/ + assign( a1, get_ST(0) ); + assign( a2, get_ST(1) ); + put_ST_UNCHECKED(0, + triop(Iop_PRemF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2))); + put_C3210( + unop(Iop_32Uto64, + triop(Iop_PRemC3210F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2)) )); + break; + } + + case 0xF9: /* FYL2XP1 */ + DIP("fyl2xp1\n"); + put_ST_UNCHECKED(1, + triop(Iop_Yl2xp1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 0xFA: /* FSQRT */ + DIP("fsqrt\n"); + put_ST_UNCHECKED(0, + binop(Iop_SqrtF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + break; + + case 0xFB: { /* FSINCOS */ + IRTemp a1 = newTemp(Ity_F64); + assign( a1, get_ST(0) ); + DIP("fsincos\n"); + put_ST_UNCHECKED(0, + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1))); + fp_push(); + put_ST(0, + binop(Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1))); + clear_C2(); /* HACK */ + break; + } + + case 0xFC: /* FRNDINT */ + DIP("frndint\n"); + put_ST_UNCHECKED(0, + binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); + break; + + case 0xFD: /* FSCALE */ + DIP("fscale\n"); + put_ST_UNCHECKED(0, + triop(Iop_ScaleF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + get_ST(1))); + break; + + case 0xFE: /* FSIN */ + DIP("fsin\n"); + put_ST_UNCHECKED(0, + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + clear_C2(); /* HACK */ + break; + + case 0xFF: /* FCOS */ + DIP("fcos\n"); + put_ST_UNCHECKED(0, + binop(Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + clear_C2(); /* HACK */ + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDA) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IROp fop; + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + switch (gregLO3ofRM(modrm)) { + + case 0: /* FIADD m32int */ /* ST(0) += m32int */ + DIP("fiaddl %s\n", dis_buf); + fop = Iop_AddF64; + goto do_fop_m32; + + case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ + DIP("fimull %s\n", dis_buf); + fop = Iop_MulF64; + goto do_fop_m32; + + case 4: /* FISUB m32int */ /* ST(0) -= m32int */ + DIP("fisubl %s\n", dis_buf); + fop = Iop_SubF64; + goto do_fop_m32; + + case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ + DIP("fisubrl %s\n", dis_buf); + fop = Iop_SubF64; + goto do_foprev_m32; + + case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ + DIP("fisubl %s\n", dis_buf); + fop = Iop_DivF64; + goto do_fop_m32; + + case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ + DIP("fidivrl %s\n", dis_buf); + fop = Iop_DivF64; + goto do_foprev_m32; + + do_fop_m32: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr))))); + break; + + do_foprev_m32: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr))), + get_ST(0))); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xDA\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC0; + DIP("fcmovb %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondB)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC8; + DIP("fcmovz %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondZ)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD0 ... 
0xD7: /* FCMOVBE ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD0; + DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondBE)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD8; + DIP("fcmovu %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondP)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xE9: /* FUCOMPP %st(0),%st(1) */ + DIP("fucompp %%st(0),%%st(1)\n"); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(1)), + mkU8(8)), + mkU32(0x4500) + ))); + fp_pop(); + fp_pop(); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDB) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FILD m32int */ + DIP("fildl %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr)))); + break; + + case 1: /* FISTTPL m32 (SSE3) */ + DIP("fisttpl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0)) ); + fp_pop(); + break; + + case 2: /* FIST m32 */ + DIP("fistl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, get_roundingmode(), get_ST(0)) ); + break; + + case 3: /* FISTP m32 */ + DIP("fistpl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, get_roundingmode(), get_ST(0)) ); + fp_pop(); + break; + + case 5: { /* FLD extended-real */ + /* Uses dirty helper: + ULong amd64g_loadF80le ( ULong ) + addr holds the address. First, do a dirty call to + get hold of the data. 
*/ + IRTemp val = newTemp(Ity_I64); + IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); + + IRDirty* d = unsafeIRDirty_1_N ( + val, + 0/*regparms*/, + "amd64g_dirtyhelper_loadF80le", + &amd64g_dirtyhelper_loadF80le, + args + ); + /* declare that we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 10; + + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + fp_push(); + put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); + + DIP("fldt %s\n", dis_buf); + break; + } + + case 7: { /* FSTP extended-real */ + /* Uses dirty helper: + void amd64g_storeF80le ( ULong addr, ULong data ) + */ + IRExpr** args + = mkIRExprVec_2( mkexpr(addr), + unop(Iop_ReinterpF64asI64, get_ST(0)) ); + + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_storeF80le", + &amd64g_dirtyhelper_storeF80le, + args + ); + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 10; + + /* execute the dirty call. */ + stmt( IRStmt_Dirty(d) ); + fp_pop(); + + DIP("fstpt\n %s", dis_buf); + break; + } + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xDB\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC0; + DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondNB)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC8; + DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED( + 0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondNZ)), + get_ST(0), + get_ST(r_src) + ) + ); + break; + + case 0xD0 ... 
0xD7: /* FCMOVNBE ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD0; + DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED( + 0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondNBE)), + get_ST(0), + get_ST(r_src) + ) + ); + break; + + case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD8; + DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED( + 0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondNP)), + get_ST(0), + get_ST(r_src) + ) + ); + break; + + case 0xE2: + DIP("fnclex\n"); + break; + + case 0xE3: { + /* Uses dirty helper: + void amd64g_do_FINIT ( VexGuestAMD64State* ) */ + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FINIT", + &amd64g_dirtyhelper_FINIT, + mkIRExprVec_0() + ); + d->needsBBP = True; + + /* declare we're writing guest state */ + d->nFxState = 5; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(ULong); + + d->fxState[4].fx = Ifx_Write; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(ULong); + + stmt( IRStmt_Dirty(d) ); + + DIP("fninit\n"); + break; + } + + case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); + break; + + case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDC) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FADD double-real */ + fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); + break; + + case 1: /* FMUL double-real */ + fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); + break; + + //.. case 2: /* FCOM double-real */ + //.. DIP("fcoml %s\n", dis_buf); + //.. /* This forces C1 to zero, which isn't right. */ + //.. put_C3210( + //.. binop( Iop_And32, + //.. binop(Iop_Shl32, + //.. binop(Iop_CmpF64, + //.. get_ST(0), + //.. loadLE(Ity_F64,mkexpr(addr))), + //.. mkU8(8)), + //.. mkU32(0x4500) + //.. )); + //.. break; + + case 3: /* FCOMP double-real */ + DIP("fcompl %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + loadLE(Ity_F64,mkexpr(addr))), + mkU8(8)), + mkU32(0x4500) + ))); + fp_pop(); + break; + + case 4: /* FSUB double-real */ + fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); + break; + + case 5: /* FSUBR double-real */ + fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); + break; + + case 6: /* FDIV double-real */ + fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); + break; + + case 7: /* FDIVR double-real */ + fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xDC\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); + break; + + case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); + break; + + case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) 
*/ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); + break; + + case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ + fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); + break; + + case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); + break; + + case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDD) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FLD double-real */ + DIP("fldl %s\n", dis_buf); + fp_push(); + put_ST(0, loadLE(Ity_F64, mkexpr(addr))); + break; + + case 1: /* FISTTPQ m64 (SSE3) */ + DIP("fistppll %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI64, mkU32(Irrm_ZERO), get_ST(0)) ); + fp_pop(); + break; + + case 2: /* FST double-real */ + DIP("fstl %s\n", dis_buf); + storeLE(mkexpr(addr), get_ST(0)); + break; + + case 3: /* FSTP double-real */ + DIP("fstpl %s\n", dis_buf); + storeLE(mkexpr(addr), get_ST(0)); + fp_pop(); + break; + + //.. case 4: { /* FRSTOR m108 */ + //.. /* Uses dirty helper: + //.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */ + //.. IRTemp ew = newTemp(Ity_I32); + //.. IRDirty* d = unsafeIRDirty_0_N ( + //.. 0/*regparms*/, + //.. "x86g_dirtyhelper_FRSTOR", + //.. &x86g_dirtyhelper_FRSTOR, + //.. mkIRExprVec_1( mkexpr(addr) ) + //.. ); + //.. d->needsBBP = True; + //.. d->tmp = ew; + //.. /* declare we're reading memory */ + //.. d->mFx = Ifx_Read; + //.. d->mAddr = mkexpr(addr); + //.. d->mSize = 108; + //.. + //.. /* declare we're writing guest state */ + //.. d->nFxState = 5; + //.. + //.. d->fxState[0].fx = Ifx_Write; + //.. 
d->fxState[0].offset = OFFB_FTOP; + //.. d->fxState[0].size = sizeof(UInt); + //.. + //.. d->fxState[1].fx = Ifx_Write; + //.. d->fxState[1].offset = OFFB_FPREGS; + //.. d->fxState[1].size = 8 * sizeof(ULong); + //.. + //.. d->fxState[2].fx = Ifx_Write; + //.. d->fxState[2].offset = OFFB_FPTAGS; + //.. d->fxState[2].size = 8 * sizeof(UChar); + //.. + //.. d->fxState[3].fx = Ifx_Write; + //.. d->fxState[3].offset = OFFB_FPROUND; + //.. d->fxState[3].size = sizeof(UInt); + //.. + //.. d->fxState[4].fx = Ifx_Write; + //.. d->fxState[4].offset = OFFB_FC3210; + //.. d->fxState[4].size = sizeof(UInt); + //.. + //.. stmt( IRStmt_Dirty(d) ); + //.. + //.. /* ew contains any emulation warning we may need to + //.. issue. If needed, side-exit to the next insn, + //.. reporting the warning, so that Valgrind's dispatcher + //.. sees the warning. */ + //.. put_emwarn( mkexpr(ew) ); + //.. stmt( + //.. IRStmt_Exit( + //.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + //.. Ijk_EmWarn, + //.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta) + //.. ) + //.. ); + //.. + //.. DIP("frstor %s\n", dis_buf); + //.. break; + //.. } + //.. + //.. case 6: { /* FNSAVE m108 */ + //.. /* Uses dirty helper: + //.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */ + //.. IRDirty* d = unsafeIRDirty_0_N ( + //.. 0/*regparms*/, + //.. "x86g_dirtyhelper_FSAVE", + //.. &x86g_dirtyhelper_FSAVE, + //.. mkIRExprVec_1( mkexpr(addr) ) + //.. ); + //.. d->needsBBP = True; + //.. /* declare we're writing memory */ + //.. d->mFx = Ifx_Write; + //.. d->mAddr = mkexpr(addr); + //.. d->mSize = 108; + //.. + //.. /* declare we're reading guest state */ + //.. d->nFxState = 5; + //.. + //.. d->fxState[0].fx = Ifx_Read; + //.. d->fxState[0].offset = OFFB_FTOP; + //.. d->fxState[0].size = sizeof(UInt); + //.. + //.. d->fxState[1].fx = Ifx_Read; + //.. d->fxState[1].offset = OFFB_FPREGS; + //.. d->fxState[1].size = 8 * sizeof(ULong); + //.. + //.. d->fxState[2].fx = Ifx_Read; + //.. 
d->fxState[2].offset = OFFB_FPTAGS; + //.. d->fxState[2].size = 8 * sizeof(UChar); + //.. + //.. d->fxState[3].fx = Ifx_Read; + //.. d->fxState[3].offset = OFFB_FPROUND; + //.. d->fxState[3].size = sizeof(UInt); + //.. + //.. d->fxState[4].fx = Ifx_Read; + //.. d->fxState[4].offset = OFFB_FC3210; + //.. d->fxState[4].size = sizeof(UInt); + //.. + //.. stmt( IRStmt_Dirty(d) ); + //.. + //.. DIP("fnsave %s\n", dis_buf); + //.. break; + //.. } + + case 7: { /* FNSTSW m16 */ + IRExpr* sw = get_FPU_sw(); + vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); + storeLE( mkexpr(addr), sw ); + DIP("fnstsw %s\n", dis_buf); + break; + } + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xDD\n"); + goto decode_fail; + } + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FFREE %st(?) */ + r_dst = (UInt)modrm - 0xC0; + DIP("ffree %%st(%u)\n", r_dst); + put_ST_TAG ( r_dst, mkU8(0) ); + break; + + case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xD0; + DIP("fst %%st(0),%%st(%u)\n", r_dst); + /* P4 manual says: "If the destination operand is a + non-empty register, the invalid-operation exception + is not generated. Hence put_ST_UNCHECKED. */ + put_ST_UNCHECKED(r_dst, get_ST(0)); + break; + + case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xD8; + DIP("fstp %%st(0),%%st(%u)\n", r_dst); + /* P4 manual says: "If the destination operand is a + non-empty register, the invalid-operation exception + is not generated. Hence put_ST_UNCHECKED. */ + put_ST_UNCHECKED(r_dst, get_ST(0)); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xE0; + DIP("fucom %%st(0),%%st(%u)\n", r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + ))); + break; + + case 0xE8 ... 
0xEF: /* FUCOMP %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xE8; + DIP("fucomp %%st(0),%%st(%u)\n", r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + ))); + fp_pop(); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDE) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IROp fop; + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FIADD m16int */ /* ST(0) += m16int */ + DIP("fiaddw %s\n", dis_buf); + fop = Iop_AddF64; + goto do_fop_m16; + + case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ + DIP("fimulw %s\n", dis_buf); + fop = Iop_MulF64; + goto do_fop_m16; + + case 4: /* FISUB m16int */ /* ST(0) -= m16int */ + DIP("fisubw %s\n", dis_buf); + fop = Iop_SubF64; + goto do_fop_m16; + + case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ + DIP("fisubrw %s\n", dis_buf); + fop = Iop_SubF64; + goto do_foprev_m16; + + case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ + DIP("fisubw %s\n", dis_buf); + fop = Iop_DivF64; + goto do_fop_m16; + + case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ + DIP("fidivrw %s\n", dis_buf); + fop = Iop_DivF64; + goto do_foprev_m16; + + do_fop_m16: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr)))))); + break; + + do_foprev_m16: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr)))), + get_ST(0))); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 
0xDE\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); + break; + + case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); + break; + + case 0xD9: /* FCOMPP %st(0),%st(1) */ + DIP("fcompp %%st(0),%%st(1)\n"); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + unop(Iop_32Uto64, + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(1)), + mkU8(8)), + mkU32(0x4500) + ))); + fp_pop(); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); + break; + + case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ + fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); + break; + + case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); + break; + + case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDF) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + + switch (gregLO3ofRM(modrm)) { + + case 0: /* FILD m16int */ + DIP("fildw %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr))))); + break; + + case 1: /* FISTTPS m16 (SSE3) */ + DIP("fisttps %s\n", dis_buf); + storeLE( mkexpr(addr), + x87ishly_qnarrow_32_to_16( + binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0)) )); + fp_pop(); + break; + + //.. case 2: /* FIST m16 */ + //.. DIP("fistp %s\n", dis_buf); + //.. storeLE( mkexpr(addr), + //.. 
binop(Iop_F64toI16, get_roundingmode(), get_ST(0)) ); + //.. break; + + case 3: /* FISTP m16 */ + DIP("fistps %s\n", dis_buf); + storeLE( mkexpr(addr), + x87ishly_qnarrow_32_to_16( + binop(Iop_F64toI32, get_roundingmode(), get_ST(0)) )); + fp_pop(); + break; + + case 5: /* FILD m64 */ + DIP("fildll %s\n", dis_buf); + fp_push(); + put_ST(0, binop(Iop_I64toF64, + get_roundingmode(), + loadLE(Ity_I64, mkexpr(addr)))); + break; + + case 7: /* FISTP m64 */ + DIP("fistpll %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI64, get_roundingmode(), get_ST(0)) ); + fp_pop(); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); + vex_printf("first_opcode == 0xDF\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0: /* FFREEP %st(0) */ + DIP("ffreep %%st(%d)\n", 0); + put_ST_TAG ( 0, mkU8(0) ); + fp_pop(); + break; + + case 0xE0: /* FNSTSW %ax */ + DIP("fnstsw %%ax\n"); + /* Invent a plausible-looking FPU status word value and + dump it in %AX: + ((ftop & 7) << 11) | (c3210 & 0x4700) + */ + putIRegRAX( + 2, + unop(Iop_32to16, + binop(Iop_Or32, + binop(Iop_Shl32, + binop(Iop_And32, get_ftop(), mkU32(7)), + mkU8(11)), + binop(Iop_And32, + unop(Iop_64to32, get_C3210()), + mkU32(0x4700)) + ))); + break; + + case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); + break; + + case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) 
*/ + /* not really right since COMIP != UCOMIP */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); + break; + + default: + goto decode_fail; + } + } + + } + + else + goto decode_fail; + + *decode_ok = True; + return delta; + + decode_fail: + *decode_ok = False; + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- ---*/ + /*--- MMX INSTRUCTIONS ---*/ + /*--- ---*/ + /*------------------------------------------------------------*/ + + /* Effect of MMX insns on x87 FPU state (table 11-2 of + IA32 arch manual, volume 3): + + Read from, or write to MMX register (viz, any insn except EMMS): + * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero + * FP stack pointer set to zero + + EMMS: + * All tags set to Invalid (empty) -- FPTAGS[i] := zero + * FP stack pointer set to zero + */ + + static void do_MMX_preamble ( void ) + { + Int i; + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + IRExpr* zero = mkU32(0); + IRExpr* tag1 = mkU8(1); + put_ftop(zero); + for (i = 0; i < 8; i++) + stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); + } + + static void do_EMMS_preamble ( void ) + { + Int i; + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + IRExpr* zero = mkU32(0); + IRExpr* tag0 = mkU8(0); + put_ftop(zero); + for (i = 0; i < 8; i++) + stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); + } + + + static IRExpr* getMMXReg ( UInt archreg ) + { + vassert(archreg < 8); + return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); + } + + + static void putMMXReg ( UInt archreg, IRExpr* e ) + { + vassert(archreg < 8); + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); + stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); + } + + + /* Helper for non-shift MMX insns. Note this is incomplete in the + sense that it does not first call do_MMX_preamble() -- that is the + responsibility of its caller. 
*/ + + static + ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi, + Prefix pfx, + Long delta, + UChar opc, + HChar* name, + Bool show_granularity ) + { + HChar dis_buf[50]; + UChar modrm = getUChar(delta); + Bool isReg = epartIsReg(modrm); + IRExpr* argL = NULL; + IRExpr* argR = NULL; + IRExpr* argG = NULL; + IRExpr* argE = NULL; + IRTemp res = newTemp(Ity_I64); + + Bool invG = False; + IROp op = Iop_INVALID; + void* hAddr = NULL; + HChar* hName = NULL; + Bool eLeft = False; + + # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) + + switch (opc) { + /* Original MMX ones */ + case 0xFC: op = Iop_Add8x8; break; + case 0xFD: op = Iop_Add16x4; break; + case 0xFE: op = Iop_Add32x2; break; + + case 0xEC: op = Iop_QAdd8Sx8; break; + case 0xED: op = Iop_QAdd16Sx4; break; + + case 0xDC: op = Iop_QAdd8Ux8; break; + case 0xDD: op = Iop_QAdd16Ux4; break; + + case 0xF8: op = Iop_Sub8x8; break; + case 0xF9: op = Iop_Sub16x4; break; + case 0xFA: op = Iop_Sub32x2; break; + + case 0xE8: op = Iop_QSub8Sx8; break; + case 0xE9: op = Iop_QSub16Sx4; break; + + case 0xD8: op = Iop_QSub8Ux8; break; + case 0xD9: op = Iop_QSub16Ux4; break; + + case 0xE5: op = Iop_MulHi16Sx4; break; + case 0xD5: op = Iop_Mul16x4; break; + case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break; + + case 0x74: op = Iop_CmpEQ8x8; break; + case 0x75: op = Iop_CmpEQ16x4; break; + case 0x76: op = Iop_CmpEQ32x2; break; + + case 0x64: op = Iop_CmpGT8Sx8; break; + case 0x65: op = Iop_CmpGT16Sx4; break; + case 0x66: op = Iop_CmpGT32Sx2; break; + + case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; + case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; + case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; + + case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; + case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; + case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; + + case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; + case 0x61: op = Iop_InterleaveLO16x4; eLeft = 
True; break; + case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; + + case 0xDB: op = Iop_And64; break; + case 0xDF: op = Iop_And64; invG = True; break; + case 0xEB: op = Iop_Or64; break; + case 0xEF: /* Possibly do better here if argL and argR are the + same reg */ + op = Iop_Xor64; break; + + /* Introduced in SSE1 */ + case 0xE0: op = Iop_Avg8Ux8; break; + case 0xE3: op = Iop_Avg16Ux4; break; + case 0xEE: op = Iop_Max16Sx4; break; + case 0xDE: op = Iop_Max8Ux8; break; + case 0xEA: op = Iop_Min16Sx4; break; + case 0xDA: op = Iop_Min8Ux8; break; + case 0xE4: op = Iop_MulHi16Ux4; break; + case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break; + + /* Introduced in SSE2 */ + case 0xD4: op = Iop_Add64; break; + case 0xFB: op = Iop_Sub64; break; + + default: + vex_printf("\n0x%x\n", (Int)opc); + vpanic("dis_MMXop_regmem_to_reg"); + } + + # undef XXX + + argG = getMMXReg(gregLO3ofRM(modrm)); + if (invG) + argG = unop(Iop_Not64, argG); + + if (isReg) { + delta++; + argE = getMMXReg(eregLO3ofRM(modrm)); + } else { + Int len; + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + argE = loadLE(Ity_I64, mkexpr(addr)); + } + + if (eLeft) { + argL = argE; + argR = argG; + } else { + argL = argG; + argR = argE; + } + + if (op != Iop_INVALID) { + vassert(hName == NULL); + vassert(hAddr == NULL); + assign(res, binop(op, argL, argR)); + } else { + vassert(hName != NULL); + vassert(hAddr != NULL); + assign( res, + mkIRExprCCall( + Ity_I64, + 0/*regparms*/, hName, hAddr, + mkIRExprVec_2( argL, argR ) + ) + ); + } + + putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); + + DIP("%s%s %s, %s\n", + name, show_granularity ? nameMMXGran(opc & 3) : "", + ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), + nameMMXReg(gregLO3ofRM(modrm)) ); + + return delta; + } + + + /* Vector by scalar shift of G by the amount specified at the bottom + of E. This is a straight copy of dis_SSE_shiftG_byE. 
*/

/* Details:

   vbi, pfx   passed through to disAMode for a memory E operand
   delta      offset of the modrm byte
   opname     mnemonic, used only for trace output
   op         the IR shift op to apply; must be one of the eight
              cases in the switch below, anything else asserts

   The shift count is the whole 64-bit value of E (an MMX register
   or a 64-bit memory operand).  Counts at or above the lane width
   cannot be handed to the lane shift op, so they are handled
   explicitly:
     - logical shifts (left or right) produce all zeroes;
     - arithmetic right shifts behave as a shift by (lane width - 1).
   The result is written back to MMX register G.  Returns delta
   advanced past the modrm byte and any addressing bytes. */
static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  HChar* opname, IROp op )
{
   HChar  dis_buf[50];
   Int    alen, size;
   IRTemp addr;
   Bool   shl, shr, sar;
   UChar  rm   = getUChar(delta);
   IRTemp g0   = newTemp(Ity_I64);   /* G before the shift */
   IRTemp g1   = newTemp(Ity_I64);   /* G after the shift */
   IRTemp amt  = newTemp(Ity_I64);   /* full 64-bit shift count */
   IRTemp amt8 = newTemp(Ity_I8);    /* low 8 bits of the count */

   /* Fetch the shift count from E. */
   if (epartIsReg(rm)) {
      assign( amt, getMMXReg(eregLO3ofRM(rm)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregLO3ofRM(rm)),
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta += alen;
   }
   assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   /* Classify the op and establish its lane width in bits. */
   shl = shr = sar = False;
   size = 0;
   switch (op) {
      /* Lane width for ShlN16x4 is 16, matching ShrN16x4/SarN16x4
         below and dis_MMX_shiftE_imm.  It was previously 32, which
         let counts 16..31 through to the 16-bit lane shift instead
         of forcing the result to zero. */
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   /* Mux0X(cond, e0, eX) yields e0 when cond is zero, else eX.  Here
      cond is "amt < size", so the first arm is the out-of-range
      result and the second arm is the ordinary shift. */
   if (shl || shr) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            mkU64(0),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            binop(op, mkexpr(g0), mkU8(size-1)),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else {
      vassert(0);
   }

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm.
*/ + + static + ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op ) + { + Bool shl, shr, sar; + UChar rm = getUChar(delta); + IRTemp e0 = newTemp(Ity_I64); + IRTemp e1 = newTemp(Ity_I64); + UChar amt, size; + vassert(epartIsReg(rm)); + vassert(gregLO3ofRM(rm) == 2 + || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); + amt = getUChar(delta+1); + delta += 2; + DIP("%s $%d,%s\n", opname, + (Int)amt, + nameMMXReg(eregLO3ofRM(rm)) ); + + assign( e0, getMMXReg(eregLO3ofRM(rm)) ); + + shl = shr = sar = False; + size = 0; + switch (op) { + case Iop_ShlN16x4: shl = True; size = 16; break; + case Iop_ShlN32x2: shl = True; size = 32; break; + case Iop_Shl64: shl = True; size = 64; break; + case Iop_SarN16x4: sar = True; size = 16; break; + case Iop_SarN32x2: sar = True; size = 32; break; + case Iop_ShrN16x4: shr = True; size = 16; break; + case Iop_ShrN32x2: shr = True; size = 32; break; + case Iop_Shr64: shr = True; size = 64; break; + default: vassert(0); + } + + if (shl || shr) { + assign( e1, amt >= size + ? mkU64(0) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else + if (sar) { + assign( e1, amt >= size + ? binop(op, mkexpr(e0), mkU8(size-1)) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else { + vassert(0); + } + + putMMXReg( eregLO3ofRM(rm), mkexpr(e1) ); + return delta; + } + + + /* Completely handle all MMX instructions except emms. */ + + static + ULong dis_MMX ( Bool* decode_ok, + VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta ) + { + Int len; + UChar modrm; + HChar dis_buf[50]; + UChar opc = getUChar(delta); + delta++; + + /* dis_MMX handles all insns except emms. 
*/ + do_MMX_preamble(); + + switch (opc) { + + case 0x6E: + if (sz == 4) { + /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/ + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( + gregLO3ofRM(modrm), + binop( Iop_32HLto64, + mkU32(0), + getIReg32(eregOfRexRM(pfx,modrm)) ) ); + DIP("movd %s, %s\n", + nameIReg32(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + putMMXReg( + gregLO3ofRM(modrm), + binop( Iop_32HLto64, + mkU32(0), + loadLE(Ity_I32, mkexpr(addr)) ) ); + DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); + } + } + else + if (sz == 8) { + /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/ + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( gregLO3ofRM(modrm), + getIReg64(eregOfRexRM(pfx,modrm)) ); + DIP("movd %s, %s\n", + nameIReg64(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + putMMXReg( gregLO3ofRM(modrm), + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); + } + } + else { + goto mmx_decode_failure; + } + break; + + case 0x7E: + if (sz == 4) { + /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putIReg32( eregOfRexRM(pfx,modrm), + unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); + DIP("movd %s, %s\n", + nameMMXReg(gregLO3ofRM(modrm)), + nameIReg32(eregOfRexRM(pfx,modrm))); + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + storeLE( mkexpr(addr), + unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); + DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); + } + } + else + if (sz == 8) { + /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + 
putIReg64( eregOfRexRM(pfx,modrm), + getMMXReg(gregLO3ofRM(modrm)) ); + DIP("movd %s, %s\n", + nameMMXReg(gregLO3ofRM(modrm)), + nameIReg64(eregOfRexRM(pfx,modrm))); + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + storeLE( mkexpr(addr), + getMMXReg(gregLO3ofRM(modrm)) ); + DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); + } + } else { + goto mmx_decode_failure; + } + break; + + case 0x6F: + /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4 + && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) + goto mmx_decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); + DIP("movq %s, %s\n", + nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movq %s, %s\n", + dis_buf, nameMMXReg(gregLO3ofRM(modrm))); + } + break; + + case 0x7F: + /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ + if (sz != 4 + && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) + goto mmx_decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + /* Fall through. The assembler doesn't appear to generate + these. 
*/ + goto mmx_decode_failure; + } else { + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); + DIP("mov(nt)q %s, %s\n", + nameMMXReg(gregLO3ofRM(modrm)), dis_buf); + } + break; + + case 0xFC: + case 0xFD: + case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); + break; + + case 0xEC: + case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4 + && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); + break; + + case 0xDC: + case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); + break; + + case 0xF8: + case 0xF9: + case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); + break; + + case 0xE8: + case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); + break; + + case 0xD8: + case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); + break; + + case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); + break; + + case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); + break; + + case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ + 
/* Report a decode failure instead of asserting: sz is derived
+             from guest prefix bytes (a redundant REX.W yields sz == 8,
+             as handled explicitly by some sibling cases), so a mismatch
+             must not abort the translator. */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
+          break;
+
+       case 0x74:
+       case 0x75:
+       case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
+          break;
+
+       case 0x64:
+       case 0x65:
+       case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
+          break;
+
+       case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
+          break;
+
+       case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
+          break;
+
+       case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
+          break;
+
+       case 0x68:
+       case 0x69:
+       case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4
+              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
+          break;
+
+       case 0x60:
+       case 0x61:
+       case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4
+              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
+          break;
+
+       case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
+          break;
+
+       case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+          if (sz != 4)
+             goto mmx_decode_failure;
+          delta = dis_MMXop_regmem_to_reg (
vbi, pfx, delta, opc, "pandn", False ); + break; + + case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); + break; + + case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); + break; + + # define SHIFT_BY_REG(_name,_op) \ + delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ + break; + + /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); + case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); + case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); + + /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); + case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); + case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); + + /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); + case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); + + # undef SHIFT_BY_REG + + case 0x71: + case 0x72: + case 0x73: { + /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ + UChar byte2, subopc; + if (sz != 4) + goto mmx_decode_failure; + byte2 = getUChar(delta); /* amode / sub-opcode */ + subopc = toUChar( (byte2 >> 3) & 7 ); + + # define SHIFT_BY_IMM(_name,_op) \ + do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ + } while (0) + + if (subopc == 2 /*SRL*/ && opc == 0x71) + SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); + else if (subopc == 2 /*SRL*/ && opc == 0x72) + SHIFT_BY_IMM("psrld", Iop_ShrN32x2); + else if (subopc == 2 /*SRL*/ && opc == 0x73) + SHIFT_BY_IMM("psrlq", Iop_Shr64); + + else if (subopc == 4 /*SAR*/ && opc == 0x71) + SHIFT_BY_IMM("psraw", Iop_SarN16x4); + else if (subopc == 4 /*SAR*/ && opc == 0x72) + SHIFT_BY_IMM("psrad", Iop_SarN32x2); + + else if (subopc == 6 /*SHL*/ && opc == 0x71) + SHIFT_BY_IMM("psllw", Iop_ShlN16x4); + else if (subopc == 6 /*SHL*/ && 
opc == 0x72) + SHIFT_BY_IMM("pslld", Iop_ShlN32x2); + else if (subopc == 6 /*SHL*/ && opc == 0x73) + SHIFT_BY_IMM("psllq", Iop_Shl64); + + else goto mmx_decode_failure; + + # undef SHIFT_BY_IMM + break; + } + + case 0xF7: { + IRTemp addr = newTemp(Ity_I64); + IRTemp regD = newTemp(Ity_I64); + IRTemp regM = newTemp(Ity_I64); + IRTemp mask = newTemp(Ity_I64); + IRTemp olddata = newTemp(Ity_I64); + IRTemp newdata = newTemp(Ity_I64); + + modrm = getUChar(delta); + if (sz != 4 || (!epartIsReg(modrm))) + goto mmx_decode_failure; + delta++; + + assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); + assign( regM, getMMXReg( eregLO3ofRM(modrm) )); + assign( regD, getMMXReg( gregLO3ofRM(modrm) )); + assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); + assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); + assign( newdata, + binop(Iop_Or64, + binop(Iop_And64, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_And64, + mkexpr(olddata), + unop(Iop_Not64, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), + nameMMXReg( gregLO3ofRM(modrm) ) ); + break; + } + + /* --- MMX decode failure --- */ + default: + mmx_decode_failure: + *decode_ok = False; + return delta; /* ignored */ + + } + + *decode_ok = True; + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- More misc arithmetic and other obscure insns. ---*/ + /*------------------------------------------------------------*/ + + /* Generate base << amt with vacated places filled with stuff + from xtra. amt guaranteed in 0 .. 63. 
*/ + static + IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) + { + /* if amt == 0 + then base + else (base << amt) | (xtra >>u (64-amt)) + */ + return + IRExpr_Mux0X( + mkexpr(amt), + mkexpr(base), + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), + binop(Iop_Shr64, mkexpr(xtra), + binop(Iop_Sub8, mkU8(64), mkexpr(amt))) + ) + ); + } + + /* Generate base >>u amt with vacated places filled with stuff + from xtra. amt guaranteed in 0 .. 63. */ + static + IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) + { + /* if amt == 0 + then base + else (base >>u amt) | (xtra << (64-amt)) + */ + return + IRExpr_Mux0X( + mkexpr(amt), + mkexpr(base), + binop(Iop_Or64, + binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), + binop(Iop_Shl64, mkexpr(xtra), + binop(Iop_Sub8, mkU8(64), mkexpr(amt))) + ) + ); + } + + /* Double length left and right shifts. Apparently only required in + v-size (no b- variant). */ + static + ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi, + Prefix pfx, + Long delta, UChar modrm, + Int sz, + IRExpr* shift_amt, + Bool amt_is_literal, + HChar* shift_amt_txt, + Bool left_shift ) + { + /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used + for printing it. And eip on entry points at the modrm byte. */ + Int len; + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp gsrc = newTemp(ty); + IRTemp esrc = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + IRTemp tmpSH = newTemp(Ity_I8); + IRTemp tmpSS = newTemp(Ity_I8); + IRTemp tmp64 = IRTemp_INVALID; + IRTemp res64 = IRTemp_INVALID; + IRTemp rss64 = IRTemp_INVALID; + IRTemp resTy = IRTemp_INVALID; + IRTemp rssTy = IRTemp_INVALID; + Int mask = sz==8 ? 63 : 31; + + vassert(sz == 2 || sz == 4 || sz == 8); + + /* The E-part is the destination; this is shifted. The G-part + supplies bits to be shifted into the E-part, but is not + changed. + + If shifting left, form a double-length word with E at the top + and G at the bottom, and shift this left. 
The result is then in + the high part. + + If shifting right, form a double-length word with G at the top + and E at the bottom, and shift this right. The result is then + at the bottom. */ + + /* Fetch the operands. */ + + assign( gsrc, getIRegG(sz, pfx, modrm) ); + + if (epartIsReg(modrm)) { + delta++; + assign( esrc, getIRegE(sz, pfx, modrm) ); + DIP("sh%cd%c %s, %s, %s\n", + ( left_shift ? 'l' : 'r' ), nameISize(sz), + shift_amt_txt, + nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); + } else { + addr = disAMode ( &len, vbi, pfx, delta, dis_buf, + /* # bytes following amode */ + amt_is_literal ? 1 : 0 ); + delta += len; + assign( esrc, loadLE(ty, mkexpr(addr)) ); + DIP("sh%cd%c %s, %s, %s\n", + ( left_shift ? 'l' : 'r' ), nameISize(sz), + shift_amt_txt, + nameIRegG(sz, pfx, modrm), dis_buf); + } + + /* Calculate the masked shift amount (tmpSH), the masked subshift + amount (tmpSS), the shifted value (res64) and the subshifted + value (rss64). */ + + assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) ); + assign( tmpSS, binop(Iop_And8, + binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), + mkU8(mask))); + + tmp64 = newTemp(Ity_I64); + res64 = newTemp(Ity_I64); + rss64 = newTemp(Ity_I64); + + if (sz == 2 || sz == 4) { + + /* G is xtra; E is data */ + /* what a freaking nightmare: */ + if (sz == 4 && left_shift) { + assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) ); + assign( res64, + binop(Iop_Shr64, + binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), + mkU8(32)) ); + assign( rss64, + binop(Iop_Shr64, + binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)), + mkU8(32)) ); + } + else + if (sz == 4 && !left_shift) { + assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) ); + assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); + assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) ); + } + else + if (sz == 2 && left_shift) { + assign( tmp64, + binop(Iop_32HLto64, + binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)), + 
binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)) + )); + /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */ + assign( res64, + binop(Iop_Shr64, + binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), + mkU8(48)) ); + /* subshift formed by shifting [esrc'0000'0000'0000] */ + assign( rss64, + binop(Iop_Shr64, + binop(Iop_Shl64, + binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)), + mkU8(48)), + mkexpr(tmpSS)), + mkU8(48)) ); + } + else + if (sz == 2 && !left_shift) { + assign( tmp64, + binop(Iop_32HLto64, + binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)), + binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc)) + )); + /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */ + assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); + /* subshift formed by shifting [0000'0000'0000'esrc] */ + assign( rss64, binop(Iop_Shr64, + unop(Iop_16Uto64, mkexpr(esrc)), + mkexpr(tmpSS)) ); + } + + } else { + + vassert(sz == 8); + if (left_shift) { + assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH )); + assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS )); + } else { + assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH )); + assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS )); + } + + } + + resTy = newTemp(ty); + rssTy = newTemp(ty); + assign( resTy, narrowTo(ty, mkexpr(res64)) ); + assign( rssTy, narrowTo(ty, mkexpr(rss64)) ); + + /* Put result back and write the flags thunk. */ + setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64, + resTy, rssTy, ty, tmpSH ); + + if (epartIsReg(modrm)) { + putIRegE(sz, pfx, modrm, mkexpr(resTy)); + } else { + storeLE( mkexpr(addr), mkexpr(resTy) ); + } + + if (amt_is_literal) delta++; + return delta; + } + + + /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not + required. 
*/ + + typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; + + static HChar* nameBtOp ( BtOp op ) + { + switch (op) { + case BtOpNone: return ""; + case BtOpSet: return "s"; + case BtOpReset: return "r"; + case BtOpComp: return "c"; + default: vpanic("nameBtOp(amd64)"); + } + } + + + static + ULong dis_bt_G_E ( VexAbiInfo* vbi, + Prefix pfx, Int sz, Long delta, BtOp op ) + { + HChar dis_buf[50]; + UChar modrm; + Int len; + IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, + t_addr1, t_rsp, t_mask, t_new; + + vassert(sz == 2 || sz == 4 || sz == 8); + + t_fetched = t_bitno0 = t_bitno1 = t_bitno2 + = t_addr0 = t_addr1 = t_rsp + = t_mask = t_new = IRTemp_INVALID; + + t_fetched = newTemp(Ity_I8); + t_new = newTemp(Ity_I8); + t_bitno0 = newTemp(Ity_I64); + t_bitno1 = newTemp(Ity_I64); + t_bitno2 = newTemp(Ity_I8); + t_addr1 = newTemp(Ity_I64); + modrm = getUChar(delta); + + assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) ); + + if (epartIsReg(modrm)) { + delta++; + /* Get it onto the client's stack. */ + t_rsp = newTemp(Ity_I64); + t_addr0 = newTemp(Ity_I64); + + assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)) ); + putIReg64(R_RSP, mkexpr(t_rsp)); + + storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) ); + + /* Make t_addr0 point at it. */ + assign( t_addr0, mkexpr(t_rsp) ); + + /* Mask out upper bits of the shift amount, since we're doing a + reg. */ + assign( t_bitno1, binop(Iop_And64, + mkexpr(t_bitno0), + mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) ); + + } else { + t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + assign( t_bitno1, mkexpr(t_bitno0) ); + } + + /* At this point: t_addr0 is the address being operated on. If it + was a reg, we will have pushed it onto the client's stack. + t_bitno1 is the bit number, suitably masked in the case of a + reg. */ + + /* Now the main sequence. 
*/ + assign( t_addr1, + binop(Iop_Add64, + mkexpr(t_addr0), + binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) ); + + /* t_addr1 now holds effective address */ + + assign( t_bitno2, + unop(Iop_64to8, + binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) ); + + /* t_bitno2 contains offset of bit within byte */ + + if (op != BtOpNone) { + t_mask = newTemp(Ity_I8); + assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); + } + + /* t_mask is now a suitable byte mask */ + + assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); + + if (op != BtOpNone) { + switch (op) { + case BtOpSet: + assign( t_new, + binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); + break; + case BtOpComp: + assign( t_new, + binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); + break; + case BtOpReset: + assign( t_new, + binop(Iop_And8, mkexpr(t_fetched), + unop(Iop_Not8, mkexpr(t_mask))) ); + break; + default: + vpanic("dis_bt_G_E(amd64)"); + } + if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) { + casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, + mkexpr(t_new)/*new*/, + guest_RIP_curr_instr ); + } else { + storeLE( mkexpr(t_addr1), mkexpr(t_new) ); + } + } + + /* Side effect done; now get selected bit into Carry flag */ + /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop(Iop_And64, + binop(Iop_Shr64, + unop(Iop_8Uto64, mkexpr(t_fetched)), + mkexpr(t_bitno2)), + mkU64(1))) + ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + + /* Move reg operand from stack back to reg */ + if (epartIsReg(modrm)) { + /* t_rsp still points at it. 
*/ + /* only write the reg if actually modifying it; doing otherwise + zeroes the top half erroneously when doing btl due to + standard zero-extend rule */ + if (op != BtOpNone) + putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) ); + putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(sz)) ); + } + + DIP("bt%s%c %s, %s\n", + nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), + ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) ); + + return delta; + } + + + + /* Handle BSF/BSR. Only v-size seems necessary. */ + static + ULong dis_bs_E_G ( VexAbiInfo* vbi, + Prefix pfx, Int sz, Long delta, Bool fwds ) + { + Bool isReg; + UChar modrm; + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp src = newTemp(ty); + IRTemp dst = newTemp(ty); + IRTemp src64 = newTemp(Ity_I64); + IRTemp dst64 = newTemp(Ity_I64); + IRTemp src8 = newTemp(Ity_I8); + + vassert(sz == 8 || sz == 4 || sz == 2); + + modrm = getUChar(delta); + isReg = epartIsReg(modrm); + if (isReg) { + delta++; + assign( src, getIRegE(sz, pfx, modrm) ); + } else { + Int len; + IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); + delta += len; + assign( src, loadLE(ty, mkexpr(addr)) ); + } + + DIP("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', nameISize(sz), + ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), + nameIRegG(sz, pfx, modrm)); + + /* First, widen src to 64 bits if it is not already. */ + assign( src64, widenUto64(mkexpr(src)) ); + + /* Generate an 8-bit expression which is zero iff the + original is zero, and nonzero otherwise */ + assign( src8, + unop(Iop_1Uto8, + binop(Iop_CmpNE64, + mkexpr(src64), mkU64(0))) ); + + /* Flags: Z is 1 iff source value is zero. All others + are undefined -- we force them to zero. 
*/ + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(src8), + /* src==0 */ + mkU64(AMD64G_CC_MASK_Z), + /* src!=0 */ + mkU64(0) + ) + )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + + /* Result: iff source value is zero, we can't use + Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. + But anyway, amd64 semantics say the result is undefined in + such situations. Hence handle the zero case specially. */ + + /* Bleh. What we compute: + + bsf64: if src == 0 then {dst is unchanged} + else Ctz64(src) + + bsr64: if src == 0 then {dst is unchanged} + else 63 - Clz64(src) + + bsf32: if src == 0 then {dst is unchanged} + else Ctz64(32Uto64(src)) + + bsr32: if src == 0 then {dst is unchanged} + else 63 - Clz64(32Uto64(src)) + + bsf16: if src == 0 then {dst is unchanged} + else Ctz64(32Uto64(16Uto32(src))) + + bsr16: if src == 0 then {dst is unchanged} + else 63 - Clz64(32Uto64(16Uto32(src))) + */ + + /* The main computation, guarding against zero. */ + assign( dst64, + IRExpr_Mux0X( + mkexpr(src8), + /* src == 0 -- leave dst unchanged */ + widenUto64( getIRegG( sz, pfx, modrm ) ), + /* src != 0 */ + fwds ? 
unop(Iop_Ctz64, mkexpr(src64)) + : binop(Iop_Sub64, + mkU64(63), + unop(Iop_Clz64, mkexpr(src64))) + ) + ); + + if (sz == 2) + assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); + else + if (sz == 4) + assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); + else + assign( dst, mkexpr(dst64) ); + + /* dump result back */ + putIRegG( sz, pfx, modrm, mkexpr(dst) ); + + return delta; + } + + + /* swap rAX with the reg specified by reg and REX.B */ + static + void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) + { + IRType ty = szToITy(sz); + IRTemp t1 = newTemp(ty); + IRTemp t2 = newTemp(ty); + vassert(sz == 4 || sz == 8); + vassert(regLo3 < 8); + if (sz == 8) { + assign( t1, getIReg64(R_RAX) ); + assign( t2, getIRegRexB(8, pfx, regLo3) ); + putIReg64( R_RAX, mkexpr(t2) ); + putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); + } else { + assign( t1, getIReg32(R_RAX) ); + assign( t2, getIRegRexB(4, pfx, regLo3) ); + putIReg32( R_RAX, mkexpr(t2) ); + putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); + } + DIP("xchg%c %s, %s\n", + nameISize(sz), nameIRegRAX(sz), + nameIRegRexB(sz,pfx, regLo3)); + } + + + static + void codegen_SAHF ( void ) + { + /* Set the flags to: + (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) + -- retain the old O flag + | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A + |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) + */ + ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A + |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; + IRTemp oldflags = newTemp(Ity_I64); + assign( oldflags, mk_amd64g_calculate_rflags_all() ); + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop(Iop_Or64, + binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), + binop(Iop_And64, + binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), + mkU64(mask_SZACP)) + ) + )); + } + + + static + void codegen_LAHF ( void ) + { + /* AH <- 
EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ + IRExpr* rax_with_hole; + IRExpr* new_byte; + IRExpr* new_rax; + ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A + |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; + + IRTemp flags = newTemp(Ity_I64); + assign( flags, mk_amd64g_calculate_rflags_all() ); + + rax_with_hole + = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); + new_byte + = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), + mkU64(1<<1)); + new_rax + = binop(Iop_Or64, rax_with_hole, + binop(Iop_Shl64, new_byte, mkU8(8))); + putIReg64(R_RAX, new_rax); + } + + + static + ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, + VexAbiInfo* vbi, + Prefix pfx, + Int size, + Long delta0 ) + { + HChar dis_buf[50]; + Int len; + + IRType ty = szToITy(size); + IRTemp acc = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dest = newTemp(ty); + IRTemp dest2 = newTemp(ty); + IRTemp acc2 = newTemp(ty); + IRTemp cond8 = newTemp(Ity_I8); + IRTemp addr = IRTemp_INVALID; + UChar rm = getUChar(delta0); + + /* There are 3 cases to consider: + + reg-reg: ignore any lock prefix, generate sequence based + on Mux0X + + reg-mem, not locked: ignore any lock prefix, generate sequence + based on Mux0X + + reg-mem, locked: use IRCAS + */ + + if (epartIsReg(rm)) { + /* case 1 */ + *ok = False; + return delta0; + /* awaiting test case */ + assign( dest, getIRegE(size, pfx, rm) ); + delta0++; + assign( src, getIRegG(size, pfx, rm) ); + assign( acc, getIRegRAX(size) ); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); + assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIRegRAX(size, mkexpr(acc2)); + putIRegE(size, pfx, rm, mkexpr(dest2)); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIRegG(size,pfx,rm), + nameIRegE(size,pfx,rm) ); + } + else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { + /* case 2 */ + 
addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + assign( dest, loadLE(ty, mkexpr(addr)) ); + delta0 += len; + assign( src, getIRegG(size, pfx, rm) ); + assign( acc, getIRegRAX(size) ); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); + assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIRegRAX(size, mkexpr(acc2)); + storeLE( mkexpr(addr), mkexpr(dest2) ); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIRegG(size,pfx,rm), dis_buf); + } + else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { + /* case 3 */ + /* src is new value. acc is expected value. dest is old value. + Compute success from the output of the IRCAS, and steer the + new value for RAX accordingly: in case of success, RAX is + unchanged. */ + addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + delta0 += len; + assign( src, getIRegG(size, pfx, rm) ); + assign( acc, getIRegRAX(size) ); + stmt( IRStmt_CAS( + mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), + NULL, mkexpr(acc), NULL, mkexpr(src) ) + )); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIRegRAX(size, mkexpr(acc2)); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIRegG(size,pfx,rm), dis_buf); + } + else vassert(0); + + *ok = True; + return delta0; + } + + + /* Handle conditional move instructions of the form + cmovcc E(reg-or-mem), G(reg) + + E(src) is reg-or-mem + G(dst) is reg. 
+ + If E is reg, --> GET %E, tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + */ + static + ULong dis_cmov_E_G ( VexAbiInfo* vbi, + Prefix pfx, + Int sz, + AMD64Condcode cond, + Long delta0 ) + { + UChar rm = getUChar(delta0); + HChar dis_buf[50]; + Int len; + + IRType ty = szToITy(sz); + IRTemp tmps = newTemp(ty); + IRTemp tmpd = newTemp(ty); + + if (epartIsReg(rm)) { + assign( tmps, getIRegE(sz, pfx, rm) ); + assign( tmpd, getIRegG(sz, pfx, rm) ); + + putIRegG( sz, pfx, rm, + IRExpr_Mux0X( unop(Iop_1Uto8, + mk_amd64g_calculate_condition(cond)), + mkexpr(tmpd), + mkexpr(tmps) ) + ); + DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), + nameIRegE(sz,pfx,rm), + nameIRegG(sz,pfx,rm)); + return 1+delta0; + } + + /* E refers to memory */ + { + IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + assign( tmps, loadLE(ty, mkexpr(addr)) ); + assign( tmpd, getIRegG(sz, pfx, rm) ); + + putIRegG( sz, pfx, rm, + IRExpr_Mux0X( unop(Iop_1Uto8, + mk_amd64g_calculate_condition(cond)), + mkexpr(tmpd), + mkexpr(tmps) ) + ); + + DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), + dis_buf, + nameIRegG(sz,pfx,rm)); + return len+delta0; + } + } + + + static + ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, + VexAbiInfo* vbi, + Prefix pfx, Int sz, Long delta0 ) + { + Int len; + UChar rm = getUChar(delta0); + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp tmpd = newTemp(ty); + IRTemp tmpt0 = newTemp(ty); + IRTemp tmpt1 = newTemp(ty); + + /* There are 3 cases to consider: + + reg-reg: currently unhandled + + reg-mem, not locked: ignore any lock prefix, generate 'naive' + (non-atomic) sequence + + reg-mem, locked: use IRCAS + */ + + if (epartIsReg(rm)) { + /* case 1 */ + *decode_ok = False; + return delta0; + /* Currently we don't handle xadd_G_E with register operand. 
*/ + } + else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { + /* case 2 */ + IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + assign( tmpd, loadLE(ty, mkexpr(addr)) ); + assign( tmpt0, getIRegG(sz, pfx, rm) ); + assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(tmpd), mkexpr(tmpt0)) ); + setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); + storeLE( mkexpr(addr), mkexpr(tmpt1) ); + putIRegG(sz, pfx, rm, mkexpr(tmpd)); + DIP("xadd%c %s, %s\n", + nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); + *decode_ok = True; + return len+delta0; + } + else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { + /* case 3 */ + IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); + assign( tmpd, loadLE(ty, mkexpr(addr)) ); + assign( tmpt0, getIRegG(sz, pfx, rm) ); + assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(tmpd), mkexpr(tmpt0)) ); + casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, + mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr ); + setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); + putIRegG(sz, pfx, rm, mkexpr(tmpd)); + DIP("xadd%c %s, %s\n", + nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); + *decode_ok = True; + return len+delta0; + } + /*UNREACHED*/ + vassert(0); + } + + //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ + //.. + //.. static + //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) + //.. { + //.. Int len; + //.. IRTemp addr; + //.. UChar rm = getUChar(delta0); + //.. HChar dis_buf[50]; + //.. + //.. if (epartIsReg(rm)) { + //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); + //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); + //.. return 1+delta0; + //.. } else { + //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); + //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); + //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); + //.. return len+delta0; + //.. } + //.. } + //.. + //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If + //.. 
dst is ireg and sz==4, zero out top half of it. */ + //.. + //.. static + //.. UInt dis_mov_Sw_Ew ( UChar sorb, + //.. Int sz, + //.. UInt delta0 ) + //.. { + //.. Int len; + //.. IRTemp addr; + //.. UChar rm = getUChar(delta0); + //.. HChar dis_buf[50]; + //.. + //.. vassert(sz == 2 || sz == 4); + //.. + //.. if (epartIsReg(rm)) { + //.. if (sz == 4) + //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); + //.. else + //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); + //.. + //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); + //.. return 1+delta0; + //.. } else { + //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); + //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); + //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); + //.. return len+delta0; + //.. } + //.. } + //.. + //.. + //.. static + //.. void dis_push_segreg ( UInt sreg, Int sz ) + //.. { + //.. IRTemp t1 = newTemp(Ity_I16); + //.. IRTemp ta = newTemp(Ity_I32); + //.. vassert(sz == 2 || sz == 4); + //.. + //.. assign( t1, getSReg(sreg) ); + //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); + //.. putIReg(4, R_ESP, mkexpr(ta)); + //.. storeLE( mkexpr(ta), mkexpr(t1) ); + //.. + //.. DIP("pushw %s\n", nameSReg(sreg)); + //.. } + //.. + //.. static + //.. void dis_pop_segreg ( UInt sreg, Int sz ) + //.. { + //.. IRTemp t1 = newTemp(Ity_I16); + //.. IRTemp ta = newTemp(Ity_I32); + //.. vassert(sz == 2 || sz == 4); + //.. + //.. assign( ta, getIReg(4, R_ESP) ); + //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); + //.. + //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); + //.. putSReg( sreg, mkexpr(t1) ); + //.. DIP("pop %s\n", nameSReg(sreg)); + //.. 
} + + static + void dis_ret ( VexAbiInfo* vbi, ULong d64 ) + { + IRTemp t1 = newTemp(Ity_I64); + IRTemp t2 = newTemp(Ity_I64); + IRTemp t3 = newTemp(Ity_I64); + assign(t1, getIReg64(R_RSP)); + assign(t2, loadLE(Ity_I64,mkexpr(t1))); + assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); + putIReg64(R_RSP, mkexpr(t3)); + make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); + jmp_treg(Ijk_Ret,t2); + } + + + /*------------------------------------------------------------*/ + /*--- SSE/SSE2/SSE3 helpers ---*/ + /*------------------------------------------------------------*/ + + /* Worker function; do not call directly. + Handles full width G = G `op` E and G = (not G) `op` E. + */ + + static ULong dis_SSE_E_to_G_all_wrk ( + VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op, + Bool invertG + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRExpr* gpart + = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) + : getXMMReg(gregOfRexRM(pfx,rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, + getXMMReg(eregOfRexRM(pfx,rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, + loadLE(Ity_V128, mkexpr(addr))) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* All lanes SSE binary operation, G = G `op` E. */ + + static + ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op ) + { + return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); + } + + /* All lanes SSE binary operation, G = (not G) `op` E. 
*/ + + static + ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op ) + { + return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); + } + + + /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ + + static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, + getXMMReg(eregOfRexRM(pfx,rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + /* We can only do a 32-bit memory read, so the upper 3/4 of the + E operand needs to be made simply of zeroes. */ + IRTemp epart = newTemp(Ity_V128); + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + assign( epart, unop( Iop_32UtoV128, + loadLE(Ity_I32, mkexpr(addr))) ); + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, mkexpr(epart)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ + + static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, + getXMMReg(eregOfRexRM(pfx,rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + /* We can only do a 64-bit memory read, so the upper half of the + E operand needs to be made simply of zeroes. 
*/ + IRTemp epart = newTemp(Ity_V128); + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + assign( epart, unop( Iop_64UtoV128, + loadLE(Ity_I64, mkexpr(addr))) ); + putXMMReg( gregOfRexRM(pfx,rm), + binop(op, gpart, mkexpr(epart)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* All lanes unary SSE operation, G = op(E). */ + + static ULong dis_SSE_E_to_G_unary_all ( + VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + if (epartIsReg(rm)) { + putXMMReg( gregOfRexRM(pfx,rm), + unop(op, getXMMReg(eregOfRexRM(pfx,rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,rm), + unop(op, loadLE(Ity_V128, mkexpr(addr))) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ + + static ULong dis_SSE_E_to_G_unary_lo32 ( + VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op + ) + { + /* First we need to get the old G value and patch the low 32 bits + of the E operand into it. Then apply op and write back to G. 
*/ + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRTemp oldG0 = newTemp(Ity_V128); + IRTemp oldG1 = newTemp(Ity_V128); + + assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); + + if (epartIsReg(rm)) { + assign( oldG1, + binop( Iop_SetV128lo32, + mkexpr(oldG0), + getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); + putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + assign( oldG1, + binop( Iop_SetV128lo32, + mkexpr(oldG0), + loadLE(Ity_I32, mkexpr(addr)) )); + putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ + + static ULong dis_SSE_E_to_G_unary_lo64 ( + VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op + ) + { + /* First we need to get the old G value and patch the low 64 bits + of the E operand into it. Then apply op and write back to G. 
*/ + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRTemp oldG0 = newTemp(Ity_V128); + IRTemp oldG1 = newTemp(Ity_V128); + + assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); + + if (epartIsReg(rm)) { + assign( oldG1, + binop( Iop_SetV128lo64, + mkexpr(oldG0), + getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); + putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + assign( oldG1, + binop( Iop_SetV128lo64, + mkexpr(oldG0), + loadLE(Ity_I64, mkexpr(addr)) )); + putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + return delta+alen; + } + } + + + /* SSE integer binary operation: + G = G `op` E (eLeft == False) + G = E `op` G (eLeft == True) + */ + static ULong dis_SSEint_E_to_G( + VexAbiInfo* vbi, + Prefix pfx, Long delta, + HChar* opname, IROp op, + Bool eLeft + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getUChar(delta); + IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); + IRExpr* epart = NULL; + if (epartIsReg(rm)) { + epart = getXMMReg(eregOfRexRM(pfx,rm)); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRexRM(pfx,rm)), + nameXMMReg(gregOfRexRM(pfx,rm)) ); + delta += 1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + epart = loadLE(Ity_V128, mkexpr(addr)); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,rm)) ); + delta += alen; + } + putXMMReg( gregOfRexRM(pfx,rm), + eLeft ? binop(op, epart, gpart) + : binop(op, gpart, epart) ); + return delta; + } + + + /* Helper for doing SSE FP comparisons. 
*/

/* Maps the predicate immediate of an SSE compare onto an IR comparison
   op plus a "negate the result" flag.

   Only the low three bits of imm8 are used.  Bits 1:0 pick EQ, LT, LE
   or UN (unordered); bit 2 requests the complement of that predicate,
   which is reported through *needNot.  'sz' is the element size in
   bytes (4 or 8) and 'all_lanes' chooses between the every-lane and
   lowest-lane-only op variants.  Any other 'sz' panics. */
static void findSSECmpOp ( Bool* needNot, IROp* op,
                           Int imm8, Bool all_lanes, Int sz )
{
   /* Each table is indexed by predicate: EQ, LT, LE, UN. */
   static const IROp cmp32Fx4[4]
      = { Iop_CmpEQ32Fx4,  Iop_CmpLT32Fx4,
          Iop_CmpLE32Fx4,  Iop_CmpUN32Fx4 };
   static const IROp cmp32F0x4[4]
      = { Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4,
          Iop_CmpLE32F0x4, Iop_CmpUN32F0x4 };
   static const IROp cmp64Fx2[4]
      = { Iop_CmpEQ64Fx2,  Iop_CmpLT64Fx2,
          Iop_CmpLE64Fx2,  Iop_CmpUN64Fx2 };
   static const IROp cmp64F0x2[4]
      = { Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2,
          Iop_CmpLE64F0x2, Iop_CmpUN64F0x2 };

   const IROp* table = NULL;
   Int         pred  = imm8 & 3;

   *needNot = (imm8 & 4) ? True : False;
   *op      = Iop_INVALID;

   if (sz == 4)
      table = all_lanes ? cmp32Fx4 : cmp32F0x4;
   else if (sz == 8)
      table = all_lanes ? cmp64Fx2 : cmp64F0x2;

   if (table == NULL)
      vpanic("findSSECmpOp(amd64,guest)");

   *op = table[pred];
}

/* Handles SSE 32F/64F comparisons.
*/

/* Decodes modrm + operand + trailing imm8 starting at 'delta' and
   writes the comparison result to G.  'sz' is the element size in
   bytes (4 or 8).  With all_lanes == False a memory E is read as a
   single element and, for negated predicates, only the lowest lane of
   the result is inverted.  Returns the delta just past the imm8. */
static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi,
                                 Prefix pfx, Long delta,
                                 HChar* opname, Bool all_lanes, Int sz )
{
   HChar   amodeTxt[50];
   HChar*  srcTxt;
   Int     len, pred;
   Bool    negate = False;
   IROp    cmpOp  = Iop_INVALID;
   IRTemp  raw    = newTemp(Ity_V128);
   UChar   modrm  = getUChar(delta);
   UInt    rG     = gregOfRexRM(pfx,modrm);
   IRExpr* rhs;
   IRExpr* result;

   vassert(sz == 4 || sz == 8);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      pred = getUChar(delta+1);
      findSSECmpOp(&negate, &cmpOp, pred, all_lanes, sz);
      rhs    = getXMMReg(rE);
      srcTxt = nameXMMReg(rE);
      len    = 2;                      /* modrm + imm8 */
   } else {
      Int    amodeLen;
      /* Final argument 1: one immediate byte follows the amode. */
      IRTemp ea = disAMode ( &amodeLen, vbi, pfx, delta, amodeTxt, 1 );
      pred = getUChar(delta+amodeLen);
      findSSECmpOp(&negate, &cmpOp, pred, all_lanes, sz);
      if (all_lanes)
         rhs = loadLE(Ity_V128, mkexpr(ea));
      else if (sz == 8)
         rhs = unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(ea)) );
      else /* sz == 4 */
         rhs = unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(ea)) );
      srcTxt = amodeTxt;
      len    = amodeLen + 1;           /* amode + imm8 */
   }

   assign( raw, binop(cmpOp, getXMMReg(rG), rhs) );
   delta += len;
   DIP("%s $%d,%s,%s\n", opname,
                         (Int)pred,
                         srcTxt,
                         nameXMMReg(rG) );

   if (!negate) {
      result = mkexpr(raw);
   }
   else if (all_lanes) {
      result = unop(Iop_NotV128, mkexpr(raw));
   }
   else {
      /* Flip only the lowest lane: the mkV128 mask has one bit per
         byte of the vector. */
      UShort lowLane = toUShort(sz==4 ? 0x000F : 0x00FF);
      result = binop(Iop_XorV128, mkexpr(raw), mkV128(lowLane));
   }
   putXMMReg( rG, result );

   return delta;
}


/* Vector by scalar shift of G by the amount specified at the bottom
   of E.
*/

/* Decodes the modrm byte at 'delta' and emits IR that shifts every
   lane of XMM register G by one common count.  The count is the low
   32 bits of E, which is either an XMM register or a memory operand.

   'op' selects both lane width and shift kind, and must be one of
   Iop_Shl{N16x8,N32x4,N64x2}, Iop_Shr{N16x8,N32x4,N64x2} or
   Iop_Sar{N16x8,N32x4}; anything else asserts.  'opname' is only used
   for the disassembly printout.

   A count that is >= the lane width in bits is never handed to the IR
   shift op.  Instead:
     - logical shifts (shl/shr) produce an all-zeroes vector;
     - arithmetic right shifts use a count of (width-1), so each lane
       is filled with copies of its sign bit.

   Returns the delta just past the operand bytes. */
static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  g0   = newTemp(Ity_V128);
   IRTemp  g1   = newTemp(Ity_V128);
   IRTemp  amt  = newTemp(Ity_I32);
   IRTemp  amt8 = newTemp(Ity_I8);
   IRExpr* inRange;
   IRExpr* outOfRangeVal;

   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRexRM(pfx,rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   /* 'size' is the lane width in bits, i.e. the smallest count that
      is out of range for the lane.  Iop_ShlN16x8 used to be given 32
      here, which let counts 16..31 through to a 16-bit-lane shift
      instead of yielding zero; it is 16 like the other 16x8 cases. */
   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   /* Range check on the full 32-bit count, not on the truncated
      8-bit copy that is fed to the shift op. */
   inRange
      = unop(Iop_1Uto8,
             binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)),
                                 mkU64(size)));

   if (shl || shr) {
      outOfRangeVal = mkV128(0x0000);
   } else {
      vassert(sar);
      outOfRangeVal = binop(op, mkexpr(g0), mkU8(size-1));
   }

   /* Mux0X yields its second argument when the condition is zero
      (count out of range) and its third argument otherwise. */
   assign(
      g1,
      IRExpr_Mux0X(
         inRange,
         outOfRangeVal,
         binop(op, mkexpr(g0), mkexpr(amt8))
      )
   );

   putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.
*/ + + static + ULong dis_SSE_shiftE_imm ( Prefix pfx, + Long delta, HChar* opname, IROp op ) + { + Bool shl, shr, sar; + UChar rm = getUChar(delta); + IRTemp e0 = newTemp(Ity_V128); + IRTemp e1 = newTemp(Ity_V128); + UChar amt, size; + vassert(epartIsReg(rm)); + vassert(gregLO3ofRM(rm) == 2 + || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); + amt = getUChar(delta+1); + delta += 2; + DIP("%s $%d,%s\n", opname, + (Int)amt, + nameXMMReg(eregOfRexRM(pfx,rm)) ); + assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); + + shl = shr = sar = False; + size = 0; + switch (op) { + case Iop_ShlN16x8: shl = True; size = 16; break; + case Iop_ShlN32x4: shl = True; size = 32; break; + case Iop_ShlN64x2: shl = True; size = 64; break; + case Iop_SarN16x8: sar = True; size = 16; break; + case Iop_SarN32x4: sar = True; size = 32; break; + case Iop_ShrN16x8: shr = True; size = 16; break; + case Iop_ShrN32x4: shr = True; size = 32; break; + case Iop_ShrN64x2: shr = True; size = 64; break; + default: vassert(0); + } + + if (shl || shr) { + assign( e1, amt >= size + ? mkV128(0x0000) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else + if (sar) { + assign( e1, amt >= size + ? binop(op, mkexpr(e0), mkU8(size-1)) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else { + vassert(0); + } + + putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); + return delta; + } + + + /* Get the current SSE rounding mode. */ + + static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) + { + return + unop( Iop_64to32, + binop( Iop_And64, + IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), + mkU64(3) )); + } + + static void put_sse_roundingmode ( IRExpr* sseround ) + { + vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); + stmt( IRStmt_Put( OFFB_SSEROUND, + unop(Iop_32Uto64,sseround) ) ); + } + + /* Break a 128-bit value up into four 32-bit ints. 
*/ + + static void breakup128to32s ( IRTemp t128, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); + + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + + *t0 = newTemp(Ity_I32); + *t1 = newTemp(Ity_I32); + *t2 = newTemp(Ity_I32); + *t3 = newTemp(Ity_I32); + assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); + assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); + assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); + assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); + } + + /* Construct a 128-bit value from four 32-bit ints. */ + + static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, + IRTemp t1, IRTemp t0 ) + { + return + binop( Iop_64HLtoV128, + binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), + binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) + ); + } + + /* Break a 64-bit value up into four 16-bit ints. */ + + static void breakup64to16s ( IRTemp t64, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi32 = newTemp(Ity_I32); + IRTemp lo32 = newTemp(Ity_I32); + assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); + assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); + + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + + *t0 = newTemp(Ity_I16); + *t1 = newTemp(Ity_I16); + *t2 = newTemp(Ity_I16); + *t3 = newTemp(Ity_I16); + assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); + assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); + assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); + assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); + } + + /* Construct a 64-bit value from four 16-bit ints. 
*/ + + static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, + IRTemp t1, IRTemp t0 ) + { + return + binop( Iop_32HLto64, + binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), + binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) + ); + } + + + /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit + values (aa,bb), computes, for each of the 4 16-bit lanes: + + (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 + */ + static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) + { + IRTemp aa = newTemp(Ity_I64); + IRTemp bb = newTemp(Ity_I64); + IRTemp aahi32s = newTemp(Ity_I64); + IRTemp aalo32s = newTemp(Ity_I64); + IRTemp bbhi32s = newTemp(Ity_I64); + IRTemp bblo32s = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp one32x2 = newTemp(Ity_I64); + assign(aa, aax); + assign(bb, bbx); + assign( aahi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( aalo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( bbhi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + assign( bblo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + assign(one32x2, mkU64( (1ULL << 32) + 1 )); + assign( + rHi, + binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + assign( + rLo, + binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + return + binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); + } + + /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
Given two 64-bit
   values (aa,bb), computes, for each lane:

      if aa_lane < 0 then - bb_lane
      else if aa_lane > 0 then bb_lane
      else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp  ctl      = newTemp(Ity_I64);   /* aa: its sign steers the result */
   IRTemp  val      = newTemp(Ity_I64);   /* bb: the value passed/negated   */
   IRTemp  allZero  = newTemp(Ity_I64);
   IRTemp  valNeg   = newTemp(Ity_I64);
   IRTemp  whereNeg = newTemp(Ity_I64);   /* all-ones lanes where aa < 0 */
   IRTemp  wherePos = newTemp(Ity_I64);   /* all-ones lanes where aa > 0 */
   IROp    subOp    = Iop_INVALID;
   IROp    gtsOp    = Iop_INVALID;
   IRExpr* keepPos;
   IRExpr* keepNeg;

   /* laneszB is the lane size in bytes; only 1, 2 and 4 are valid. */
   if (laneszB == 1) {
      subOp = Iop_Sub8x8;  gtsOp = Iop_CmpGT8Sx8;
   } else if (laneszB == 2) {
      subOp = Iop_Sub16x4; gtsOp = Iop_CmpGT16Sx4;
   } else if (laneszB == 4) {
      subOp = Iop_Sub32x2; gtsOp = Iop_CmpGT32Sx2;
   } else {
      vassert(0);
   }

   assign( ctl,      aax );
   assign( val,      bbx );
   assign( allZero,  mkU64(0) );
   assign( valNeg,   binop(subOp, mkexpr(allZero), mkexpr(val)) );
   assign( whereNeg, binop(gtsOp, mkexpr(allZero), mkexpr(ctl)) );
   assign( wherePos, binop(gtsOp, mkexpr(ctl),     mkexpr(allZero)) );

   /* Lanes where aa == 0 are in neither mask and so come out zero. */
   keepPos = binop(Iop_And64, mkexpr(val),    mkexpr(wherePos));
   keepNeg = binop(Iop_And64, mkexpr(valNeg), mkexpr(whereNeg));
   return binop(Iop_Or64, keepPos, keepNeg);
}

/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

      if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
+ */ + static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) + { + IRTemp aa = newTemp(Ity_I64); + IRTemp zero = newTemp(Ity_I64); + IRTemp aaNeg = newTemp(Ity_I64); + IRTemp negMask = newTemp(Ity_I64); + IRTemp posMask = newTemp(Ity_I64); + IROp opSub = Iop_INVALID; + IROp opSarN = Iop_INVALID; + + switch (laneszB) { + case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; + case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; + case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; + default: vassert(0); + } + + assign( aa, aax ); + assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); + assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); + assign( zero, mkU64(0) ); + assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); + return + binop(Iop_Or64, + binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), + binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); + } + + static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, + IRTemp lo64, Long byteShift ) + { + vassert(byteShift >= 1 && byteShift <= 7); + return + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), + binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) + ); + } + + /* Generate a SIGSEGV followed by a restart of the current instruction + if effective_addr is not 16-aligned. This is required behaviour + for some SSE3 instructions and all 128-bit SSSE3 instructions. + This assumes that guest_RIP_curr_instr is set correctly! */ + static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) + { + stmt( + IRStmt_Exit( + binop(Iop_CmpNE64, + binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)), + mkU64(0)), + Ijk_SigSEGV, + IRConst_U64(guest_RIP_curr_instr) + ) + ); + } + + + /* Helper for deciding whether a given insn (starting at the opcode + byte) may validly be used with a LOCK prefix. The following insns + may be used with LOCK when their destination operand is in memory. + AFAICS this is exactly the same for both 32-bit and 64-bit mode. 
+ + ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 + OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 + ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 + SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19 + AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 + SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 + XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 + + DEC FE /1, FF /1 + INC FE /0, FF /0 + + NEG F6 /3, F7 /3 + NOT F6 /2, F7 /2 + + XCHG 86, 87 + + BTC 0F BB, 0F BA /7 + BTR 0F B3, 0F BA /6 + BTS 0F AB, 0F BA /5 + + CMPXCHG 0F B0, 0F B1 + CMPXCHG8B 0F C7 /1 + + XADD 0F C0, 0F C1 + + ------------------------------ + + 80 /0 = addb $imm8, rm8 + 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 + 82 /0 = addb $imm8, rm8 + 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 + + 00 = addb r8, rm8 + 01 = addl r32, rm32 and addw r16, rm16 + + Same for ADD OR ADC SBB AND SUB XOR + + FE /1 = dec rm8 + FF /1 = dec rm32 and dec rm16 + + FE /0 = inc rm8 + FF /0 = inc rm32 and inc rm16 + + F6 /3 = neg rm8 + F7 /3 = neg rm32 and neg rm16 + + F6 /2 = not rm8 + F7 /2 = not rm32 and not rm16 + + 0F BB = btcw r16, rm16 and btcl r32, rm32 + 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32 + + Same for BTS, BTR + */ + static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) + { + switch (opc[0]) { + case 0x00: case 0x01: case 0x08: case 0x09: + case 0x10: case 0x11: case 0x18: case 0x19: + case 0x20: case 0x21: case 0x28: case 0x29: + case 0x30: case 0x31: + if (!epartIsReg(opc[1])) + return True; + break; + + case 0x80: case 0x81: case 0x82: case 0x83: + if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6 + && !epartIsReg(opc[1])) + return True; + break; + + case 0xFE: case 0xFF: + if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1 + && !epartIsReg(opc[1])) + return True; + break; + + case 0xF6: case 0xF7: + if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3 + && !epartIsReg(opc[1])) + return True; + break; + + case 0x86: case 0x87: + if (!epartIsReg(opc[1])) + return True; + break; + + case 0x0F: { + switch (opc[1]) { + case 0xBB: case 
0xB3: case 0xAB: + if (!epartIsReg(opc[2])) + return True; + break; + case 0xBA: + if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7 + && !epartIsReg(opc[2])) + return True; + break; + case 0xB0: case 0xB1: + if (!epartIsReg(opc[2])) + return True; + break; + case 0xC7: + if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) + return True; + break; + case 0xC0: case 0xC1: + if (!epartIsReg(opc[2])) + return True; + break; + default: + break; + } /* switch (opc[1]) */ + break; + } + + default: + break; + } /* switch (opc[0]) */ + + return False; + } + + + /*------------------------------------------------------------*/ + /*--- Disassemble a single instruction ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction is + located in host memory at &guest_code[delta]. */ + + static + DisResult disInstr_AMD64_WRK ( + /*OUT*/Bool* expect_CAS, + Bool put_IP, + Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), + void* callback_opaque, + Long delta64, + VexArchInfo* archinfo, + VexAbiInfo* vbi + ) + { + IRType ty; + IRTemp addr, t0, t1, t2, t3, t4, t5, t6; + Int alen; + UChar opc, modrm, abyte, pre; + Long d64; + HChar dis_buf[50]; + Int am_sz, d_sz, n, n_prefixes; + DisResult dres; + UChar* insn; /* used in SSE decoders */ + + /* The running delta */ + Long delta = delta64; + + /* Holds eip at the start of the insn, so that we can print + consistent error messages for unimplemented insns. */ + Long delta_start = delta; + + /* sz denotes the nominal data-op size of the insn; we change it to + 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of + conflict REX.W takes precedence. */ + Int sz = 4; + + /* pfx holds the summary of prefixes. */ + Prefix pfx = PFX_EMPTY; + + /* Set result defaults. 
*/ + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + + *expect_CAS = False; + + vassert(guest_RIP_next_assumed == 0); + vassert(guest_RIP_next_mustcheck == False); + + addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; + + DIP("\t0x%llx: ", guest_RIP_bbstart+delta); + + /* We may be asked to update the guest RIP before going further. */ + if (put_IP) + stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) ); + + /* Spot "Special" instructions (see comment at top of file). */ + { + UChar* code = (UChar*)(guest_code + delta); + /* Spot the 16-byte preamble: + 48C1C703 rolq $3, %rdi + 48C1C70D rolq $13, %rdi + 48C1C73D rolq $61, %rdi + 48C1C733 rolq $51, %rdi + */ + if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 + && code[ 3] == 0x03 && + code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 + && code[ 7] == 0x0D && + code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 + && code[11] == 0x3D && + code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 + && code[15] == 0x33) { + /* Got a "Special" instruction preamble. Which one is it? 
*/ + if (code[16] == 0x48 && code[17] == 0x87 + && code[18] == 0xDB /* xchgq %rbx,%rbx */) { + /* %RDX = client_request ( %RAX ) */ + DIP("%%rdx = client_request ( %%rax )\n"); + delta += 19; + jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta); + dres.whatNext = Dis_StopHere; + goto decode_success; + } + else + if (code[16] == 0x48 && code[17] == 0x87 + && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { + /* %RAX = guest_NRADDR */ + DIP("%%rax = guest_NRADDR\n"); + delta += 19; + putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); + goto decode_success; + } + else + if (code[16] == 0x48 && code[17] == 0x87 + && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { + /* call-noredir *%RAX */ + DIP("call-noredir *%%rax\n"); + delta += 19; + t1 = newTemp(Ity_I64); + assign(t1, getIRegRAX(8)); + t2 = newTemp(Ity_I64); + assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); + putIReg64(R_RSP, mkexpr(t2)); + storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); + jmp_treg(Ijk_NoRedir,t1); + dres.whatNext = Dis_StopHere; + goto decode_success; + } + /* We don't know what it is. */ + goto decode_failure; + /*NOTREACHED*/ + } + } + + /* Eat prefixes, summarising the result in pfx and sz, and rejecting + as many invalid combinations as possible. */ + n_prefixes = 0; + while (True) { + if (n_prefixes > 7) goto decode_failure; + pre = getUChar(delta); + switch (pre) { + case 0x66: pfx |= PFX_66; break; + case 0x67: pfx |= PFX_ASO; break; + case 0xF2: pfx |= PFX_F2; break; + case 0xF3: pfx |= PFX_F3; break; + case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; + case 0x2E: pfx |= PFX_CS; break; + case 0x3E: pfx |= PFX_DS; break; + case 0x26: pfx |= PFX_ES; break; + case 0x64: pfx |= PFX_FS; break; + case 0x65: pfx |= PFX_GS; break; + case 0x36: pfx |= PFX_SS; break; + case 0x40 ... 
0x4F: + pfx |= PFX_REX; + if (pre & (1<<3)) pfx |= PFX_REXW; + if (pre & (1<<2)) pfx |= PFX_REXR; + if (pre & (1<<1)) pfx |= PFX_REXX; + if (pre & (1<<0)) pfx |= PFX_REXB; + break; + default: + goto not_a_prefix; + } + n_prefixes++; + delta++; + } + + not_a_prefix: + + /* Dump invalid combinations */ + n = 0; + if (pfx & PFX_F2) n++; + if (pfx & PFX_F3) n++; + if (n > 1) + goto decode_failure; /* can't have both */ + + n = 0; + if (pfx & PFX_CS) n++; + if (pfx & PFX_DS) n++; + if (pfx & PFX_ES) n++; + if (pfx & PFX_FS) n++; + if (pfx & PFX_GS) n++; + if (pfx & PFX_SS) n++; + if (n > 1) + goto decode_failure; /* multiple seg overrides == illegal */ + + /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' + that we should accept it. */ + if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero) + goto decode_failure; + + /* Ditto for %gs prefixes. */ + if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60) + goto decode_failure; + + /* Set up sz. */ + sz = 4; + if (pfx & PFX_66) sz = 2; + if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; + + /* Now we should be looking at the primary opcode byte or the + leading F2 or F3. Check that any LOCK prefix is actually + allowed. */ + + if (pfx & PFX_LOCK) { + if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { + DIP("lock "); + } else { + *expect_CAS = False; + goto decode_failure; + } + } + + + /* ---------------------------------------------------- */ + /* --- The SSE/SSE2 decoder. --- */ + /* ---------------------------------------------------- */ + + /* What did I do to deserve SSE ? Perhaps I was really bad in a + previous life? */ + + /* Note, this doesn't handle SSE3 right now. All amd64s support + SSE2 as a minimum so there is no point distinguishing SSE1 vs + SSE2. */ + + insn = (UChar*)&guest_code[delta]; + + /* FXSAVE is spuriously at the start here only because it is + thusly placed in guest-x86/toIR.c. */ + + /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. 
+ Note that REX.W 0F AE /0 writes a slightly different format and + we don't handle that here. */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) { + IRDirty* d; + modrm = getUChar(delta+2); + vassert(sz == 4); + vassert(!epartIsReg(modrm)); + /* REX.W must not be set. That should be assured us by sz == 4 + above. */ + vassert(!(pfx & PFX_REXW)); + + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + + DIP("fxsave %s\n", dis_buf); + + /* Uses dirty helper: + void amd64g_do_FXSAVE ( VexGuestAMD64State*, UInt ) */ + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FXSAVE", + &amd64g_dirtyhelper_FXSAVE, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 512; + + /* declare we're reading guest state */ + d->nFxState = 7; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(ULong); + + d->fxState[4].fx = Ifx_Read; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(ULong); + + d->fxState[5].fx = Ifx_Read; + d->fxState[5].offset = OFFB_XMM0; + d->fxState[5].size = 16 * sizeof(U128); + + d->fxState[6].fx = Ifx_Read; + d->fxState[6].offset = OFFB_SSEROUND; + d->fxState[6].size = sizeof(ULong); + + /* Be paranoid ... this assertion tries to ensure the 16 %xmm + images are packed back-to-back. If not, the value of + d->fxState[5].size is wrong. 
*/ + vassert(16 == sizeof(U128)); + vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); + + stmt( IRStmt_Dirty(d) ); + + goto decode_success; + } + + /* ------ SSE decoder main ------ */ + + /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 ); + goto decode_success; + } + + /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 ); + goto decode_success; + } + + /* 0F 55 = ANDNPS -- G = (not G) and E */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x55) { + delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 ); + goto decode_success; + } + + /* 0F 54 = ANDPS -- G = G and E */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x54) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 ); + goto decode_success; + } + + /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 ); + goto decode_success; + } + + /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 ); + goto decode_success; + } + + /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ + /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { + IRTemp argL = newTemp(Ity_F32); + IRTemp argR = newTemp(Ity_F32); + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), + 
0/*lowest lane*/ ) ); + delta += 2+1; + DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); + delta += 2+alen; + DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), + 0/*lowest lane*/ ) ); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop( Iop_And64, + unop( Iop_32Uto64, + binop(Iop_CmpF64, + unop(Iop_F32toF64,mkexpr(argL)), + unop(Iop_F32toF64,mkexpr(argR)))), + mkU64(0x45) + ))); + + goto decode_success; + } + + /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low + half xmm */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x2A) { + IRTemp arg64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getUChar(delta+2); + do_MMX_preamble(); + if (epartIsReg(modrm)) { + assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 2+1; + DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpi2ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + + putXMMRegLane32F( + gregOfRexRM(pfx,modrm), 0, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, + unop(Iop_64to32, mkexpr(arg64)) )) ); + + putXMMRegLane32F( + gregOfRexRM(pfx,modrm), 1, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, + unop(Iop_64HIto32, mkexpr(arg64)) )) ); + + goto decode_success; + } + + /* F3 0F 2A = CVTSI2SS + -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm + -- 
sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ + if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0x2A) { + + IRTemp rmode = newTemp(Ity_I32); + assign( rmode, get_sse_roundingmode() ); + modrm = getUChar(delta+2); + + if (sz == 4) { + IRTemp arg32 = newTemp(Ity_I32); + if (epartIsReg(modrm)) { + assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtsi2ss %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + putXMMRegLane32F( + gregOfRexRM(pfx,modrm), 0, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, mkexpr(arg32)) ) ); + } else { + /* sz == 8 */ + IRTemp arg64 = newTemp(Ity_I64); + if (epartIsReg(modrm)) { + assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtsi2ssq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + putXMMRegLane32F( + gregOfRexRM(pfx,modrm), 0, + binop(Iop_F64toF32, + mkexpr(rmode), + binop(Iop_I64toF64, mkexpr(rmode), mkexpr(arg64)) ) ); + } + + goto decode_success; + } + + /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x + I32 in mmx, according to prevailing SSE rounding mode */ + /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x + I32 in mmx, rounding towards zero */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp dst64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + IRTemp f32lo = newTemp(Ity_F32); + IRTemp f32hi = 
newTemp(Ity_F32); + Bool r2zero = toBool(insn[1] == 0x2C); + + do_MMX_preamble(); + modrm = getUChar(delta+2); + + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); + assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); + DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, + mkexpr(addr), + mkU64(4) ))); + delta += 2+alen; + DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", + dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + if (r2zero) { + assign(rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + assign( + dst64, + binop( Iop_32HLto64, + binop( Iop_F64toI32, + mkexpr(rmode), + unop( Iop_F32toF64, mkexpr(f32hi) ) ), + binop( Iop_F64toI32, + mkexpr(rmode), + unop( Iop_F32toF64, mkexpr(f32lo) ) ) + ) + ); + + putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); + goto decode_success; + } + + /* F3 0F 2D = CVTSS2SI + when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, + according to prevailing SSE rounding mode + when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, + according to prevailing SSE rounding mode + */ + /* F3 0F 2C = CVTTSS2SI + when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, + truncating towards zero + when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, + truncating towards zero + */ + if (haveF3no66noF2(pfx) + && insn[0] == 0x0F + && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f32lo = newTemp(Ity_F32); + Bool r2zero = toBool(insn[1] == 0x2C); + vassert(sz == 4 || sz == 8); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); + DIP("cvt%sss2si %s,%s\n", 
r2zero ? "t" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameIReg(sz, gregOfRexRM(pfx,modrm), False)); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + delta += 2+alen; + DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", + dis_buf, + nameIReg(sz, gregOfRexRM(pfx,modrm), False)); + } + + if (r2zero) { + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + if (sz == 4) { + putIReg32( gregOfRexRM(pfx,modrm), + binop( Iop_F64toI32, + mkexpr(rmode), + unop(Iop_F32toF64, mkexpr(f32lo))) ); + } else { + putIReg64( gregOfRexRM(pfx,modrm), + binop( Iop_F64toI64, + mkexpr(rmode), + unop(Iop_F32toF64, mkexpr(f32lo))) ); + } + + goto decode_success; + } + + /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5E) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 ); + goto decode_success; + } + + /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5E) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 ); + goto decode_success; + } + + /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && haveNo66noF2noF3(pfx) + && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) { + + IRTemp t64 = newTemp(Ity_I64); + IRTemp ew = newTemp(Ity_I32); + + vassert(sz == 4); + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + DIP("ldmxcsr %s\n", dis_buf); + + /* The only thing we observe in %mxcsr is the rounding mode. + Therefore, pass the 32-bit value (SSE native-format control + word) to a clean helper, getting back a 64-bit value, the + lower half of which is the SSEROUND value to store, and the + upper half of which is the emulation-warning token which may + be generated. 
+ */ + /* ULong amd64h_check_ldmxcsr ( ULong ); */ + assign( t64, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_check_ldmxcsr", + &amd64g_check_ldmxcsr, + mkIRExprVec_1( + unop(Iop_32Uto64, + loadLE(Ity_I32, mkexpr(addr)) + ) + ) + ) + ); + + put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); + assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); + put_emwarn( mkexpr(ew) ); + /* Finally, if an emulation warning was reported, side-exit to + the next insn, reporting the warning, so that Valgrind's + dispatcher sees the warning. */ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), + Ijk_EmWarn, + IRConst_U64(guest_RIP_bbstart+delta) + ) + ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F7 = MASKMOVQ -- 8x8 masked store */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xF7) { + Bool ok = False; + delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 ); + if (!ok) + goto decode_failure; + goto decode_success; + } + + /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 ); + goto decode_success; + } + + /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 ); + goto decode_success; + } + + /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 ); + goto decode_success; + } + + /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 ); + goto decode_success; + } + + /* 0F 28 = 
MOVAPS -- move from E (mem or xmm) to G (xmm). */ + /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRexRM(pfx,modrm), + getXMMReg( eregOfRexRM(pfx,modrm) )); + DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("mov[ua]ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ + /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through; awaiting test case */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ + /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. 
*/ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x16) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, + getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); + DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movhps %s,%s\n", dis_buf, + nameXMMReg( gregOfRexRM(pfx,modrm) )); + } + goto decode_success; + } + + /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x17) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRexRM(pfx,insn[2]), + 1/*upper lane*/ ) ); + DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ + /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. 
*/ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x12) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMRegLane64( gregOfRexRM(pfx,modrm), + 0/*lower lane*/, + getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 )); + DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movlps %s, %s\n", + dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); + } + goto decode_success; + } + + /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */ + if (haveNo66noF2noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x13) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRexRM(pfx,insn[2]), + 0/*lower lane*/ ) ); + DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) + to 4 lowest bits of ireg(G) */ + if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0x50) { + /* sz == 8 is a kludge to handle insns with REX.W redundantly + set to 1, which has been known to happen: + + 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d + + 20071106: Intel docs say that REX.W isn't redundant: when + present, a 64-bit register is written; when not present, only + the 32-bit half is written. However, testing on a Core2 + machine suggests the entire 64 bit register is written + irrespective of the status of REX.W. That could be because + of the default rule that says "if the lower half of a 32-bit + register is written, the upper half is zeroed". 
By using + putIReg32 here we inadvertantly produce the same behaviour as + the Core2, for the same reason -- putIReg32 implements said + rule. + + AMD docs give no indication that REX.W is even valid for this + insn. */ + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + Int src; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I32); + delta += 2+1; + src = eregOfRexRM(pfx,modrm); + assign( t0, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), + mkU32(1) )); + assign( t1, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), + mkU32(2) )); + assign( t2, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), + mkU32(4) )); + assign( t3, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), + mkU32(8) )); + putIReg32( gregOfRexRM(pfx,modrm), + binop(Iop_Or32, + binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), + binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) + ) + ); + DIP("movmskps %s,%s\n", nameXMMReg(src), + nameIReg32(gregOfRexRM(pfx,modrm))); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ + /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ + if ( ( (haveNo66noF2noF3(pfx) && sz == 4) + || (have66noF2noF3(pfx) && sz == 2) + ) + && insn[0] == 0x0F && insn[1] == 0x2B) { + modrm = getUChar(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the + Intel manual does not say anything about the usual business of + the FP reg tags getting trashed whenever an MMX insn happens. 
+ So we just leave them alone. + */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xE7) { + modrm = getUChar(delta+2); + if (!epartIsReg(modrm)) { + /* do_MMX_preamble(); Intel docs don't specify this */ + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); + DIP("movntq %s,%s\n", dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G + (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ + if (haveF3no66noF2(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x10) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, + getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); + DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, + loadLE(Ity_I32, mkexpr(addr)) ); + DIP("movss %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem + or lo 1/4 xmm). 
*/ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x11) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through, we don't yet have a test case */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), + getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); + DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + dis_buf); + delta += 2+alen; + goto decode_success; + } + } + + /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 ); + goto decode_success; + } + + /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 ); + goto decode_success; + } + + /* 0F 56 = ORPS -- G = G and E */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x56) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xE0) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pavgb", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xE3) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pavgw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put + zero-extend of 
it in ireg(G). */ + if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0xC5) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + IRTemp sV = newTemp(Ity_I64); + t5 = newTemp(Ity_I16); + do_MMX_preamble(); + assign(sV, getMMXReg(eregLO3ofRM(modrm))); + breakup64to16s( sV, &t3, &t2, &t1, &t0 ); + switch (insn[3] & 3) { + case 0: assign(t5, mkexpr(t0)); break; + case 1: assign(t5, mkexpr(t1)); break; + case 2: assign(t5, mkexpr(t2)); break; + case 3: assign(t5, mkexpr(t3)); break; + default: vassert(0); + } + if (sz == 8) + putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); + else + putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); + DIP("pextrw $%d,%s,%s\n", + (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)), + sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) + : nameIReg32(gregOfRexRM(pfx,modrm)) + ); + delta += 4; + goto decode_success; + } + /* else fall through */ + /* note, for anyone filling in the mem case: this insn has one + byte after the amode and therefore you must pass 1 as the + last arg to disAMode */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and + put it into the specified lane of mmx(G). */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xC4) { + /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the + mmx reg. t4 is the new lane value. t5 is the original + mmx value. t6 is the new mmx value. 
*/ + Int lane; + t4 = newTemp(Ity_I16); + t5 = newTemp(Ity_I64); + t6 = newTemp(Ity_I64); + modrm = insn[2]; + do_MMX_preamble(); + + assign(t5, getMMXReg(gregLO3ofRM(modrm))); + breakup64to16s( t5, &t3, &t2, &t1, &t0 ); + + if (epartIsReg(modrm)) { + assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); + delta += 3+1; + lane = insn[3+1-1]; + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + nameIReg16(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 ); + delta += 3+alen; + lane = insn[3+alen-1]; + assign(t4, loadLE(Ity_I16, mkexpr(addr))); + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + switch (lane & 3) { + case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; + case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; + case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; + case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; + default: vassert(0); + } + putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F EE = PMAXSW -- 16x4 signed max */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xEE) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pmaxsw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F DE = PMAXUB -- 8x8 unsigned max */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xDE) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pmaxub", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F EA = PMINSW -- 16x4 signed min */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xEA) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pminsw", False ); + goto 
decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F DA = PMINUB -- 8x8 unsigned min */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xDA) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pminub", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in + mmx(G), turn them into a byte, and put zero-extend of it in + ireg(G). */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xD7) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + do_MMX_preamble(); + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I64); + assign(t0, getMMXReg(eregLO3ofRM(modrm))); + assign(t1, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_calculate_mmx_pmovmskb", + &amd64g_calculate_mmx_pmovmskb, + mkIRExprVec_1(mkexpr(t0)))); + putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1))); + DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameIReg32(gregOfRexRM(pfx,modrm))); + delta += 3; + goto decode_success; + } + /* else fall through */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xE4) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "pmuluh", False ); + goto decode_success; + } + + /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ + /* 0F 18 /1 = PREFETCH0 -- with various different hints */ + /* 0F 18 /2 = PREFETCH1 */ + /* 0F 18 /3 = PREFETCH2 */ + if (insn[0] == 0x0F && insn[1] == 0x18 + && haveNo66noF2noF3(pfx) + && !epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) { + HChar* hintstr = "??"; + + modrm = getUChar(delta+2); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, vbi, 
pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + + switch (gregLO3ofRM(modrm)) { + case 0: hintstr = "nta"; break; + case 1: hintstr = "t0"; break; + case 2: hintstr = "t1"; break; + case 3: hintstr = "t2"; break; + default: vassert(0); + } + + DIP("prefetch%s %s\n", hintstr, dis_buf); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xF6) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "psadbw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_I64); + dV = newTemp(Ity_I64); + do_MMX_preamble(); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + order = (Int)insn[3]; + delta += 2+2; + DIP("pshufw $%d,%s,%s\n", order, + nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*extra byte after amode*/ ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 3+alen; + DIP("pshufw $%d,%s,%s\n", order, + dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + breakup64to16s( sV, &s3, &s2, &s1, &s0 ); + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + assign(dV, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x53) { + delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, + "rcpps", Iop_Recip32Fx4 ); + goto decode_success; + } + + /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x53) { + delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, + "rcpss", Iop_Recip32F0x4 ); + goto decode_success; + } + + /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x52) { + delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, + "rsqrtps", Iop_RSqrt32Fx4 ); + goto decode_success; + } + + /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x52) { + delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, + "rsqrtss", Iop_RSqrt32F0x4 ); + goto decode_success; + } + + /* 0F AE /7 = SFENCE -- flush pending operations to memory */ + if (haveNo66noF2noF3(pfx) + && insn[0] == 0x0F && insn[1] == 0xAE + && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7 + && sz == 4) { + delta += 3; + /* Insert a memory fence. It's sometimes important that these + are carried through to the generated code. 
*/ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("sfence\n"); + goto decode_success; + } + + /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xC6) { + Int select; + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + select = (Int)insn[3]; + delta += 2+2; + DIP("shufps $%d,%s,%s\n", select, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*byte at end of insn*/ ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + select = (Int)insn[2+alen]; + delta += 3+alen; + DIP("shufps $%d,%s,%s\n", select, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) + # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + + putXMMReg( + gregOfRexRM(pfx,modrm), + mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), + SELD((select>>2)&3), SELD((select>>0)&3) ) + ); + + # undef SELD + # undef SELS + + goto decode_success; + } + + /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x51) { + delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, + "sqrtps", Iop_Sqrt32Fx4 ); + goto decode_success; + } + + /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x51) { + delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, + "sqrtss", Iop_Sqrt32F0x4 ); + goto decode_success; + } + + /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && haveNo66noF2noF3(pfx) + && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) { + + vassert(sz == 4); + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + + /* Fake up a native SSE mxcsr word. The only thing it depends + on is SSEROUND[1:0], so call a clean helper to cook it up. 
+ */ + /* ULong amd64h_create_mxcsr ( ULong sseround ) */ + DIP("stmxcsr %s\n", dis_buf); + storeLE( + mkexpr(addr), + unop(Iop_64to32, + mkIRExprCCall( + Ity_I64, 0/*regp*/, + "amd64g_create_mxcsr", &amd64g_create_mxcsr, + mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) + ) + ) + ); + goto decode_success; + } + + /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 ); + goto decode_success; + } + + /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 ); + goto decode_success; + } + + /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ + /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ + /* These just appear to be special cases of SHUFPS */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + Bool hi = toBool(insn[1] == 0x15); + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("unpck%sps %s,%s\n", hi ? "h" : "l", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + if (hi) { + putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) ); + } else { + putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) ); + } + + goto decode_success; + } + + /* 0F 57 = XORPS -- G = G and E */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x57) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 ); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSE2 decoder. --- */ + /* ---------------------------------------------------- */ + + /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 ); + goto decode_success; + } + + /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 ); + goto decode_success; + } + + /* 66 0F 55 = ANDNPD -- G = (not G) and E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x55) { + delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F 54 = ANDPD -- G = G and E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x54) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ + if 
(have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 ); + goto decode_success; + } + + /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 ); + goto decode_success; + } + + /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ + /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { + IRTemp argL = newTemp(Ity_F64); + IRTemp argR = newTemp(Ity_F64); + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), + 0/*lowest lane*/ ) ); + delta += 2+1; + DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); + delta += 2+alen; + DIP("%scomisd %s,%s\n", insn[1]==0x2E ? 
"u" : "", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), + 0/*lowest lane*/ ) ); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop( Iop_And64, + unop( Iop_32Uto64, + binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), + mkU64(0x45) + ))); + + goto decode_success; + } + + /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x + F64 in xmm(G) */ + if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) { + IRTemp arg64 = newTemp(Ity_I64); + if (sz != 4) goto decode_failure; + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) ); + delta += 2+1; + DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtdq2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + putXMMRegLane64F( + gregOfRexRM(pfx,modrm), 0, + unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64))) + ); + + putXMMRegLane64F( + gregOfRexRM(pfx,modrm), 1, + unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64))) + ); + + goto decode_success; + } + + /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in + xmm(G) */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5B) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtdq2ps %s,%s\n", 
dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + breakup128to32s( argV, &t3, &t2, &t1, &t0 ); + + # define CVT(_t) binop( Iop_F64toF32, \ + mkexpr(rmode), \ + unop(Iop_I32toF64,mkexpr(_t))) + + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) ); + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) ); + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in + lo half xmm(G), and zero upper half, rounding towards zero */ + /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in + lo half xmm(G), according to prevailing rounding mode, and zero + upper half */ + if ( ( (haveF2no66noF3(pfx) && sz == 4) + || (have66noF2noF3(pfx) && sz == 2) + ) + && insn[0] == 0x0F && insn[1] == 0xE6) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + Bool r2zero = toBool(sz == 2); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvt%spd2dq %s,%s\n", r2zero ? 
"t" : "", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + if (r2zero) { + assign(rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + t0 = newTemp(Ity_F64); + t1 = newTemp(Ity_F64); + assign( t0, unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, mkexpr(argV))) ); + assign( t1, unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, mkexpr(argV))) ); + + # define CVT(_t) binop( Iop_F64toI32, \ + mkexpr(rmode), \ + mkexpr(_t) ) + + putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x + I32 in mmx, according to prevailing SSE rounding mode */ + /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x + I32 in mmx, rounding towards zero */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp dst64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + IRTemp f64hi = newTemp(Ity_F64); + Bool r2zero = toBool(insn[1] == 0x2C); + + do_MMX_preamble(); + modrm = getUChar(delta+2); + + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); + assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); + DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, + mkexpr(addr), + mkU64(8) ))); + delta += 2+alen; + DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", + dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + if (r2zero) { + assign(rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + assign( + dst64, + binop( Iop_32HLto64, + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64hi) ), + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo) ) + ) + ); + + putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); + goto decode_success; + } + + /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in + lo half xmm(G), rounding according to prevailing SSE rounding + mode, and zero upper half */ + /* Note, this is practically identical to CVTPD2DQ. It would have + been nicer to merge them together, but the insn[] offsets differ + by one. */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpd2ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + t0 = newTemp(Ity_F64); + t1 = newTemp(Ity_F64); + assign( t0, unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, mkexpr(argV))) ); + assign( t1, unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, mkexpr(argV))) ); + + # define CVT(_t) binop( Iop_F64toF32, \ + mkexpr(rmode), \ + mkexpr(_t) ) + + putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) ); + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); + putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx 
to 2 x F64 in + xmm(G) */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x2A) { + IRTemp arg64 = newTemp(Ity_I64); + + modrm = getUChar(delta+2); + do_MMX_preamble(); + if (epartIsReg(modrm)) { + assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 2+1; + DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpi2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + putXMMRegLane64F( + gregOfRexRM(pfx,modrm), 0, + unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64)) ) + ); + + putXMMRegLane64F( + gregOfRexRM(pfx,modrm), 1, + unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64)) ) + ); + + goto decode_success; + } + + /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in + xmm(G), rounding towards zero */ + /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in + xmm(G), as per the prevailing rounding mode */ + if ( ( (have66noF2noF3(pfx) && sz == 2) + || (haveF3no66noF2(pfx) && sz == 4) + ) + && insn[0] == 0x0F && insn[1] == 0x5B) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + Bool r2zero = toBool(sz == 4); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtps2dq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + if (r2zero) { + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + breakup128to32s( argV, &t3, &t2, &t1, &t0 ); + + /* This is less than ideal. 
If it turns out to be a performance + bottleneck it can be improved. */ + # define CVT(_t) \ + binop( Iop_F64toI32, \ + mkexpr(rmode), \ + unop( Iop_F32toF64, \ + unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) + + putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x + F64 in xmm(G). */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp f32lo = newTemp(Ity_F32); + IRTemp f32hi = newTemp(Ity_F32); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) ); + assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) ); + delta += 2+1; + DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); + assign( f32hi, loadLE(Ity_F32, + binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); + delta += 2+alen; + DIP("cvtps2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + + putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1, + unop(Iop_F32toF64, mkexpr(f32hi)) ); + putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, + unop(Iop_F32toF64, mkexpr(f32lo)) ); + + goto decode_success; + } + + /* F2 0F 2D = CVTSD2SI + when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, + according to prevailing SSE rounding mode + when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, + according to prevailing SSE rounding mode + */ + /* F2 0F 2C = CVTTSD2SI + when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, + truncating towards zero + when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, + truncating towards zero + */ + if 
(haveF2no66noF3(pfx) + && insn[0] == 0x0F + && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + Bool r2zero = toBool(insn[1] == 0x2C); + vassert(sz == 4 || sz == 8); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); + DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameIReg(sz, gregOfRexRM(pfx,modrm), False)); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + delta += 2+alen; + DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", + dis_buf, + nameIReg(sz, gregOfRexRM(pfx,modrm), False)); + } + + if (r2zero) { + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + if (sz == 4) { + putIReg32( gregOfRexRM(pfx,modrm), + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo)) ); + } else { + putIReg64( gregOfRexRM(pfx,modrm), + binop( Iop_F64toI64, mkexpr(rmode), mkexpr(f64lo)) ); + } + + goto decode_success; + } + + /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in + low 1/4 xmm(G), according to prevailing SSE rounding mode */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + vassert(sz == 4); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); + DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + delta += 2+alen; + DIP("cvtsd2ss %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( rmode, get_sse_roundingmode() ); + putXMMRegLane32F( + gregOfRexRM(pfx,modrm), 0, + binop( Iop_F64toF32, 
mkexpr(rmode), mkexpr(f64lo) ) + ); + + goto decode_success; + } + + /* F2 0F 2A = CVTSI2SD + when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm + when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm + */ + if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0x2A) { + modrm = getUChar(delta+2); + + if (sz == 4) { + IRTemp arg32 = newTemp(Ity_I32); + if (epartIsReg(modrm)) { + assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtsi2sd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, + unop(Iop_I32toF64, mkexpr(arg32)) + ); + } else { + /* sz == 8 */ + IRTemp arg64 = newTemp(Ity_I64); + if (epartIsReg(modrm)) { + assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtsi2sdq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm)) ); + } + putXMMRegLane64F( + gregOfRexRM(pfx,modrm), + 0, + binop( Iop_I64toF64, + get_sse_roundingmode(), + mkexpr(arg64) + ) + ); + + } + + goto decode_success; + } + + /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in + low half xmm(G) */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp f32lo = newTemp(Ity_F32); + + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); + DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { 
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + delta += 2+alen; + DIP("cvtss2sd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, + unop( Iop_F32toF64, mkexpr(f32lo) ) ); + + goto decode_success; + } + + /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x5E) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 ); + goto decode_success; + } + + /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 ); + goto decode_success; + } + + /* 0F AE /5 = LFENCE -- flush pending operations to memory */ + /* 0F AE /6 = MFENCE -- flush pending operations to memory */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xAE + && epartIsReg(insn[2]) + && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) { + delta += 3; + /* Insert a memory fence. It's sometimes important that these + are carried through to the generated code. */ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? 
"l" : "m"); + goto decode_success; + } + + /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 ); + goto decode_success; + } + + /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 ); + goto decode_success; + } + + /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 ); + goto decode_success; + } + + /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 ); + goto decode_success; + } + + /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ + /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ + /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F + && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { + HChar* wot = insn[1]==0x28 ? "apd" : + insn[1]==0x10 ? 
"upd" : "dqa"; + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRexRM(pfx,modrm), + getXMMReg( eregOfRexRM(pfx,modrm) )); + DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("mov%s %s,%s\n", wot, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ + /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ + if (have66noF2noF3(pfx) && insn[0] == 0x0F + && (insn[1] == 0x29 || insn[1] == 0x11)) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through; awaiting test case */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("mov[ua]pd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */ + /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. 
*/ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) { + vassert(sz == 2 || sz == 8); + if (sz == 2) sz = 4; + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + if (sz == 4) { + putXMMReg( + gregOfRexRM(pfx,modrm), + unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) + ); + DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + putXMMReg( + gregOfRexRM(pfx,modrm), + unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) + ); + DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + } else { + addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + putXMMReg( + gregOfRexRM(pfx,modrm), + sz == 4 + ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) + : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) + ); + DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + goto decode_success; + } + + /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ + /* or from xmm low 1/2 to ireg64 or m64. */ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) { + if (sz == 2) sz = 4; + vassert(sz == 4 || sz == 8); + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + if (sz == 4) { + putIReg32( eregOfRexRM(pfx,modrm), + getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); + DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + nameIReg32(eregOfRexRM(pfx,modrm))); + } else { + putIReg64( eregOfRexRM(pfx,modrm), + getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); + DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + nameIReg64(eregOfRexRM(pfx,modrm))); + } + } else { + addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + storeLE( mkexpr(addr), + sz == 4 + ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) + : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); + DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', + nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); + } + goto decode_success; + } + + /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x7F) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMReg( eregOfRexRM(pfx,modrm), + getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + nameXMMReg(eregOfRexRM(pfx,modrm))); + } else { + addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); + } + goto decode_success; + } + + /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x6F) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRexRM(pfx,modrm), + getXMMReg( eregOfRexRM(pfx,modrm) )); + DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("movdqu %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). 
*/ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x7F) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + goto decode_failure; /* awaiting test case */ + delta += 2+1; + putXMMReg( eregOfRexRM(pfx,modrm), + getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + nameXMMReg(eregOfRexRM(pfx,modrm))); + } else { + addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); + } + goto decode_success; + } + + /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xD6) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + do_MMX_preamble(); + putMMXReg( gregLO3ofRM(modrm), + getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); + DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + delta += 2+1; + goto decode_success; + } else { + /* apparently no mem case for this insn */ + goto decode_failure; + } + } + + /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ + /* These seems identical to MOVHPS. This instruction encoding is + completely crazy. */ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through; apparently reg-reg is not possible */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movhpd %s,%s\n", dis_buf, + nameXMMReg( gregOfRexRM(pfx,modrm) )); + goto decode_success; + } + } + + /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ + /* Again, this seems identical to MOVHPS. 
*/ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRexRM(pfx,insn[2]), + 1/*upper lane*/ ) ); + DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ + /* Identical to MOVLPS ? */ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through; apparently reg-reg is not possible */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + putXMMRegLane64( gregOfRexRM(pfx,modrm), + 0/*lower lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movlpd %s, %s\n", + dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); + goto decode_success; + } + } + + /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ + /* Identical to MOVLPS ? */ + if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) { + modrm = getUChar(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRexRM(pfx,modrm), + 0/*lower lane*/ ) ); + DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to + 2 lowest bits of ireg(G) */ + if (have66noF2noF3(pfx) && (sz == 2 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0x50) { + /* sz == 8 is a kludge to handle insns with REX.W redundantly + set to 1, which has been known to happen: + 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d + 20071106: see further comments on MOVMSKPS implementation above. 
+ */ + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + Int src; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + delta += 2+1; + src = eregOfRexRM(pfx,modrm); + assign( t0, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), + mkU32(1) )); + assign( t1, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), + mkU32(2) )); + putIReg32( gregOfRexRM(pfx,modrm), + binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) + ); + DIP("movmskpd %s,%s\n", nameXMMReg(src), + nameIReg32(gregOfRexRM(pfx,modrm))); + goto decode_success; + } + /* else fall through */ + goto decode_failure; + } + + /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF7) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + IRTemp regD = newTemp(Ity_V128); + IRTemp mask = newTemp(Ity_V128); + IRTemp olddata = newTemp(Ity_V128); + IRTemp newdata = newTemp(Ity_V128); + addr = newTemp(Ity_I64); + + assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); + assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) )); + + /* Unfortunately can't do the obvious thing with SarN8x16 + here since that can't be re-emitted as SSE2 code - no such + insn. */ + assign( + mask, + binop(Iop_64HLtoV128, + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), + mkU8(7) ), + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), + mkU8(7) ) )); + assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); + assign( newdata, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_AndV128, + mkexpr(olddata), + unop(Iop_NotV128, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + + delta += 2+1; + DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ), + nameXMMReg( gregOfRexRM(pfx,modrm) ) ); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. 
*/ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE7) { + modrm = getUChar(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); + DIP("movntdq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + goto decode_failure; + } + + /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ + if (haveNo66noF2noF3(pfx) && + insn[0] == 0x0F && insn[1] == 0xC3) { + vassert(sz == 4 || sz == 8); + modrm = getUChar(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); + DIP("movnti %s,%s\n", dis_buf, + nameIRegG(sz, pfx, modrm)); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem + or lo half xmm). */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0xD6) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + /* fall through, awaiting test case */ + /* dst: lo half copied, hi half zeroed */ + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); + DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero + hi half). 
*/ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xD6) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + do_MMX_preamble(); + putXMMReg( gregOfRexRM(pfx,modrm), + unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); + DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + goto decode_success; + } else { + /* apparently no mem case for this insn */ + goto decode_failure; + } + } + + /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to + G (lo half xmm). Upper half of G is zeroed out. */ + /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to + G (lo half xmm). If E is mem, upper half of G is zeroed out. + If E is reg, upper half of G is unchanged. */ + if ( (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x10) + || + (haveF3no66noF2(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x7E) + ) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, + getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); + if (insn[1] == 0x7E/*MOVQ*/) { + /* zero bits 127:64 */ + putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); + } + DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); + putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movsd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem + or lo half xmm). 
*/ + if (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x11) { + modrm = getUChar(delta+2); + if (epartIsReg(modrm)) { + putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, + getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); + DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + nameXMMReg(eregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + storeLE( mkexpr(addr), + getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); + DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), + dis_buf); + delta += 2+alen; + } + goto decode_success; + } + + /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 ); + goto decode_success; + } + + /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 ); + goto decode_success; + } + + /* 66 0F 56 = ORPD -- G = G and E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x56) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 ); + goto decode_success; + } + + /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xC6) { + Int select; + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + IRTemp d1 = newTemp(Ity_I64); + IRTemp d0 = newTemp(Ity_I64); + + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + select = (Int)insn[3]; + delta += 2+2; + DIP("shufpd 
$%d,%s,%s\n", select, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + select = (Int)insn[2+alen]; + delta += 3+alen; + DIP("shufpd $%d,%s,%s\n", select, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); + assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); + + # define SELD(n) mkexpr((n)==0 ? d0 : d1) + # define SELS(n) mkexpr((n)==0 ? s0 : s1) + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) ) + ); + + # undef SELD + # undef SELS + + goto decode_success; + } + + /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x51) { + delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, + "sqrtpd", Iop_Sqrt64Fx2 ); + goto decode_success; + } + + /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2, + "sqrtsd", Iop_Sqrt64F0x2 ); + goto decode_success; + } + + /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 ); + goto decode_success; + } + + /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ + if (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 ); + goto decode_success; + } + + /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ + /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ + /* 
These just appear to be special cases of SHUFPS */ + if (have66noF2noF3(pfx) + && sz == 2 /* could be 8 if rex also present */ + && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { + IRTemp s1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + IRTemp d1 = newTemp(Ity_I64); + IRTemp d0 = newTemp(Ity_I64); + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + Bool hi = toBool(insn[1] == 0x15); + + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("unpck%sps %s,%s\n", hi ? "h" : "l", + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("unpck%sps %s,%s\n", hi ? "h" : "l", + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); + assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); + + if (hi) { + putXMMReg( gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); + } else { + putXMMReg( gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); + } + + goto decode_success; + } + + /* 66 0F 57 = XORPD -- G = G xor E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x57) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 ); + goto decode_success; + } + + /* 66 0F 6B = PACKSSDW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x6B) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "packssdw", Iop_QNarrow32Sx4, True ); + goto decode_success; + } + + /* 66 0F 63 = PACKSSWB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x63) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "packsswb", 
Iop_QNarrow16Sx8, True ); + goto decode_success; + } + + /* 66 0F 67 = PACKUSWB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x67) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "packuswb", Iop_QNarrow16Ux8, True ); + goto decode_success; + } + + /* 66 0F FC = PADDB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xFC) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddb", Iop_Add8x16, False ); + goto decode_success; + } + + /* 66 0F FE = PADDD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xFE) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddd", Iop_Add32x4, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F D4 = PADDQ -- add 64x1 */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xD4) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "paddq", False ); + goto decode_success; + } + + /* 66 0F D4 = PADDQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD4) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddq", Iop_Add64x2, False ); + goto decode_success; + } + + /* 66 0F FD = PADDW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xFD) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddw", Iop_Add16x8, False ); + goto decode_success; + } + + /* 66 0F EC = PADDSB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xEC) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddsb", Iop_QAdd8Sx16, False ); + goto decode_success; + } + + /* 66 0F ED = PADDSW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xED) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddsw", Iop_QAdd16Sx8, False ); + goto decode_success; + } + + /* 66 0F DC = PADDUSB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDC) 
{ + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddusb", Iop_QAdd8Ux16, False ); + goto decode_success; + } + + /* 66 0F DD = PADDUSW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDD) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "paddusw", Iop_QAdd16Ux8, False ); + goto decode_success; + } + + /* 66 0F DB = PAND */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDB) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F DF = PANDN */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDF) { + delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F E0 = PAVGB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE0) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pavgb", Iop_Avg8Ux16, False ); + goto decode_success; + } + + /* 66 0F E3 = PAVGW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE3) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pavgw", Iop_Avg16Ux8, False ); + goto decode_success; + } + + /* 66 0F 74 = PCMPEQB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x74) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pcmpeqb", Iop_CmpEQ8x16, False ); + goto decode_success; + } + + /* 66 0F 76 = PCMPEQD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x76) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pcmpeqd", Iop_CmpEQ32x4, False ); + goto decode_success; + } + + /* 66 0F 75 = PCMPEQW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x75) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pcmpeqw", Iop_CmpEQ16x8, False ); + goto decode_success; + } + + /* 66 0F 64 = PCMPGTB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x64) { + delta = dis_SSEint_E_to_G( vbi, pfx, 
delta+2, + "pcmpgtb", Iop_CmpGT8Sx16, False ); + goto decode_success; + } + + /* 66 0F 66 = PCMPGTD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x66) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pcmpgtd", Iop_CmpGT32Sx4, False ); + goto decode_success; + } + + /* 66 0F 65 = PCMPGTW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x65) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pcmpgtw", Iop_CmpGT16Sx8, False ); + goto decode_success; + } + + /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put + zero-extend of it in ireg(G). */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0xC5) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + t5 = newTemp(Ity_V128); + t4 = newTemp(Ity_I16); + assign(t5, getXMMReg(eregOfRexRM(pfx,modrm))); + breakup128to32s( t5, &t3, &t2, &t1, &t0 ); + switch (insn[3] & 7) { + case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; + case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; + case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; + case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; + case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; + case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; + case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; + case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; + default: vassert(0); + } + putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4))); + DIP("pextrw $%d,%s,%s\n", + (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)), + nameIReg32(gregOfRexRM(pfx,modrm))); + delta += 4; + goto decode_success; + } + /* else fall through */ + /* note, if memory case is ever filled in, there is 1 byte after + amode */ + } + + /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and + put it into the specified lane of xmm(G). 
*/ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0xC4) { + Int lane; + t4 = newTemp(Ity_I16); + modrm = insn[2]; + + if (epartIsReg(modrm)) { + assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); + delta += 3+1; + lane = insn[3+1-1]; + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + nameIReg16(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*byte after the amode*/ ); + delta += 3+alen; + lane = insn[3+alen-1]; + assign(t4, loadLE(Ity_I16, mkexpr(addr))); + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) ); + goto decode_success; + } + + /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from + E(xmm or mem) to G(xmm) */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF5) { + IRTemp s1V = newTemp(Ity_V128); + IRTemp s2V = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1Hi = newTemp(Ity_I64); + IRTemp s1Lo = newTemp(Ity_I64); + IRTemp s2Hi = newTemp(Ity_I64); + IRTemp s2Lo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmaddwd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); + assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); + assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); + assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); + assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); + assign( dHi, mkIRExprCCall( + Ity_I64, 
0/*regparms*/, + "amd64g_calculate_mmx_pmaddwd", + &amd64g_calculate_mmx_pmaddwd, + mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) + )); + assign( dLo, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_calculate_mmx_pmaddwd", + &amd64g_calculate_mmx_pmaddwd, + mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) + )); + assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; + putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); + goto decode_success; + } + + /* 66 0F EE = PMAXSW -- 16x8 signed max */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xEE) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pmaxsw", Iop_Max16Sx8, False ); + goto decode_success; + } + + /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDE) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pmaxub", Iop_Max8Ux16, False ); + goto decode_success; + } + + /* 66 0F EA = PMINSW -- 16x8 signed min */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xEA) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pminsw", Iop_Min16Sx8, False ); + goto decode_success; + } + + /* 66 0F DA = PMINUB -- 8x16 unsigned min */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xDA) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pminub", Iop_Min8Ux16, False ); + goto decode_success; + } + + /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in + xmm(E), turn them into a byte, and put zero-extend of it in + ireg(G). Doing this directly is just too cumbersome; give up + therefore and call a helper. 
*/ + /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */ + if (have66noF2noF3(pfx) + && (sz == 2 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0xD7) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I64); + assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0)); + assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1)); + t5 = newTemp(Ity_I64); + assign(t5, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_calculate_sse_pmovmskb", + &amd64g_calculate_sse_pmovmskb, + mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); + putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5))); + DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameIReg32(gregOfRexRM(pfx,modrm))); + delta += 3; + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE4) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pmulhuw", Iop_MulHi16Ux8, False ); + goto decode_success; + } + + /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE5) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pmulhw", Iop_MulHi16Sx8, False ); + goto decode_success; + } + + /* 66 0F D5 = PMULHL -- 16x8 multiply */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD5) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "pmullw", Iop_Mul16x8, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x + 0 to form 64-bit result */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xF4) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + t1 = newTemp(Ity_I32); + t0 = newTemp(Ity_I32); + modrm = insn[2]; + + 
do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 2+1; + DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmuludq %s,%s\n", dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + assign( t0, unop(Iop_64to32, mkexpr(dV)) ); + assign( t1, unop(Iop_64to32, mkexpr(sV)) ); + putMMXReg( gregLO3ofRM(modrm), + binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); + goto decode_success; + } + + /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x + 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit + half */ + /* This is a really poor translation -- could be improved if + performance critical */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF4) { + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + t1 = newTemp(Ity_I64); + t0 = newTemp(Ity_I64); + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmuludq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); + putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) ); + assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); + putXMMRegLane64( gregOfRexRM(pfx,modrm), 
1, mkexpr(t1) ); + goto decode_success; + } + + /* 66 0F EB = POR */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xEB) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 ); + goto decode_success; + } + + /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs + from E(xmm or mem) to G(xmm) */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF6) { + IRTemp s1V = newTemp(Ity_V128); + IRTemp s2V = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1Hi = newTemp(Ity_I64); + IRTemp s1Lo = newTemp(Ity_I64); + IRTemp s2Hi = newTemp(Ity_I64); + IRTemp s2Lo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 2+1; + DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("psadbw %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); + assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); + assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); + assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); + assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); + assign( dHi, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_calculate_mmx_psadbw", + &amd64g_calculate_mmx_psadbw, + mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) + )); + assign( dLo, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "amd64g_calculate_mmx_psadbw", + &amd64g_calculate_mmx_psadbw, + mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) + )); + assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; + putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ + if 
(have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + order = (Int)insn[3]; + delta += 3+1; + DIP("pshufd $%d,%s,%s\n", order, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*byte after the amode*/ ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 2+alen+1; + DIP("pshufd $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) + assign(dV, + mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or + mem) to G(xmm), and copy lower half */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + sVhi = newTemp(Ity_I64); + dVhi = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + order = (Int)insn[3]; + delta += 3+1; + DIP("pshufhw $%d,%s,%s\n", order, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*byte after the amode*/ ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 2+alen+1; + DIP("pshufhw $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + 
assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); + breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) + assign(dVhi, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + assign(dV, binop( Iop_64HLtoV128, + mkexpr(dVhi), + unop(Iop_V128to64, mkexpr(sV))) ); + putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or + mem) to G(xmm), and copy upper half */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + sVlo = newTemp(Ity_I64); + dVlo = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + order = (Int)insn[3]; + delta += 3+1; + DIP("pshuflw $%d,%s,%s\n", order, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, + 1/*byte after the amode*/ ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 2+alen+1; + DIP("pshuflw $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); + breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + assign(dVlo, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + assign(dV, binop( Iop_64HLtoV128, + unop(Iop_V128HIto64, mkexpr(sV)), + mkexpr(dVlo) ) ); + putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* 66 0F 72 /6 ib = PSLLD by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 ); + goto decode_success; + } + + /* 66 0F F2 = PSLLD by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF2) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 ); + goto decode_success; + } + + /* 66 0F 73 /7 ib = PSLLDQ by immediate */ + /* note, if mem case ever filled in, 1 byte after amode */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 7) { + IRTemp sV, dV, hi64, lo64, hi64r, lo64r; + Int imm = (Int)insn[3]; + Int reg = eregOfRexRM(pfx,insn[2]); + DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); + vassert(imm >= 0 && imm <= 255); + delta += 4; + + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + hi64 = newTemp(Ity_I64); + lo64 = newTemp(Ity_I64); + hi64r = newTemp(Ity_I64); + lo64r = newTemp(Ity_I64); + + if (imm >= 16) { + putXMMReg(reg, mkV128(0x0000)); + goto decode_success; + } + + assign( sV, getXMMReg(reg) ); + assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); + + if (imm == 0) { + assign( lo64r, mkexpr(lo64) ); + assign( hi64r, mkexpr(hi64) ); + } + else + if (imm == 8) { + assign( lo64r, mkU64(0) ); + assign( hi64r, mkexpr(lo64) ); + } + else + if (imm > 8) { + assign( lo64r, mkU64(0) ); + assign( hi64r, binop( Iop_Shl64, + mkexpr(lo64), + mkU8( 8*(imm-8) ) )); + } else { + assign( lo64r, binop( Iop_Shl64, + mkexpr(lo64), 
+ mkU8(8 * imm) )); + assign( hi64r, + binop( Iop_Or64, + binop(Iop_Shl64, mkexpr(hi64), + mkU8(8 * imm)), + binop(Iop_Shr64, mkexpr(lo64), + mkU8(8 * (8 - imm)) ) + ) + ); + } + assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); + putXMMReg(reg, mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 73 /6 ib = PSLLQ by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 ); + goto decode_success; + } + + /* 66 0F F3 = PSLLQ by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF3) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 ); + goto decode_success; + } + + /* 66 0F 71 /6 ib = PSLLW by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 ); + goto decode_success; + } + + /* 66 0F F1 = PSLLW by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF1) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 ); + goto decode_success; + } + + /* 66 0F 72 /4 ib = PSRAD by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 4) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 ); + goto decode_success; + } + + /* 66 0F E2 = PSRAD by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE2) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 ); + goto decode_success; + } + + /* 66 0F 71 /4 ib = PSRAW by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 4) { + delta = dis_SSE_shiftE_imm( pfx, 
delta+2, "psraw", Iop_SarN16x8 ); + goto decode_success; + } + + /* 66 0F E1 = PSRAW by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE1) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 ); + goto decode_success; + } + + /* 66 0F 72 /2 ib = PSRLD by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 2) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 ); + goto decode_success; + } + + /* 66 0F D2 = PSRLD by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD2) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 ); + goto decode_success; + } + + /* 66 0F 73 /3 ib = PSRLDQ by immediate */ + /* note, if mem case ever filled in, 1 byte after amode */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 3) { + IRTemp sV, dV, hi64, lo64, hi64r, lo64r; + Int imm = (Int)insn[3]; + Int reg = eregOfRexRM(pfx,insn[2]); + DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); + vassert(imm >= 0 && imm <= 255); + delta += 4; + + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + hi64 = newTemp(Ity_I64); + lo64 = newTemp(Ity_I64); + hi64r = newTemp(Ity_I64); + lo64r = newTemp(Ity_I64); + + if (imm >= 16) { + putXMMReg(reg, mkV128(0x0000)); + goto decode_success; + } + + assign( sV, getXMMReg(reg) ); + assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); + + if (imm == 0) { + assign( lo64r, mkexpr(lo64) ); + assign( hi64r, mkexpr(hi64) ); + } + else + if (imm == 8) { + assign( hi64r, mkU64(0) ); + assign( lo64r, mkexpr(hi64) ); + } + else + if (imm > 8) { + assign( hi64r, mkU64(0) ); + assign( lo64r, binop( Iop_Shr64, + mkexpr(hi64), + mkU8( 8*(imm-8) ) )); + } else { + assign( hi64r, binop( Iop_Shr64, + mkexpr(hi64), + mkU8(8 * imm) )); + assign( 
lo64r, + binop( Iop_Or64, + binop(Iop_Shr64, mkexpr(lo64), + mkU8(8 * imm)), + binop(Iop_Shl64, mkexpr(hi64), + mkU8(8 * (8 - imm)) ) + ) + ); + } + + assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); + putXMMReg(reg, mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 73 /2 ib = PSRLQ by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 2) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 ); + goto decode_success; + } + + /* 66 0F D3 = PSRLQ by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD3) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 ); + goto decode_success; + } + + /* 66 0F 71 /2 ib = PSRLW by immediate */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregLO3ofRM(insn[2]) == 2) { + delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 ); + goto decode_success; + } + + /* 66 0F D1 = PSRLW by E */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD1) { + delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 ); + goto decode_success; + } + + /* 66 0F F8 = PSUBB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF8) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubb", Iop_Sub8x16, False ); + goto decode_success; + } + + /* 66 0F FA = PSUBD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xFA) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubd", Iop_Sub32x4, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F FB = PSUBQ -- sub 64x1 */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xFB) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + vbi, pfx, delta+2, insn[1], "psubq", False ); + goto decode_success; 
+ } + + /* 66 0F FB = PSUBQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xFB) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubq", Iop_Sub64x2, False ); + goto decode_success; + } + + /* 66 0F F9 = PSUBW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xF9) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubw", Iop_Sub16x8, False ); + goto decode_success; + } + + /* 66 0F E8 = PSUBSB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE8) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubsb", Iop_QSub8Sx16, False ); + goto decode_success; + } + + /* 66 0F E9 = PSUBSW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xE9) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubsw", Iop_QSub16Sx8, False ); + goto decode_success; + } + + /* 66 0F D8 = PSUBUSB */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD8) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubusb", Iop_QSub8Ux16, False ); + goto decode_success; + } + + /* 66 0F D9 = PSUBUSW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD9) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "psubusw", Iop_QSub16Ux8, False ); + goto decode_success; + } + + /* 66 0F 68 = PUNPCKHBW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x68) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpckhbw", + Iop_InterleaveHI8x16, True ); + goto decode_success; + } + + /* 66 0F 6A = PUNPCKHDQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x6A) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpckhdq", + Iop_InterleaveHI32x4, True ); + goto decode_success; + } + + /* 66 0F 6D = PUNPCKHQDQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x6D) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpckhqdq", + Iop_InterleaveHI64x2, True ); + goto
decode_success; + } + + /* 66 0F 69 = PUNPCKHWD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x69) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpckhwd", + Iop_InterleaveHI16x8, True ); + goto decode_success; + } + + /* 66 0F 60 = PUNPCKLBW */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x60) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpcklbw", + Iop_InterleaveLO8x16, True ); + goto decode_success; + } + + /* 66 0F 62 = PUNPCKLDQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x62) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpckldq", + Iop_InterleaveLO32x4, True ); + goto decode_success; + } + + /* 66 0F 6C = PUNPCKLQDQ */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x6C) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpcklqdq", + Iop_InterleaveLO64x2, True ); + goto decode_success; + } + + /* 66 0F 61 = PUNPCKLWD */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x61) { + delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, + "punpcklwd", + Iop_InterleaveLO16x8, True ); + goto decode_success; + } + + /* 66 0F EF = PXOR */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xEF) { + delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 ); + goto decode_success; + } + + //.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ + //.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE + //.. //-- && (!epartIsReg(insn[2])) + //.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { + //.. //-- Bool store = gregOfRM(insn[2]) == 0; + //.. //-- vg_assert(sz == 4); + //.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); + //.. //-- t1 = LOW24(pair); + //.. //-- eip += 2+HI8(pair); + //.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, + //.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], + //.. //-- Lit16, (UShort)insn[2], + //.. 
//-- TempReg, t1 ); + //.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); + //.. //-- goto decode_success; + //.. //-- } + + /* 0F AE /7 = CLFLUSH -- flush cache line */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) { + + /* This is something of a hack. We need to know the size of the + cache line containing addr. Since we don't (easily), assume + 256 on the basis that no real cache would have a line that + big. It's safe to invalidate more stuff than we need, just + inefficient. */ + ULong lineszB = 256ULL; + + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + + /* Round addr down to the start of the containing block. */ + stmt( IRStmt_Put( + OFFB_TISTART, + binop( Iop_And64, + mkexpr(addr), + mkU64( ~(lineszB-1) ))) ); + + stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) ); + + irsb->jumpkind = Ijk_TInval; + irsb->next = mkU64(guest_RIP_bbstart+delta); + dres.whatNext = Dis_StopHere; + + DIP("clflush %s\n", dis_buf); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE/SSE2 decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSE3 decoder. --- */ + /* ---------------------------------------------------- */ + + /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (2:2:0:0). */ + /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (3:3:1:1). */ + if (haveF3no66noF2(pfx) && sz == 4 + && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) { + IRTemp s3, s2, s1, s0; + IRTemp sV = newTemp(Ity_V128); + Bool isH = insn[1] == 0x16; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("movs%cdup %s,%s\n", isH ? 
'h' : 'l', + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + putXMMReg( gregOfRexRM(pfx,modrm), + isH ? mk128from32s( s3, s3, s1, s1 ) + : mk128from32s( s2, s2, s0, s0 ) ); + goto decode_success; + } + + /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (0:1:0:1). */ + if (haveF2no66noF3(pfx) + && (sz == 4 || /* ignore redundant REX.W */ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x12) { + IRTemp sV = newTemp(Ity_V128); + IRTemp d0 = newTemp(Ity_I64); + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movddup %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + putXMMReg( gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); + goto decode_success; + } + + /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). 
*/ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xD0) { + IRTemp a3, a2, a1, a0, s3, s2, s1, s0; + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp addV = newTemp(Ity_V128); + IRTemp subV = newTemp(Ity_V128); + a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("addsubps %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); + assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); + + breakup128to32s( addV, &a3, &a2, &a1, &a0 ); + breakup128to32s( subV, &s3, &s2, &s1, &s0 ); + + putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 )); + goto decode_success; + } + + /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm).
*/ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0xD0) { + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp addV = newTemp(Ity_V128); + IRTemp subV = newTemp(Ity_V128); + IRTemp a1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("addsubpd %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); + assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); + + assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); + assign( s0, unop(Iop_V128to64, mkexpr(subV) )); + + putXMMReg( gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); + goto decode_success; + } + + /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ + /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { + IRTemp e3, e2, e1, e0, g3, g2, g1, g0; + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp leftV = newTemp(Ity_V128); + IRTemp rightV = newTemp(Ity_V128); + Bool isAdd = insn[1] == 0x7C; + HChar* str = isAdd ?
"add" : "sub"; + e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("h%sps %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + breakup128to32s( eV, &e3, &e2, &e1, &e0 ); + breakup128to32s( gV, &g3, &g2, &g1, &g0 ); + + assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); + assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); + + putXMMReg( gregOfRexRM(pfx,modrm), + binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, + mkexpr(leftV), mkexpr(rightV) ) ); + goto decode_success; + } + + /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ + /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { + IRTemp e1 = newTemp(Ity_I64); + IRTemp e0 = newTemp(Ity_I64); + IRTemp g1 = newTemp(Ity_I64); + IRTemp g0 = newTemp(Ity_I64); + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp leftV = newTemp(Ity_V128); + IRTemp rightV = newTemp(Ity_V128); + Bool isAdd = insn[1] == 0x7C; + HChar* str = isAdd ? 
"add" : "sub"; + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("h%spd %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); + assign( e0, unop(Iop_V128to64, mkexpr(eV) )); + assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); + assign( g0, unop(Iop_V128to64, mkexpr(gV) )); + + assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); + assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); + + putXMMReg( gregOfRexRM(pfx,modrm), + binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, + mkexpr(leftV), mkexpr(rightV) ) ); + goto decode_success; + } + + /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ + if (haveF2no66noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xF0) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + goto decode_failure; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + putXMMReg( gregOfRexRM(pfx,modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("lddqu %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE3 decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSSE3 decoder. 
--- */ + /* ---------------------------------------------------- */ + + /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (MMX) */ + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp sVoddsSX = newTemp(Ity_I64); + IRTemp sVevensSX = newTemp(Ity_I64); + IRTemp dVoddsZX = newTemp(Ity_I64); + IRTemp dVevensZX = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x4, + binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x4, + binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putMMXReg( + gregLO3ofRM(modrm), + binop(Iop_QAdd16Sx4, + binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sVoddsSX = newTemp(Ity_V128); + IRTemp sVevensSX = newTemp(Ity_V128); + IRTemp dVoddsZX = newTemp(Ity_V128); + IRTemp dVevensZX = 
newTemp(Ity_V128); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x8, + binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_QAdd16Sx8, + binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ + /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G + to G (mmx). 
*/ + + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("ph%s %s,%s\n", str, dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + putMMXReg( + gregLO3ofRM(modrm), + binop(opV64, + binop(opCatE,mkexpr(sV),mkexpr(dV)), + binop(opCatO,mkexpr(sV),mkexpr(dV)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and + G to G (xmm). 
*/ + /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and + G to G (xmm). */ + + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); + DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("ph%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + delta += 3+alen; + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + /* This isn't a
particularly efficient way to compute the + result, but at least it avoids a proliferation of IROps, + hence avoids complicating all the backends. */ + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, + binop(opV64, + binop(opCatE,mkexpr(sHi),mkexpr(sLo)), + binop(opCatO,mkexpr(sHi),mkexpr(sLo)) + ), + binop(opV64, + binop(opCatE,mkexpr(dHi),mkexpr(dLo)), + binop(opCatO,mkexpr(dHi),mkexpr(dLo)) + ) + ) + ); + goto decode_success; + } + + /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale + (MMX) */ + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + putMMXReg( + gregLO3ofRM(modrm), + dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) + ); + goto decode_success; + } + + /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and + Scale (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, + dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), + dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) + ) + ); + goto decode_success; + } + + /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ + /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ + /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + putMMXReg( + gregLO3ofRM(modrm), + dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ + /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ + /* 66 0F 38 0A = PSIGND -- Packed
Sign 32x4 (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, + dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), + dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ + /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ + /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case
0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + putMMXReg( + gregLO3ofRM(modrm), + dis_PABS_helper( mkexpr(sV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ + /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ + /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + 
gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, + dis_PABS_helper( mkexpr(sHi), laneszB ), + dis_PABS_helper( mkexpr(sLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX). The result is the low 64 bits of the 128-bit value dV:sV shifted right by imm8 bytes. */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + d64 = (Long)insn[3+1]; + delta += 3+1+1; + DIP("palignr $%d,%s,%s\n", (Int)d64, + nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + d64 = (Long)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d,%s,%s\n", (Int)d64, + dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + if (d64 == 0) { + assign( res, mkexpr(sV) ); + } + else if (d64 >= 1 && d64 <= 7) { + assign(res, + binop(Iop_Or64, + binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), + binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) + ))); + } + else if (d64 == 8) { + assign( res, mkexpr(dV) ); + } + else if (d64 >= 9 && d64 <= 15) { + assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); + } + else if (d64 >= 16 && d64 <= 255) { + assign( res, mkU64(0) ); + } + else + vassert(0); + + putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); + goto decode_success; + } + + /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = 
newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + d64 = (Long)insn[3+1]; + delta += 3+1+1; + DIP("palignr $%d,%s,%s\n", (Int)d64, + nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + d64 = (Long)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d,%s,%s\n", (Int)d64, + dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + if (d64 == 0) { + assign( rHi, mkexpr(sHi) ); + assign( rLo, mkexpr(sLo) ); + } + else if (d64 >= 1 && d64 <= 7) { + assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) ); + assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) ); + } + else if (d64 == 8) { + assign( rHi, mkexpr(dLo) ); + assign( rLo, mkexpr(sHi) ); + } + else if (d64 >= 9 && d64 <= 15) { + assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) ); + assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) ); + } + else if (d64 == 16) { + assign( rHi, mkexpr(dHi) ); + assign( rLo, mkexpr(dLo) ); + } + else if (d64 >= 17 && d64 <= 23) { + assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) ); + assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) ); + } + else if (d64 == 24) { + assign( rHi, mkU64(0) ); + assign( rLo, mkexpr(dHi) ); + } + else if (d64 >= 25 && d64 <= 31) { + assign( rHi, mkU64(0) ); + assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) ); + } + else if (d64 >= 32 && d64 <= 255) { + assign( rHi, mkU64(0) ); + assign( rLo, mkU64(0) ); + } + else + vassert(0); + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + 
goto decode_success; + } + + /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ + if (haveNo66noF2noF3(pfx) + && sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), + nameMMXReg(gregLO3ofRM(modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameMMXReg(gregLO3ofRM(modrm))); + } + + putMMXReg( + gregLO3ofRM(modrm), + binop( + Iop_And64, + /* permute the lanes */ + binop( + Iop_Perm8x8, + mkexpr(dV), + binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) + ), + /* mask off lanes which have (index & 0x80) == 0x80 */ + unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) + ) + ); + goto decode_success; + } + + /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ + if (have66noF2noF3(pfx) + && (sz == 2 || /*redundant REX.W*/ sz == 8) + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp sevens = newTemp(Ity_I64); + IRTemp mask0x80hi = newTemp(Ity_I64); + IRTemp mask0x80lo = newTemp(Ity_I64); + IRTemp maskBit3hi = newTemp(Ity_I64); + IRTemp maskBit3lo = newTemp(Ity_I64); + IRTemp sAnd7hi = newTemp(Ity_I64); + IRTemp sAnd7lo = newTemp(Ity_I64); + IRTemp permdHi = newTemp(Ity_I64); + IRTemp permdLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, 
getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + assign( sevens, mkU64(0x0707070707070707ULL) ); + + /* + mask0x80hi = Not(SarN8x8(sHi,7)) + maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) + sAnd7hi = And(sHi,sevens) + permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), + And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) + rHi = And(permdHi,mask0x80hi) + */ + assign( + mask0x80hi, + unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); + + assign( + maskBit3hi, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), + mkU8(7))); + + assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); + + assign( + permdHi, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), + mkexpr(maskBit3hi)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), + unop(Iop_Not64,mkexpr(maskBit3hi))) )); + + assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); + + /* And the same for the lower half of the result. What fun. 
*/ + + assign( + mask0x80lo, + unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); + + assign( + maskBit3lo, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), + mkU8(7))); + + assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); + + assign( + permdLo, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), + mkexpr(maskBit3lo)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), + unop(Iop_Not64,mkexpr(maskBit3lo))) )); + + assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); + + putXMMReg( + gregOfRexRM(pfx,modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSSE3 decoder. --- */ + /* ---------------------------------------------------- */ + + /*after_sse_decoders:*/ + + /* Get the primary opcode. */ + opc = getUChar(delta); delta++; + + /* We get here if the current insn isn't SSE, or this CPU doesn't + support SSE. */ + + switch (opc) { + + /* ------------------------ Control flow --------------- */ + + case 0xC2: /* RET imm16 */ + if (have66orF2orF3(pfx)) goto decode_failure; + d64 = getUDisp16(delta); + delta += 2; + dis_ret(vbi, d64); + dres.whatNext = Dis_StopHere; + DIP("ret %lld\n", d64); + break; + + case 0xC3: /* RET */ + if (have66orF2(pfx)) goto decode_failure; + /* F3 is acceptable on AMD. */ + dis_ret(vbi, 0); + dres.whatNext = Dis_StopHere; + DIP(haveF3(pfx) ? 
"rep ; ret\n" : "ret\n"); + break; + + case 0xE8: /* CALL J4 */ + if (haveF2orF3(pfx)) goto decode_failure; + d64 = getSDisp32(delta); delta += 4; + d64 += (guest_RIP_bbstart+delta); + /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ + t1 = newTemp(Ity_I64); + assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); + putIReg64(R_RSP, mkexpr(t1)); + storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); + t2 = newTemp(Ity_I64); + assign(t2, mkU64((Addr64)d64)); + make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); + if (resteerOkFn( callback_opaque, (Addr64)d64) ) { + /* follow into the call target. */ + dres.whatNext = Dis_Resteer; + dres.continueAt = d64; + } else { + jmp_lit(Ijk_Call,d64); + dres.whatNext = Dis_StopHere; + } + DIP("call 0x%llx\n",d64); + break; + + //.. //-- case 0xC8: /* ENTER */ + //.. //-- d32 = getUDisp16(eip); eip += 2; + //.. //-- abyte = getUChar(delta); delta++; + //.. //-- + //.. //-- vg_assert(sz == 4); + //.. //-- vg_assert(abyte == 0); + //.. //-- + //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); + //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); + //.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + //.. //-- uLiteral(cb, sz); + //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); + //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); + //.. //-- if (d32) { + //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + //.. //-- uLiteral(cb, d32); + //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + //.. //-- } + //.. //-- DIP("enter 0x%x, 0x%x", d32, abyte); + //.. //-- break; + + case 0xC9: /* LEAVE */ + /* In 64-bit mode this defaults to a 64-bit operand size. There + is no way to encode a 32-bit variant. Hence sz==4 but we do + it as if sz=8. 
*/ + if (sz != 4) + goto decode_failure; + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, getIReg64(R_RBP)); + /* First PUT RSP looks redundant, but need it because RSP must + always be up-to-date for Memcheck to work... */ + putIReg64(R_RSP, mkexpr(t1)); + assign(t2, loadLE(Ity_I64,mkexpr(t1))); + putIReg64(R_RBP, mkexpr(t2)); + putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); + DIP("leave\n"); + break; + + //.. //-- /* ---------------- Misc weird-ass insns --------------- */ + //.. //-- + //.. //-- case 0x27: /* DAA */ + //.. //-- case 0x2F: /* DAS */ + //.. //-- t1 = newTemp(cb); + //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1); + //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ + //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); + //.. //-- uWiden(cb, 1, False); + //.. //-- uInstr0(cb, CALLM_S, 0); + //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); + //.. //-- uInstr1(cb, CALLM, 0, Lit16, + //.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) ); + //.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO); + //.. //-- uInstr1(cb, POP, 4, TempReg, t1); + //.. //-- uInstr0(cb, CALLM_E, 0); + //.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL); + //.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n"); + //.. //-- break; + //.. //-- + //.. //-- case 0x37: /* AAA */ + //.. //-- case 0x3F: /* AAS */ + //.. //-- t1 = newTemp(cb); + //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); + //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ + //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); + //.. //-- uWiden(cb, 2, False); + //.. //-- uInstr0(cb, CALLM_S, 0); + //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); + //.. //-- uInstr1(cb, CALLM, 0, Lit16, + //.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) ); + //.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty); + //.. //-- uInstr1(cb, POP, 4, TempReg, t1); + //.. //-- uInstr0(cb, CALLM_E, 0); + //.. 
//-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + //.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n"); + //.. //-- break; + //.. //-- + //.. //-- case 0xD4: /* AAM */ + //.. //-- case 0xD5: /* AAD */ + //.. //-- d32 = getUChar(delta); delta++; + //.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !"); + //.. //-- t1 = newTemp(cb); + //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); + //.. //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */ + //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); + //.. //-- uWiden(cb, 2, False); + //.. //-- uInstr0(cb, CALLM_S, 0); + //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); + //.. //-- uInstr1(cb, CALLM, 0, Lit16, + //.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) ); + //.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty); + //.. //-- uInstr1(cb, POP, 4, TempReg, t1); + //.. //-- uInstr0(cb, CALLM_E, 0); + //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + //.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n"); + //.. //-- break; + + /* ------------------------ CWD/CDQ -------------------- */ + + case 0x98: /* CBW / CWDE / CDQE, selected by operand size */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz == 8) { + putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); + DIP(/*"cdqe\n"*/"cltq\n"); + break; + } + if (sz == 4) { + putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); + DIP("cwtl\n"); + break; + } + if (sz == 2) { + putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); + DIP("cbw\n"); + break; + } + goto decode_failure; + + case 0x99: /* CWD/CDQ/CQO */ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4 || sz == 8); + ty = szToITy(sz); + putIRegRDX( sz, + binop(mkSizedOp(ty,Iop_Sar8), + getIRegRAX(sz), + mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); + DIP(sz == 2 ? "cwd\n" + : (sz == 4 ? 
/*"cdq\n"*/ "cltd\n" + : "cqo\n")); + break; + + /* ------------------------ FPU ops -------------------- */ + + case 0x9E: /* SAHF */ + codegen_SAHF(); + DIP("sahf\n"); + break; + + case 0x9F: /* LAHF */ + codegen_LAHF(); + DIP("lahf\n"); + break; + + case 0x9B: /* FWAIT */ + /* ignore? */ + DIP("fwait\n"); + break; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: { + Bool redundantREXWok = False; + + if (haveF2orF3(pfx)) + goto decode_failure; + + /* kludge to tolerate redundant rex.w prefixes (should do this + properly one day) */ + /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ + if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) + redundantREXWok = True; + + if ( (sz == 4 + || (sz == 8 && redundantREXWok)) + && haveNo66noF2noF3(pfx)) { + Long delta0 = delta; + Bool decode_OK = False; + delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); + if (!decode_OK) { + delta = delta0; + goto decode_failure; + } + break; + } else { + goto decode_failure; + } + } + + /* ------------------------ INT ------------------------ */ + + case 0xCC: /* INT 3 */ + jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta); + dres.whatNext = Dis_StopHere; + DIP("int $0x3\n"); + break; + + case 0xCD: { /* INT imm8 */ + IRJumpKind jk = Ijk_Boring; + if (have66orF2orF3(pfx)) goto decode_failure; + d64 = getUChar(delta); delta++; + switch (d64) { + case 32: jk = Ijk_Sys_int32; break; + default: goto decode_failure; + } + guest_RIP_next_mustcheck = True; + guest_RIP_next_assumed = guest_RIP_bbstart + delta; + jmp_lit(jk, guest_RIP_next_assumed); + /* It's important that all ArchRegs carry their up-to-date value + at this point. So we declare an end-of-block here, which + forces any TempRegs caching ArchRegs to be flushed. 
*/ + dres.whatNext = Dis_StopHere; + DIP("int $0x%02x\n", (UInt)d64); + break; + } + + /* ------------------------ Jcond, byte offset --------- */ + + case 0xEB: /* Jb (jump, byte offset) */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 4) + goto decode_failure; /* JRS added 2004 July 11 */ + d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); + delta++; + if (resteerOkFn(callback_opaque,d64)) { + dres.whatNext = Dis_Resteer; + dres.continueAt = d64; + } else { + jmp_lit(Ijk_Boring,d64); + dres.whatNext = Dis_StopHere; + } + DIP("jmp-8 0x%llx\n", d64); + break; + + case 0xE9: /* Jv (jump, 16/32 offset) */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 4) + goto decode_failure; /* JRS added 2004 July 11 */ + d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); + delta += sz; + if (resteerOkFn(callback_opaque,d64)) { + dres.whatNext = Dis_Resteer; + dres.continueAt = d64; + } else { + jmp_lit(Ijk_Boring,d64); + dres.whatNext = Dis_StopHere; + } + DIP("jmp 0x%llx\n", d64); + break; + + case 0x70: /* JOb (jump overflow) */ + case 0x71: /* JNOb (jump no overflow) */ + case 0x72: /* JBb/JNAEb (jump below) */ + case 0x73: /* JNBb/JAEb (jump not below) */ + case 0x74: /* JZb/JEb (jump zero) */ + case 0x75: /* JNZb/JNEb (jump not zero) */ + case 0x76: /* JBEb/JNAb (jump below or equal) */ + case 0x77: /* JNBEb/JAb (jump not below or equal) */ + case 0x78: /* JSb (jump negative) */ + case 0x79: /* JNSb (jump not negative) */ + case 0x7A: /* JP (jump parity even) */ + case 0x7B: /* JNP/JPO (jump parity odd) */ + case 0x7C: /* JLb/JNGEb (jump less) */ + case 0x7D: /* JGEb/JNLb (jump greater or equal) */ + case 0x7E: /* JLEb/JNGb (jump less or equal) */ + case 0x7F: /* JGb/JNLEb (jump greater) */ + if (haveF2orF3(pfx)) goto decode_failure; + d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); + delta++; + jcc_01( (AMD64Condcode)(opc - 0x70), + guest_RIP_bbstart+delta, + d64 ); + dres.whatNext = Dis_StopHere; + DIP("j%s-8 0x%llx\n", name_AMD64Condcode(opc - 0x70), d64); + break; + + case 0xE3: + /* JRCXZ or 
JECXZ, depending address size override. */ + if (have66orF2orF3(pfx)) goto decode_failure; + d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); + delta++; + if (haveASO(pfx)) { + /* 32-bit */ + stmt( IRStmt_Exit( binop(Iop_CmpEQ64, + unop(Iop_32Uto64, getIReg32(R_RCX)), + mkU64(0)), + Ijk_Boring, + IRConst_U64(d64)) + ); + DIP("jecxz 0x%llx\n", d64); + } else { + /* 64-bit */ + stmt( IRStmt_Exit( binop(Iop_CmpEQ64, + getIReg64(R_RCX), + mkU64(0)), + Ijk_Boring, + IRConst_U64(d64)) + ); + DIP("jrcxz 0x%llx\n", d64); + } + break; + + case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ + case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ + case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ + { /* The docs say this uses rCX as a count depending on the + address size override, not the operand one. Since we don't + handle address size overrides, I guess that means RCX. */ + IRExpr* zbit = NULL; + IRExpr* count = NULL; + IRExpr* cond = NULL; + HChar* xtra = NULL; + + if (have66orF2orF3(pfx) || haveASO(pfx)) goto decode_failure; + d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); + delta++; + putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); + + count = getIReg64(R_RCX); + cond = binop(Iop_CmpNE64, count, mkU64(0)); + switch (opc) { + case 0xE2: + xtra = ""; + break; + case 0xE1: + xtra = "e"; + zbit = mk_amd64g_calculate_condition( AMD64CondZ ); + cond = mkAnd1(cond, zbit); + break; + case 0xE0: + xtra = "ne"; + zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); + cond = mkAnd1(cond, zbit); + break; + default: + vassert(0); + } + stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) ); + + DIP("loop%s 0x%llx\n", xtra, d64); + break; + } + + /* ------------------------ IMUL ----------------------- */ + + case 0x69: /* IMUL Iv, Ev, Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); + break; + case 0x6B: /* IMUL Ib, Ev, Gv */ + delta = 
dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); + break; + + /* ------------------------ MOV ------------------------ */ + + case 0x88: /* MOV Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_mov_G_E(vbi, pfx, 1, delta); + break; + + case 0x89: /* MOV Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_mov_G_E(vbi, pfx, sz, delta); + break; + + case 0x8A: /* MOV Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_mov_E_G(vbi, pfx, 1, delta); + break; + + case 0x8B: /* MOV Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_mov_E_G(vbi, pfx, sz, delta); + break; + + case 0x8D: /* LEA M,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 4 && sz != 8) + goto decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) + goto decode_failure; + /* NOTE! this is the one place where a segment override prefix + has no effect on the address calculation. Therefore we clear + any segment override bits in pfx. */ + addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); + delta += alen; + /* This is a hack. But it isn't clear that really doing the + calculation at 32 bits is really worth it. Hence for leal, + do the full 64-bit calculation and then truncate it. */ + putIRegG( sz, pfx, modrm, + sz == 4 + ? unop(Iop_64to32, mkexpr(addr)) + : mkexpr(addr) + ); + DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, + nameIRegG(sz,pfx,modrm)); + break; + + //.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ + //.. delta = dis_mov_Sw_Ew(sorb, sz, delta); + //.. break; + //.. + //.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ + //.. delta = dis_mov_Ew_Sw(sorb, delta); + //.. break; + + case 0xA0: /* MOV Ob,AL */ + if (have66orF2orF3(pfx)) goto decode_failure; + sz = 1; + /* Fall through ... 
*/ + case 0xA1: /* MOV Ov,eAX */ + if (sz != 8 && sz != 4 && sz != 2 && sz != 1) + goto decode_failure; + d64 = getDisp64(delta); + delta += 8; + ty = szToITy(sz); + addr = newTemp(Ity_I64); + assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); + putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); + DIP("mov%c %s0x%llx, %s\n", nameISize(sz), + segRegTxt(pfx), d64, + nameIRegRAX(sz)); + break; + + case 0xA2: /* MOV AL,Ob */ + if (have66orF2orF3(pfx)) goto decode_failure; + sz = 1; + /* Fall through ... */ + case 0xA3: /* MOV eAX,Ov */ + if (sz != 8 && sz != 4 && sz != 2 && sz != 1) + goto decode_failure; + d64 = getDisp64(delta); + delta += 8; + ty = szToITy(sz); + addr = newTemp(Ity_I64); + assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); + storeLE( mkexpr(addr), getIRegRAX(sz) ); + DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), + segRegTxt(pfx), d64); + break; + + /* XXXX be careful here with moves to AH/BH/CH/DH */ + case 0xB0: /* MOV imm,AL */ + case 0xB1: /* MOV imm,CL */ + case 0xB2: /* MOV imm,DL */ + case 0xB3: /* MOV imm,BL */ + case 0xB4: /* MOV imm,AH */ + case 0xB5: /* MOV imm,CH */ + case 0xB6: /* MOV imm,DH */ + case 0xB7: /* MOV imm,BH */ + if (haveF2orF3(pfx)) goto decode_failure; + d64 = getUChar(delta); + delta += 1; + putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); + DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); + break; + + case 0xB8: /* MOV imm,eAX */ + case 0xB9: /* MOV imm,eCX */ + case 0xBA: /* MOV imm,eDX */ + case 0xBB: /* MOV imm,eBX */ + case 0xBC: /* MOV imm,eSP */ + case 0xBD: /* MOV imm,eBP */ + case 0xBE: /* MOV imm,eSI */ + case 0xBF: /* MOV imm,eDI */ + /* This is the one-and-only place where 64-bit literals are + allowed in the instruction stream. 
*/ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz == 8) { + d64 = getDisp64(delta); + delta += 8; + putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); + DIP("movabsq $%lld,%s\n", (Long)d64, + nameIRegRexB(8,pfx,opc-0xB8)); + } else { + d64 = getSDisp(imin(4,sz),delta); + delta += imin(4,sz); + putIRegRexB(sz, pfx, opc-0xB8, + mkU(szToITy(sz), d64 & mkSizeMask(sz))); + DIP("mov%c $%lld,%s\n", nameISize(sz), + (Long)d64, + nameIRegRexB(sz,pfx,opc-0xB8)); + } + break; + + case 0xC6: /* MOV Ib,Eb */ + sz = 1; + goto do_Mov_I_E; + case 0xC7: /* MOV Iv,Ev */ + goto do_Mov_I_E; + + do_Mov_I_E: + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; /* mod/rm byte */ + d64 = getSDisp(imin(4,sz),delta); + delta += imin(4,sz); + putIRegE(sz, pfx, modrm, + mkU(szToITy(sz), d64 & mkSizeMask(sz))); + DIP("mov%c $%lld, %s\n", nameISize(sz), + (Long)d64, + nameIRegE(sz,pfx,modrm)); + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, + /*xtra*/imin(4,sz) ); + delta += alen; + d64 = getSDisp(imin(4,sz),delta); + delta += imin(4,sz); + storeLE(mkexpr(addr), + mkU(szToITy(sz), d64 & mkSizeMask(sz))); + DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); + } + break; + + /* ------------------------ MOVx ------------------------ */ + + case 0x63: /* MOVSX */ + if (haveF2orF3(pfx)) goto decode_failure; + if (haveREX(pfx) && 1==getRexW(pfx)) { + vassert(sz == 8); + /* movsx r/m32 to r64 */ + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putIRegG(8, pfx, modrm, + unop(Iop_32Sto64, + getIRegE(4, pfx, modrm))); + DIP("movslq %s,%s\n", + nameIRegE(4, pfx, modrm), + nameIRegG(8, pfx, modrm)); + break; + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + putIRegG(8, pfx, modrm, + unop(Iop_32Sto64, + loadLE(Ity_I32, mkexpr(addr)))); + DIP("movslq %s,%s\n", dis_buf, + nameIRegG(8, pfx, modrm)); + break; + } + } else { + goto decode_failure; + } + + /* 
------------------------ opl imm, A ----------------- */ + + case 0x04: /* ADD Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); + break; + case 0x05: /* ADD Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); + break; + + case 0x0C: /* OR Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); + break; + case 0x0D: /* OR Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); + break; + + case 0x14: /* ADC Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); + break; + //.. //-- case 0x15: /* ADC Iv, eAX */ + //.. //-- delta = dis_op_imm_A( sz, ADC, True, delta, "adc" ); + //.. //-- break; + + case 0x1C: /* SBB Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); + break; + //.. //-- case 0x1D: /* SBB Iv, eAX */ + //.. //-- delta = dis_op_imm_A( sz, SBB, True, delta, "sbb" ); + //.. //-- break; + //.. 
//-- + case 0x24: /* AND Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); + break; + case 0x25: /* AND Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); + break; + + case 0x2C: /* SUB Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); + break; + case 0x2D: /* SUB Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); + break; + + case 0x34: /* XOR Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); + break; + case 0x35: /* XOR Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); + break; + + case 0x3C: /* CMP Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); + break; + case 0x3D: /* CMP Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); + break; + + case 0xA8: /* TEST Ib, AL */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); + break; + case 0xA9: /* TEST Iv, eAX */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); + break; + + /* ------------------------ opl Ev, Gv ----------------- */ + + case 0x02: /* ADD Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); + break; + case 0x03: /* ADD Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); + break; + + case 0x0A: /* OR Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G 
( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); + break; + case 0x0B: /* OR Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); + break; + + case 0x12: /* ADC Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); + break; + case 0x13: /* ADC Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); + break; + + //.. //-- case 0x1A: /* SBB Eb,Gb */ + //.. //-- delta = dis_op2_E_G ( sorb, True, SBB, True, 1, delta, "sbb" ); + //.. //-- break; + case 0x1B: /* SBB Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); + break; + + case 0x22: /* AND Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); + break; + case 0x23: /* AND Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); + break; + + case 0x2A: /* SUB Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); + break; + case 0x2B: /* SUB Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); + break; + + case 0x32: /* XOR Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); + break; + case 0x33: /* XOR Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); + break; + + case 0x3A: /* CMP Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); + break; + case 0x3B: /* CMP Ev,Gv */ + if (haveF2orF3(pfx)) 
goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); + break; + + case 0x84: /* TEST Eb,Gb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); + break; + case 0x85: /* TEST Ev,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); + break; + + /* ------------------------ opl Gv, Ev ----------------- */ + + case 0x00: /* ADD Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); + break; + case 0x01: /* ADD Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); + break; + + case 0x08: /* OR Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); + break; + case 0x09: /* OR Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); + break; + + case 0x10: /* ADC Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); + break; + case 0x11: /* ADC Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); + break; + + case 0x18: /* SBB Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); + break; + case 0x19: /* SBB Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); + break; + + case 0x20: /* AND Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); + break; + case 0x21: /* AND Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + 
delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); + break; + + case 0x28: /* SUB Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); + break; + case 0x29: /* SUB Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); + break; + + case 0x30: /* XOR Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); + break; + case 0x31: /* XOR Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); + break; + + case 0x38: /* CMP Gb,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); + break; + case 0x39: /* CMP Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); + break; + + /* ------------------------ POP ------------------------ */ + + case 0x58: /* POP eAX */ + case 0x59: /* POP eCX */ + case 0x5A: /* POP eDX */ + case 0x5B: /* POP eBX */ + case 0x5D: /* POP eBP */ + case 0x5E: /* POP eSI */ + case 0x5F: /* POP eDI */ + case 0x5C: /* POP eSP */ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4 || sz == 8); + if (sz == 4) + sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ + t1 = newTemp(szToITy(sz)); + t2 = newTemp(Ity_I64); + assign(t2, getIReg64(R_RSP)); + assign(t1, loadLE(szToITy(sz),mkexpr(t2))); + putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); + putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); + DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); + break; + + case 0x9D: /* POPF */ + /* Note. There is no encoding for a 32-bit popf in 64-bit mode. + So sz==4 actually means sz==8. 
*/ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4); + if (sz == 4) sz = 8; + if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists + t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); + assign(t2, getIReg64(R_RSP)); + assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); + putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); + /* t1 is the flag word. Mask out everything except OSZACP and + set the flags thunk to AMD64G_CC_OP_COPY. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop(Iop_And64, + mkexpr(t1), + mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P + | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z + | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) + ) + ) + ); + + /* Also need to set the D flag, which is held in bit 10 of t1. + If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */ + stmt( IRStmt_Put( + OFFB_DFLAG, + IRExpr_Mux0X( + unop(Iop_32to8, + unop(Iop_64to32, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t1), mkU8(10)), + mkU64(1)))), + mkU64(1), + mkU64(0xFFFFFFFFFFFFFFFFULL))) + ); + + /* And set the ID flag */ + stmt( IRStmt_Put( + OFFB_IDFLAG, + IRExpr_Mux0X( + unop(Iop_32to8, + unop(Iop_64to32, + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(t1), mkU8(21)), + mkU64(1)))), + mkU64(0), + mkU64(1))) + ); + + DIP("popf%c\n", nameISize(sz)); + break; + + //.. case 0x61: /* POPA */ + //.. /* This is almost certainly wrong for sz==2. So ... */ + //.. if (sz != 4) goto decode_failure; + //.. + //.. /* t5 is the old %ESP value. */ + //.. t5 = newTemp(Ity_I32); + //.. assign( t5, getIReg(4, R_ESP) ); + //.. + //.. /* Reload all the registers, except %esp. */ + //.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); + //.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); + //.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); + //.. 
putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); + //.. /* ignore saved %ESP */ + //.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); + //.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); + //.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); + //.. + //.. /* and move %ESP back up */ + //.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); + //.. + //.. DIP("pusha%c\n", nameISize(sz)); + //.. break; + + case 0x8F: { /* POPQ m64 / POPW m16 */ + Int len; + UChar rm; + /* There is no encoding for 32-bit pop in 64-bit mode. + So sz==4 actually means sz==8. */ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4); + if (sz == 4) sz = 8; + if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists + + rm = getUChar(delta); + + /* make sure this instruction is correct POP */ + if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) + goto decode_failure; + /* and has correct size */ + vassert(sz == 8); + + t1 = newTemp(Ity_I64); + t3 = newTemp(Ity_I64); + assign( t1, getIReg64(R_RSP) ); + assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); + + /* Increase RSP; must be done before the STORE. Intel manual + says: If the RSP register is used as a base register for + addressing a destination operand in memory, the POP + instruction computes the effective address of the operand + after it increments the RSP register. */ + putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); + + addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); + storeLE( mkexpr(addr), mkexpr(t3) ); + + DIP("popl %s\n", dis_buf); + + delta += len; + break; + } + + //.. //-- case 0x1F: /* POP %DS */ + //.. //-- dis_pop_segreg( cb, R_DS, sz ); break; + //.. //-- case 0x07: /* POP %ES */ + //.. //-- dis_pop_segreg( cb, R_ES, sz ); break; + //.. //-- case 0x17: /* POP %SS */ + //.. 
//-- dis_pop_segreg( cb, R_SS, sz ); break; + + /* ------------------------ PUSH ----------------------- */ + + case 0x50: /* PUSH eAX */ + case 0x51: /* PUSH eCX */ + case 0x52: /* PUSH eDX */ + case 0x53: /* PUSH eBX */ + case 0x55: /* PUSH eBP */ + case 0x56: /* PUSH eSI */ + case 0x57: /* PUSH eDI */ + case 0x54: /* PUSH eSP */ + /* This is the Right Way, in that the value to be pushed is + established before %rsp is changed, so that pushq %rsp + correctly pushes the old value. */ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4 || sz == 8); + if (sz == 4) + sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ + ty = sz==2 ? Ity_I16 : Ity_I64; + t1 = newTemp(ty); + t2 = newTemp(Ity_I64); + assign(t1, getIRegRexB(sz, pfx, opc-0x50)); + assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); + putIReg64(R_RSP, mkexpr(t2) ); + storeLE(mkexpr(t2),mkexpr(t1)); + DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); + break; + + case 0x68: /* PUSH Iv */ + if (haveF2orF3(pfx)) goto decode_failure; + /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ + if (sz == 4) sz = 8; + d64 = getSDisp(imin(4,sz),delta); + delta += imin(4,sz); + goto do_push_I; + case 0x6A: /* PUSH Ib, sign-extended to sz */ + if (haveF2orF3(pfx)) goto decode_failure; + /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ + if (sz == 4) sz = 8; + d64 = getSDisp8(delta); delta += 1; + goto do_push_I; + do_push_I: + ty = szToITy(sz); + t1 = newTemp(Ity_I64); + t2 = newTemp(ty); + assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); + putIReg64(R_RSP, mkexpr(t1) ); + /* stop mkU16 asserting if d32 is a negative 16-bit number + (bug #132813) */ + if (ty == Ity_I16) + d64 &= 0xFFFF; + storeLE( mkexpr(t1), mkU(ty,d64) ); + DIP("push%c $%lld\n", nameISize(sz), (Long)d64); + break; + + case 0x9C: /* PUSHF */ { + /* Note. There is no encoding for a 32-bit pushf in 64-bit + mode. So sz==4 actually means sz==8. 
*/ + /* 24 July 06: has also been seen with a redundant REX prefix, + so must also allow sz==8. */ + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 2 || sz == 4 || sz == 8); + if (sz == 4) sz = 8; + if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists + + t1 = newTemp(Ity_I64); + assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); + putIReg64(R_RSP, mkexpr(t1) ); + + t2 = newTemp(Ity_I64); + assign( t2, mk_amd64g_calculate_rflags_all() ); + + /* Patch in the D flag. This can simply be a copy of bit 10 of + baseBlock[OFFB_DFLAG]. */ + t3 = newTemp(Ity_I64); + assign( t3, binop(Iop_Or64, + mkexpr(t2), + binop(Iop_And64, + IRExpr_Get(OFFB_DFLAG,Ity_I64), + mkU64(1<<10))) + ); + + /* And patch in the ID flag. */ + t4 = newTemp(Ity_I64); + assign( t4, binop(Iop_Or64, + mkexpr(t3), + binop(Iop_And64, + binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), + mkU8(21)), + mkU64(1<<21))) + ); + + /* if sz==2, the stored value needs to be narrowed. */ + if (sz == 2) + storeLE( mkexpr(t1), unop(Iop_32to16, + unop(Iop_64to32,mkexpr(t4))) ); + else + storeLE( mkexpr(t1), mkexpr(t4) ); + + DIP("pushf%c\n", nameISize(sz)); + break; + } + + //.. case 0x60: /* PUSHA */ + //.. /* This is almost certainly wrong for sz==2. So ... */ + //.. if (sz != 4) goto decode_failure; + //.. + //.. /* This is the Right Way, in that the value to be pushed is + //.. established before %esp is changed, so that pusha + //.. correctly pushes the old %esp value. New value of %esp is + //.. pushed at start. */ + //.. /* t0 is the %ESP value we're going to push. */ + //.. t0 = newTemp(Ity_I32); + //.. assign( t0, getIReg(4, R_ESP) ); + //.. + //.. /* t5 will be the new %ESP value. */ + //.. t5 = newTemp(Ity_I32); + //.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); + //.. + //.. /* Update guest state before prodding memory. */ + //.. putIReg(4, R_ESP, mkexpr(t5)); + //.. + //.. /* Dump all the registers. */ + //.. 
storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); + //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); + //.. + //.. DIP("pusha%c\n", nameISize(sz)); + //.. break; + //.. + //.. + //.. //-- case 0x0E: /* PUSH %CS */ + //.. //-- dis_push_segreg( cb, R_CS, sz ); break; + //.. //-- case 0x1E: /* PUSH %DS */ + //.. //-- dis_push_segreg( cb, R_DS, sz ); break; + //.. //-- case 0x06: /* PUSH %ES */ + //.. //-- dis_push_segreg( cb, R_ES, sz ); break; + //.. //-- case 0x16: /* PUSH %SS */ + //.. //-- dis_push_segreg( cb, R_SS, sz ); break; + //.. + //.. /* ------------------------ SCAS et al ----------------- */ + //.. + //.. case 0xA4: /* MOVS, no REP prefix */ + //.. case 0xA5: + //.. dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); + //.. break; + //.. + //.. case 0xA6: /* CMPSb, no REP prefix */ + //.. //-- case 0xA7: + //.. dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); + //.. break; + //.. //-- + //.. //-- + case 0xAC: /* LODS, no REP prefix */ + case 0xAD: + dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx ); + break; + //.. + //.. case 0xAE: /* SCAS, no REP prefix */ + //.. case 0xAF: + //.. dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb ); + //.. 
break; + + + case 0xFC: /* CLD */ + if (haveF2orF3(pfx)) goto decode_failure; + stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); + DIP("cld\n"); + break; + + case 0xFD: /* STD */ + if (haveF2orF3(pfx)) goto decode_failure; + stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); + DIP("std\n"); + break; + + case 0xF8: /* CLC */ + case 0xF9: /* STC */ + case 0xF5: /* CMC */ + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I64); + assign( t0, mk_amd64g_calculate_rflags_all() ); + switch (opc) { + case 0xF8: + assign( t1, binop(Iop_And64, mkexpr(t0), + mkU64(~AMD64G_CC_MASK_C))); + DIP("clc\n"); + break; + case 0xF9: + assign( t1, binop(Iop_Or64, mkexpr(t0), + mkU64(AMD64G_CC_MASK_C))); + DIP("stc\n"); + break; + case 0xF5: + assign( t1, binop(Iop_Xor64, mkexpr(t0), + mkU64(AMD64G_CC_MASK_C))); + DIP("cmc\n"); + break; + default: + vpanic("disInstr(x64)(clc/stc/cmc)"); + } + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + break; + + //.. /* REPNE prefix insn */ + //.. case 0xF2: { + //.. Addr32 eip_orig = guest_eip_bbstart + delta - 1; + //.. vassert(sorb == 0); + //.. abyte = getUChar(delta); delta++; + //.. + //.. if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; } + //.. whatNext = Dis_StopHere; + //.. + //.. switch (abyte) { + //.. /* According to the Intel manual, "repne movs" should never occur, but + //.. * in practice it has happened, so allow for it here... */ + //.. case 0xA4: sz = 1; /* REPNE MOVS */ + //.. goto decode_failure; + //.. //-- case 0xA5: + //.. // dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig, + //.. // guest_eip_bbstart+delta, "repne movs" ); + //.. // break; + //.. //-- + //.. //-- case 0xA6: sz = 1; /* REPNE CMPS */ + //.. //-- case 0xA7: + //.. 
//-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" ); + //.. //-- break; + //.. //-- + //.. case 0xAE: sz = 1; /* REPNE SCAS */ + //.. case 0xAF: + //.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, + //.. guest_eip_bbstart+delta, "repne scas" ); + //.. break; + //.. + //.. default: + //.. goto decode_failure; + //.. } + //.. break; + //.. } + + /* ------ AE: SCAS variants ------ */ + case 0xAE: + case 0xAF: + /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ + if (haveASO(pfx)) + goto decode_failure; + if (haveF2(pfx) && !haveF3(pfx)) { + if (opc == 0xAE) + sz = 1; + dis_REP_op ( AMD64CondNZ, dis_SCAS, sz, + guest_RIP_curr_instr, + guest_RIP_bbstart+delta, "repne scas", pfx ); + dres.whatNext = Dis_StopHere; + break; + } + /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ + if (haveASO(pfx)) + goto decode_failure; + if (!haveF2(pfx) && haveF3(pfx)) { + if (opc == 0xAE) + sz = 1; + dis_REP_op ( AMD64CondZ, dis_SCAS, sz, + guest_RIP_curr_instr, + guest_RIP_bbstart+delta, "repe scas", pfx ); + dres.whatNext = Dis_StopHere; + break; + } + /* AE/AF: scasb/scas{w,l,q} */ + if (!haveF2(pfx) && !haveF3(pfx)) { + if (opc == 0xAE) + sz = 1; + dis_string_op( dis_SCAS, sz, "scas", pfx ); + break; + } + goto decode_failure; + + /* ------ A6, A7: CMPS variants ------ */ + case 0xA6: + case 0xA7: + /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ + if (haveASO(pfx)) + goto decode_failure; + if (haveF3(pfx) && !haveF2(pfx)) { + if (opc == 0xA6) + sz = 1; + dis_REP_op ( AMD64CondZ, dis_CMPS, sz, + guest_RIP_curr_instr, + guest_RIP_bbstart+delta, "repe cmps", pfx ); + dres.whatNext = Dis_StopHere; + break; + } + goto decode_failure; + + /* ------ AA, AB: STOS variants ------ */ + case 0xAA: + case 0xAB: + /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ + if (haveASO(pfx)) + goto decode_failure; + if (haveF3(pfx) && !haveF2(pfx)) { + if (opc == 0xAA) + sz = 1; + dis_REP_op ( AMD64CondAlways, dis_STOS, sz, + guest_RIP_curr_instr, + guest_RIP_bbstart+delta, "rep stos", pfx ); + 
dres.whatNext = Dis_StopHere; + break; + } + /* AA/AB: stosb/stos{w,l,q} */ + if (!haveF3(pfx) && !haveF2(pfx)) { + if (opc == 0xAA) + sz = 1; + dis_string_op( dis_STOS, sz, "stos", pfx ); + break; + } + goto decode_failure; + + /* ------ A4, A5: MOVS variants ------ */ + case 0xA4: + case 0xA5: + /* F3 A4: rep movsb */ + if (haveASO(pfx)) + goto decode_failure; + if (haveF3(pfx) && !haveF2(pfx)) { + if (opc == 0xA4) + sz = 1; + dis_REP_op ( AMD64CondAlways, dis_MOVS, sz, + guest_RIP_curr_instr, + guest_RIP_bbstart+delta, "rep movs", pfx ); + dres.whatNext = Dis_StopHere; + break; + } + /* A4: movsb */ + if (!haveF3(pfx) && !haveF2(pfx)) { + if (opc == 0xA4) + sz = 1; + dis_string_op( dis_MOVS, sz, "movs", pfx ); + break; + } + goto decode_failure; + + + /* ------------------------ XCHG ----------------------- */ + + /* XCHG reg,mem automatically asserts LOCK# even without a LOCK + prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock) + and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is + used with an explicit LOCK prefix, we don't want to end up with + two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by + the generic LOCK logic at the top of disInstr. */ + case 0x86: /* XCHG Gb,Eb */ + sz = 1; + /* Fall through ... 
*/ + case 0x87: /* XCHG Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + ty = szToITy(sz); + t1 = newTemp(ty); t2 = newTemp(ty); + if (epartIsReg(modrm)) { + assign(t1, getIRegE(sz, pfx, modrm)); + assign(t2, getIRegG(sz, pfx, modrm)); + putIRegG(sz, pfx, modrm, mkexpr(t1)); + putIRegE(sz, pfx, modrm, mkexpr(t2)); + delta++; + DIP("xchg%c %s, %s\n", + nameISize(sz), nameIRegG(sz, pfx, modrm), + nameIRegE(sz, pfx, modrm)); + } else { + *expect_CAS = True; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + assign( t1, loadLE(ty, mkexpr(addr)) ); + assign( t2, getIRegG(sz, pfx, modrm) ); + casLE( mkexpr(addr), + mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); + putIRegG( sz, pfx, modrm, mkexpr(t1) ); + delta += alen; + DIP("xchg%c %s, %s\n", nameISize(sz), + nameIRegG(sz, pfx, modrm), dis_buf); + } + break; + + case 0x90: /* XCHG eAX,eAX */ + /* detect and handle F3 90 (rep nop) specially */ + if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { + DIP("rep nop (P4 pause)\n"); + /* "observe" the hint. The Vex client needs to be careful not + to cause very long delays as a result, though. */ + jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta); + dres.whatNext = Dis_StopHere; + break; + } + /* detect and handle NOPs specially */ + if (/* F2/F3 probably change meaning completely */ + !haveF2orF3(pfx) + /* If REX.B is 1, we're not exchanging rAX with itself */ + && getRexB(pfx)==0 ) { + DIP("nop\n"); + break; + } + /* else fall through to normal case. 
*/ + case 0x91: /* XCHG rAX,rCX */ + case 0x92: /* XCHG rAX,rDX */ + case 0x93: /* XCHG rAX,rBX */ + case 0x94: /* XCHG rAX,rSP */ + case 0x95: /* XCHG rAX,rBP */ + case 0x96: /* XCHG rAX,rSI */ + case 0x97: /* XCHG rAX,rDI */ + + /* guard against mutancy */ + if (haveF2orF3(pfx)) goto decode_failure; + + /* sz == 2 could legitimately happen, but we don't handle it yet */ + if (sz == 2) goto decode_failure; /* awaiting test case */ + + codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); + break; + + //.. //-- /* ------------------------ XLAT ----------------------- */ + //.. //-- + //.. //-- case 0xD7: /* XLAT */ + //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); + //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */ + //.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */ + //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */ + //.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */ + //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2); + //.. //-- uWiden(cb, 1, False); + //.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */ + //.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */ + //.. //-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */ + //.. //-- + //.. //-- DIP("xlat%c [ebx]\n", nameISize(sz)); + //.. 
//-- break; + + /* ------------------------ IN / OUT ----------------------- */ + + case 0xE4: /* IN imm8, AL */ + sz = 1; + t1 = newTemp(Ity_I64); + abyte = getUChar(delta); delta++; + assign(t1, mkU64( abyte & 0xFF )); + DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); + goto do_IN; + case 0xE5: /* IN imm8, eAX */ + if (!(sz == 2 || sz == 4)) goto decode_failure; + t1 = newTemp(Ity_I64); + abyte = getUChar(delta); delta++; + assign(t1, mkU64( abyte & 0xFF )); + DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); + goto do_IN; + case 0xEC: /* IN %DX, AL */ + sz = 1; + t1 = newTemp(Ity_I64); + assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); + DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), + nameIRegRAX(sz)); + goto do_IN; + case 0xED: /* IN %DX, eAX */ + if (!(sz == 2 || sz == 4)) goto decode_failure; + t1 = newTemp(Ity_I64); + assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); + DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), + nameIRegRAX(sz)); + goto do_IN; + do_IN: { + /* At this point, sz indicates the width, and t1 is a 64-bit + value giving port number. */ + IRDirty* d; + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 1 || sz == 2 || sz == 4); + ty = szToITy(sz); + t2 = newTemp(Ity_I64); + d = unsafeIRDirty_1_N( + t2, + 0/*regparms*/, + "amd64g_dirtyhelper_IN", + &amd64g_dirtyhelper_IN, + mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) + ); + /* do the call, dumping the result in t2. 
*/ + stmt( IRStmt_Dirty(d) ); + putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); + break; + } + + case 0xE6: /* OUT AL, imm8 */ + sz = 1; + t1 = newTemp(Ity_I64); + abyte = getUChar(delta); delta++; + assign( t1, mkU64( abyte & 0xFF ) ); + DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); + goto do_OUT; + case 0xE7: /* OUT eAX, imm8 */ + if (!(sz == 2 || sz == 4)) goto decode_failure; + t1 = newTemp(Ity_I64); + abyte = getUChar(delta); delta++; + assign( t1, mkU64( abyte & 0xFF ) ); + DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); + goto do_OUT; + case 0xEE: /* OUT AL, %DX */ + sz = 1; + t1 = newTemp(Ity_I64); + assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); + DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), + nameIRegRDX(2)); + goto do_OUT; + case 0xEF: /* OUT eAX, %DX */ + if (!(sz == 2 || sz == 4)) goto decode_failure; + t1 = newTemp(Ity_I64); + assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); + DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), + nameIRegRDX(2)); + goto do_OUT; + do_OUT: { + /* At this point, sz indicates the width, and t1 is a 64-bit + value giving port number. 
*/ + IRDirty* d; + if (haveF2orF3(pfx)) goto decode_failure; + vassert(sz == 1 || sz == 2 || sz == 4); + ty = szToITy(sz); + d = unsafeIRDirty_0_N( + 0/*regparms*/, + "amd64g_dirtyhelper_OUT", + &amd64g_dirtyhelper_OUT, + mkIRExprVec_3( mkexpr(t1), + widenUto64( getIRegRAX(sz) ), + mkU64(sz) ) + ); + stmt( IRStmt_Dirty(d) ); + break; + } + + /* ------------------------ (Grp1 extensions) ---------- */ + + case 0x80: /* Grp1 Ib,Eb */ + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + sz = 1; + d_sz = 1; + d64 = getSDisp8(delta + am_sz); + delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); + break; + + case 0x81: /* Grp1 Iv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = imin(sz,4); + d64 = getSDisp(d_sz, delta + am_sz); + delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); + break; + + case 0x83: /* Grp1 Ib,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 1; + d64 = getSDisp8(delta + am_sz); + delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); + break; + + /* ------------------------ (Grp2 extensions) ---------- */ + + case 0xC0: { /* Grp2 Ib,Eb */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 1; + d64 = getUChar(delta + am_sz); + sz = 1; + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + mkU8(d64 & 0xFF), NULL, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + case 0xC1: { /* Grp2 Ib,Ev */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 1; + d64 = getUChar(delta + am_sz); + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + mkU8(d64 & 0xFF), NULL, &decode_OK ); + if (!decode_OK) goto decode_failure; + 
break; + } + case 0xD0: { /* Grp2 1,Eb */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 0; + d64 = 1; + sz = 1; + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + mkU8(d64), NULL, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + case 0xD1: { /* Grp2 1,Ev */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 0; + d64 = 1; + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + mkU8(d64), NULL, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + case 0xD2: { /* Grp2 CL,Eb */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 0; + sz = 1; + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + getIRegCL(), "%cl", &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + case 0xD3: { /* Grp2 CL,Ev */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d_sz = 0; + delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, + getIRegCL(), "%cl", &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + + /* ------------------------ (Grp3 extensions) ---------- */ + + case 0xF6: { /* Grp3 Eb */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + case 0xF7: { /* Grp3 Ev */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + + /* ------------------------ (Grp4 extensions) ---------- */ + + case 0xFE: { /* Grp4 Eb */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + delta = 
dis_Grp4 ( vbi, pfx, delta, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + + /* ------------------------ (Grp5 extensions) ---------- */ + + case 0xFF: { /* Grp5 Ev */ + Bool decode_OK = True; + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK ); + if (!decode_OK) goto decode_failure; + break; + } + + /* ------------------------ Escapes to 2-byte opcodes -- */ + + case 0x0F: { + opc = getUChar(delta); delta++; + switch (opc) { + + /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0xBA: { /* Grp8 Ib,Ev */ + Bool decode_OK = False; + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + am_sz = lengthAMode(pfx,delta); + d64 = getSDisp8(delta + am_sz); + delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, + &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ + + case 0xBC: /* BSF Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); + break; + case 0xBD: /* BSR Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); + break; + + /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xC8: /* BSWAP %eax */ + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP %edi */ + if (haveF2orF3(pfx)) goto decode_failure; + /* According to the AMD64 docs, this insn can have size 4 or + 8. 
*/ + if (sz == 4) { + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); + assign( t2, + binop(Iop_Or32, + binop(Iop_Shl32, mkexpr(t1), mkU8(24)), + binop(Iop_Or32, + binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), + mkU32(0x00FF0000)), + binop(Iop_Or32, + binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), + mkU32(0x0000FF00)), + binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), + mkU32(0x000000FF) ) + ))) + ); + putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); + DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); + break; + } + else if (sz == 8) { + IRTemp m8 = newTemp(Ity_I64); + IRTemp s8 = newTemp(Ity_I64); + IRTemp m16 = newTemp(Ity_I64); + IRTemp s16 = newTemp(Ity_I64); + IRTemp m32 = newTemp(Ity_I64); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); + + assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); + assign( s8, + binop(Iop_Or64, + binop(Iop_Shr64, + binop(Iop_And64,mkexpr(t1),mkexpr(m8)), + mkU8(8)), + binop(Iop_And64, + binop(Iop_Shl64,mkexpr(t1),mkU8(8)), + mkexpr(m8)) + ) + ); + + assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); + assign( s16, + binop(Iop_Or64, + binop(Iop_Shr64, + binop(Iop_And64,mkexpr(s8),mkexpr(m16)), + mkU8(16)), + binop(Iop_And64, + binop(Iop_Shl64,mkexpr(s8),mkU8(16)), + mkexpr(m16)) + ) + ); + + assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); + assign( t2, + binop(Iop_Or64, + binop(Iop_Shr64, + binop(Iop_And64,mkexpr(s16),mkexpr(m32)), + mkU8(32)), + binop(Iop_And64, + binop(Iop_Shl64,mkexpr(s16),mkU8(32)), + mkexpr(m32)) + ) + ); + + putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); + DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); + break; + } else { + goto decode_failure; + } + + /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ + + /* All of these are possible at sizes 2, 4 and 8, but until a + size 2 test case shows up, only handle sizes 4 and 8. 
*/ + + case 0xA3: /* BT Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; + delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone ); + break; + case 0xB3: /* BTR Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; + delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset ); + break; + case 0xAB: /* BTS Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; + delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet ); + break; + case 0xBB: /* BTC Gv,Ev */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; + delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp ); + break; + + /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x40: + case 0x41: + case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ + case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ + case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ + case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ + case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ + case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ + case 0x48: /* CMOVSb (cmov negative) */ + case 0x49: /* CMOVSb (cmov not negative) */ + case 0x4A: /* CMOVP (cmov parity even) */ + case 0x4B: /* CMOVNP (cmov parity odd) */ + case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ + case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ + case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ + case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xB0: { /* CMPXCHG Gb,Eb */ + Bool ok = True; + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); + if (!ok) goto decode_failure; + break; + } + case 0xB1: { /* CMPXCHG Gv,Ev 
(allowed in 16,32,64 bit) */ + Bool ok = True; + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; + delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); + if (!ok) goto decode_failure; + break; + } + + case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ + IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; + IRTemp expdHi = newTemp(elemTy); + IRTemp expdLo = newTemp(elemTy); + IRTemp dataHi = newTemp(elemTy); + IRTemp dataLo = newTemp(elemTy); + IRTemp oldHi = newTemp(elemTy); + IRTemp oldLo = newTemp(elemTy); + IRTemp flags_old = newTemp(Ity_I64); + IRTemp flags_new = newTemp(Ity_I64); + IRTemp success = newTemp(Ity_I1); + IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; + IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; + IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; + IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0); + IRTemp expdHi64 = newTemp(Ity_I64); + IRTemp expdLo64 = newTemp(Ity_I64); + + /* Translate this using a DCAS, even if there is no LOCK + prefix. Life is too short to bother with generating two + different translations for the with/without-LOCK-prefix + cases. */ + *expect_CAS = True; + + /* Decode, and generate address. */ + if (have66orF2orF3(pfx)) goto decode_failure; + if (sz != 4 && sz != 8) goto decode_failure; + if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) + goto decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) goto decode_failure; + if (gregLO3ofRM(modrm) != 1) goto decode_failure; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + + /* cmpxchg16b requires an alignment check. */ + if (sz == 8) + gen_SEGV_if_not_16_aligned( addr ); + + /* Get the expected and new values. */ + assign( expdHi64, getIReg64(R_RDX) ); + assign( expdLo64, getIReg64(R_RAX) ); + + /* These are the correctly-sized expected and new values. 
+ However, we also get expdHi64/expdLo64 above as 64-bits + regardless, because we will need them later in the 32-bit + case (paradoxically). */ + assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) + : mkexpr(expdHi64) ); + assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) + : mkexpr(expdLo64) ); + assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); + assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) ); + + /* Do the DCAS */ + stmt( IRStmt_CAS( + mkIRCAS( oldHi, oldLo, + Iend_LE, mkexpr(addr), + mkexpr(expdHi), mkexpr(expdLo), + mkexpr(dataHi), mkexpr(dataLo) + ))); + + /* success when oldHi:oldLo == expdHi:expdLo */ + assign( success, + binop(opCasCmpEQ, + binop(opOR, + binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), + binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) + ), + zero + )); + + /* If the DCAS is successful, that is to say oldHi:oldLo == + expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, + which is where they came from originally. Both the actual + contents of these two regs, and any shadow values, are + unchanged. If the DCAS fails then we're putting into + RDX:RAX the value seen in memory. */ + /* Now of course there's a complication in the 32-bit case + (bah!): if the DCAS succeeds, we need to leave RDX:RAX + unchanged; but if we use the same scheme as in the 64-bit + case, we get hit by the standard rule that a write to the + bottom 32 bits of an integer register zeros the upper 32 + bits. And so the upper halves of RDX and RAX mysteriously + become zero. So we have to stuff back in the original + 64-bit values which we previously stashed in + expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ + /* It's just _so_ much fun ... */ + putIRegRDX( 8, + IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), + sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) + : mkexpr(oldHi), + mkexpr(expdHi64) + )); + putIRegRAX( 8, + IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), + sz == 4 ? 
unop(Iop_32Uto64, mkexpr(oldLo)) + : mkexpr(oldLo), + mkexpr(expdLo64) + )); + + /* Copy the success bit into the Z flag and leave the others + unchanged */ + assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); + assign( + flags_new, + binop(Iop_Or64, + binop(Iop_And64, mkexpr(flags_old), + mkU64(~AMD64G_CC_MASK_Z)), + binop(Iop_Shl64, + binop(Iop_And64, + unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), + mkU8(AMD64G_CC_SHIFT_Z)) )); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); + /* Set NDEP even though it isn't used. This makes + redundant-PUT elimination of previous stores to this field + work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); + + /* Sheesh. Aren't you glad it was me and not you that had to + write and validate all this grunge? */ + + DIP("cmpxchg8b %s\n", dis_buf); + break; + + } + + /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xA2: { /* CPUID */ + /* Uses dirty helper: + void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) + declared to mod rax, wr rbx, rcx, rdx + */ + IRDirty* d = NULL; + HChar* fName = NULL; + void* fAddr = NULL; + if (haveF2orF3(pfx)) goto decode_failure; + if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3 + |VEX_HWCAPS_AMD64_CX16)) { + fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16"; + fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16; + /* This is a Core-2-like machine */ + } + else { + /* Give a CPUID for at least a baseline machine, no SSE2 + and no CX16 */ + fName = "amd64g_dirtyhelper_CPUID_baseline"; + fAddr = &amd64g_dirtyhelper_CPUID_baseline; + } + + vassert(fName); vassert(fAddr); + d = unsafeIRDirty_0_N ( 0/*regparms*/, + fName, fAddr, mkIRExprVec_0() ); + /* declare guest state effects */ + d->needsBBP = True; + d->nFxState = 4; + d->fxState[0].fx = Ifx_Modify; + d->fxState[0].offset = OFFB_RAX; + d->fxState[0].size = 8; + d->fxState[1].fx = Ifx_Write; + 
d->fxState[1].offset = OFFB_RBX; + d->fxState[1].size = 8; + d->fxState[2].fx = Ifx_Modify; + d->fxState[2].offset = OFFB_RCX; + d->fxState[2].size = 8; + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_RDX; + d->fxState[3].size = 8; + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + /* CPUID is a serialising insn. So, just in case someone is + using it as a memory fence ... */ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("cpuid\n"); + break; + } + + /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ + + case 0xB6: /* MOVZXb Eb,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 2 && sz != 4 && sz != 8) + goto decode_failure; + delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False ); + break; + case 0xB7: /* MOVZXw Ew,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 4 && sz != 8) + goto decode_failure; + delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False ); + break; + + case 0xBE: /* MOVSXb Eb,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 2 && sz != 4 && sz != 8) + goto decode_failure; + delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True ); + break; + case 0xBF: /* MOVSXw Ew,Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + if (sz != 4 && sz != 8) + goto decode_failure; + delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True ); + break; + + //.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */ + //.. //-- + //.. //-- case 0xC3: /* MOVNTI Gv,Ev */ + //.. //-- vg_assert(sz == 4); + //.. //-- modrm = getUChar(eip); + //.. //-- vg_assert(!epartIsReg(modrm)); + //.. //-- t1 = newTemp(cb); + //.. //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1); + //.. //-- pair = disAMode ( cb, sorb, eip, dis_buf ); + //.. //-- t2 = LOW24(pair); + //.. //-- eip += HI8(pair); + //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); + //.. //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf); + //.. 
//-- break; + + /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ + + case 0xAF: /* IMUL Ev, Gv */ + if (haveF2orF3(pfx)) goto decode_failure; + delta = dis_mul_E_G ( vbi, pfx, sz, delta ); + break; + + /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x1F: + if (haveF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) goto decode_failure; + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + DIP("nop%c %s\n", nameISize(sz), dis_buf); + break; + + /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ + case 0x80: + case 0x81: + case 0x82: /* JBb/JNAEb (jump below) */ + case 0x83: /* JNBb/JAEb (jump not below) */ + case 0x84: /* JZb/JEb (jump zero) */ + case 0x85: /* JNZb/JNEb (jump not zero) */ + case 0x86: /* JBEb/JNAb (jump below or equal) */ + case 0x87: /* JNBEb/JAb (jump not below or equal) */ + case 0x88: /* JSb (jump negative) */ + case 0x89: /* JSb (jump not negative) */ + case 0x8A: /* JP (jump parity even) */ + case 0x8B: /* JNP/JPO (jump parity odd) */ + case 0x8C: /* JLb/JNGEb (jump less) */ + case 0x8D: /* JGEb/JNLb (jump greater or equal) */ + case 0x8E: /* JLEb/JNGb (jump less or equal) */ + case 0x8F: /* JGb/JNLEb (jump greater) */ + if (haveF2orF3(pfx)) goto decode_failure; + d64 = (guest_RIP_bbstart+delta+4) + getSDisp32(delta); + delta += 4; + jcc_01( (AMD64Condcode)(opc - 0x80), + guest_RIP_bbstart+delta, + d64 ); + dres.whatNext = Dis_StopHere; + DIP("j%s-32 0x%llx\n", name_AMD64Condcode(opc - 0x80), d64); + break; + + /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */ + case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ + /* 0F 0D /1 -- prefetchw mem8 */ + if (have66orF2orF3(pfx)) goto decode_failure; + modrm = getUChar(delta); + if (epartIsReg(modrm)) goto decode_failure; + if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) + goto decode_failure; + + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + + switch (gregLO3ofRM(modrm)) { + case 0: 
DIP("prefetch %s\n", dis_buf); break; + case 1: DIP("prefetchw %s\n", dis_buf); break; + default: vassert(0); /*NOTREACHED*/ + } + break; + + /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ + case 0x31: { /* RDTSC */ + IRTemp val = newTemp(Ity_I64); + IRExpr** args = mkIRExprVec_0(); + IRDirty* d = unsafeIRDirty_1_N ( + val, + 0/*regparms*/, + "amd64g_dirtyhelper_RDTSC", + &amd64g_dirtyhelper_RDTSC, + args + ); + if (have66orF2orF3(pfx)) goto decode_failure; + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); + putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); + DIP("rdtsc\n"); + break; + } + + //.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */ + //.. + //.. case 0xA1: /* POP %FS */ + //.. dis_pop_segreg( R_FS, sz ); break; + //.. case 0xA9: /* POP %GS */ + //.. dis_pop_segreg( R_GS, sz ); break; + //.. + //.. case 0xA0: /* PUSH %FS */ + //.. dis_push_segreg( R_FS, sz ); break; + //.. case 0xA8: /* PUSH %GS */ + //.. 
dis_push_segreg( R_GS, sz ); break; + + /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ + case 0x90: + case 0x91: + case 0x92: /* set-Bb/set-NAEb (set if below) */ + case 0x93: /* set-NBb/set-AEb (set if not below) */ + case 0x94: /* set-Zb/set-Eb (set if zero) */ + case 0x95: /* set-NZb/set-NEb (set if not zero) */ + case 0x96: /* set-BEb/set-NAb (set if below or equal) */ + case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */ + case 0x98: /* set-Sb (set if negative) */ + case 0x99: /* set-Sb (set if not negative) */ + case 0x9A: /* set-P (set if parity even) */ + case 0x9B: /* set-NP (set if parity odd) */ + case 0x9C: /* set-Lb/set-NGEb (set if less) */ + case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */ + case 0x9E: /* set-LEb/set-NGb (set if less or equal) */ + case 0x9F: /* set-Gb/set-NLEb (set if greater) */ + if (haveF2orF3(pfx)) goto decode_failure; + t1 = newTemp(Ity_I8); + assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) ); + modrm = getUChar(delta); + if (epartIsReg(modrm)) { + delta++; + putIRegE(1, pfx, modrm, mkexpr(t1)); + DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), + nameIRegE(1,pfx,modrm)); + } else { + addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); + delta += alen; + storeLE( mkexpr(addr), mkexpr(t1) ); + DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf); + } + break; + + /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ + + case 0xA4: /* SHLDv imm8,Gv,Ev */ + modrm = getUChar(delta); + d64 = delta + lengthAMode(pfx, delta); + vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); + delta = dis_SHLRD_Gv_Ev ( + vbi, pfx, delta, modrm, sz, + mkU8(getUChar(d64)), True, /* literal */ + dis_buf, True /* left */ ); + break; + case 0xA5: /* SHLDv %cl,Gv,Ev */ + modrm = getUChar(delta); + delta = dis_SHLRD_Gv_Ev ( + vbi, pfx, delta, modrm, sz, + getIRegCL(), False, /* not literal */ + "%cl", True /* left */ ); + break; + + case 0xAC: /* SHRDv imm8,Gv,Ev */ + modrm = getUChar(delta); + d64 = 
delta + lengthAMode(pfx, delta); + vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); + delta = dis_SHLRD_Gv_Ev ( + vbi, pfx, delta, modrm, sz, + mkU8(getUChar(d64)), True, /* literal */ + dis_buf, False /* right */ ); + break; + case 0xAD: /* SHRDv %cl,Gv,Ev */ + modrm = getUChar(delta); + delta = dis_SHLRD_Gv_Ev ( + vbi, pfx, delta, modrm, sz, + getIRegCL(), False, /* not literal */ + "%cl", False /* right */); + break; + + /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */ + case 0x05: /* SYSCALL */ + guest_RIP_next_mustcheck = True; + guest_RIP_next_assumed = guest_RIP_bbstart + delta; + putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) ); + /* It's important that all guest state is up-to-date + at this point. So we declare an end-of-block here, which + forces any cached guest state to be flushed. */ + jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed); + dres.whatNext = Dis_StopHere; + DIP("syscall\n"); + break; + + /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ + + case 0xC0: { /* XADD Gb,Eb */ + Bool decode_OK = False; + delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xC1: { /* XADD Gv,Ev */ + Bool decode_OK = False; + delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */ + + case 0x71: + case 0x72: + case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ + + case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ + case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ + case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ + case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xFC: + case 0xFD: + case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xEC: + case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xDC: + case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF8: + case 0xF9: + case 0xFA: /* PSUBgg (src)mmxreg-or-mem, 
(dst)mmxreg */ + + case 0xE8: + case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xD8: + case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x74: + case 0x75: + case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x64: + case 0x65: + case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ + case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ + case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x68: + case 0x69: + case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x60: + case 0x61: + case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xF2: + case 0xF3: + + case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD2: + case 0xD3: + + case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xE2: + { + Long delta0 = delta-1; + Bool decode_OK = False; + + /* If sz==2 this is SSE, and we assume sse idec has + already spotted those cases by now. 
*/ + if (sz != 4 && sz != 8) + goto decode_failure; + if (have66orF2orF3(pfx)) + goto decode_failure; + + delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 ); + if (!decode_OK) { + delta = delta0; + goto decode_failure; + } + break; + } + + case 0x0E: /* FEMMS */ + case 0x77: /* EMMS */ + if (sz != 4) + goto decode_failure; + do_EMMS_preamble(); + DIP("{f}emms\n"); + break; + + /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ + + default: + goto decode_failure; + } /* switch (opc) for the 2-byte opcodes */ + goto decode_success; + } /* case 0x0F: of primary opcode */ + + /* ------------------------ ??? ------------------------ */ + + default: + decode_failure: + /* All decode failures end up here. */ + vex_printf("vex amd64->IR: unhandled instruction bytes: " + "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + (Int)getUChar(delta_start+0), + (Int)getUChar(delta_start+1), + (Int)getUChar(delta_start+2), + (Int)getUChar(delta_start+3), + (Int)getUChar(delta_start+4), + (Int)getUChar(delta_start+5) ); + + /* Tell the dispatcher that this insn cannot be decoded, and so has + not been executed, and (is currently) the next to be executed. + RIP should be up-to-date since it made so at the start of each + insn, but nevertheless be paranoid and update it again right + now. */ + stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); + jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr); + dres.whatNext = Dis_StopHere; + dres.len = 0; + /* We also need to say that a CAS is not expected now, regardless + of what it might have been set to at the start of the function, + since the IR that we've emitted just above (to synthesis a + SIGILL) does not involve any CAS, and presumably no other IR has + been emitted for this (non-decoded) insn. */ + *expect_CAS = False; + return dres; + + } /* switch (opc) for the main (primary) opcode switch. */ + + decode_success: + /* All decode successes end up here. 
*/ + DIP("\n"); + dres.len = (Int)toUInt(delta - delta_start); + return dres; + } + + #undef DIP + #undef DIS + + + /*------------------------------------------------------------*/ + /*--- Top-level fn ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. + + This wrapper copies its arguments into the file-level globals + (guest_code, irsb, host_is_bigendian, guest_RIP_curr_instr, + guest_RIP_bbstart), calls disInstr_AMD64_WRK to do the actual + decoding, and then sanity-checks what the worker produced: + - if the worker set guest_RIP_next_mustcheck, the next-%rip value + it assumed must equal guest_RIP_curr_instr + dres.len; + - the statements appended to the IRSB must contain an Ist_CAS + exactly when the worker reported expect_CAS. + If either check fails the function ends in vpanic. Otherwise the + DisResult obtained from the worker is returned as is. */ + + DisResult disInstr_AMD64 ( IRSB* irsb_IN, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code_IN, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian_IN ) + { + Int i, x1, x2; /* x1/x2: irsb_IN->stmts_used before/after the worker call */ + Bool expect_CAS, has_CAS; + DisResult dres; + + /* Set globals (see top of this file) */ + vassert(guest_arch == VexArchAMD64); + guest_code = guest_code_IN; + irsb = irsb_IN; + host_is_bigendian = host_bigendian_IN; + guest_RIP_curr_instr = guest_IP; + guest_RIP_bbstart = guest_IP - delta; + + /* We'll consult these after doing disInstr_AMD64_WRK. */ + guest_RIP_next_assumed = 0; + guest_RIP_next_mustcheck = False; + + x1 = irsb_IN->stmts_used; + expect_CAS = False; + dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, + callback_opaque, + delta, archinfo, abiinfo ); + x2 = irsb_IN->stmts_used; + vassert(x2 >= x1); + + /* If disInstr_AMD64_WRK tried to figure out the next rip, check it + got it right. Failure of this assertion is serious and denotes + a bug in disInstr. */ + if (guest_RIP_next_mustcheck + && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) { + vex_printf("\n"); + vex_printf("assumed next %%rip = 0x%llx\n", + guest_RIP_next_assumed ); + vex_printf(" actual next %%rip = 0x%llx\n", + guest_RIP_curr_instr + dres.len ); + vpanic("disInstr_AMD64: disInstr miscalculated next %rip"); + } + + /* See comment at the top of disInstr_AMD64_WRK for meaning of + expect_CAS.
Here, we (sanity-)check for the presence/absence of + IRCAS as directed by the returned expect_CAS value. Only the + statements added by the call above (indices x1 .. x2-1) are + examined. */ + has_CAS = False; + for (i = x1; i < x2; i++) { + if (irsb_IN->stmts[i]->tag == Ist_CAS) + has_CAS = True; + } + + if (expect_CAS != has_CAS) { + /* inconsistency detected. re-disassemble the instruction so as + to generate a useful error message; then assert. */ + vex_traceflags |= VEX_TRACE_FE; + dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, + callback_opaque, + delta, archinfo, abiinfo ); + for (i = x1; i < x2; i++) { /* prints the x1 .. x2-1 range recorded for the first pass */ + vex_printf("\t\t"); + ppIRStmt(irsb_IN->stmts[i]); + vex_printf("\n"); + } + /* Failure of this assertion is serious and denotes a bug in + disInstr. */ + vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling"); + } + + return dres; + } + + + + /*--------------------------------------------------------------------*/ + /*--- end guest_amd64_toIR.c ---*/ + /*--------------------------------------------------------------------*/ Index: VEX/priv/guest_arm_defs.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_arm_defs.h Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,212 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_arm_defs.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL.
If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Only to be used within the guest-arm directory. */ + + #ifndef __VEX_GUEST_ARM_DEFS_H + #define __VEX_GUEST_ARM_DEFS_H + + + /*---------------------------------------------------------*/ + /*--- arm to IR conversion ---*/ + /*---------------------------------------------------------*/ + + extern + IRSB* bbToIR_ARM ( UChar* armCode, + Addr64 eip, + VexGuestExtents* vge, + Bool (*byte_accessible)(Addr64), + Bool (*resteerOkFn)(Addr64), + Bool host_bigendian, + VexArchInfo* archinfo_guest ); + + /* Used by the optimiser to specialise calls to helpers. 
*/ + extern + IRExpr* guest_arm_spechelper ( HChar* function_name, + IRExpr** args ); + + /* Describes to the optimiser which parts of the guest state require + precise memory exceptions. This is logically part of the guest + state description. */ + extern + Bool guest_arm_state_requires_precise_mem_exns ( Int, Int ); + + extern + VexGuestLayout armGuest_layout; + + + /*---------------------------------------------------------*/ + /*--- arm guest helpers ---*/ + /*---------------------------------------------------------*/ + + /* --- CLEAN HELPERS --- */ + /* The cc_op / cc_dep1 / cc_dep2 parameters are presumably the words + of the flag thunk described under "Condition code stuff" below -- + confirm against the definitions in guest_arm_helpers.c. */ + extern UInt armg_calculate_flags_all ( + UInt cc_op, UInt cc_dep1, UInt cc_dep2 + ); + extern UInt armg_calculate_flags_c ( + UInt cc_op, UInt cc_dep1, UInt cc_dep2 + ); + + extern UInt armg_calculate_condition ( + UInt/*ARMCondcode*/ cond, + UInt cc_op, + UInt cc_dep1, UInt cc_dep2 + ); + + + /*---------------------------------------------------------*/ + /*--- Condition code stuff ---*/ + /*---------------------------------------------------------*/ + + /* Flags masks. Defines positions of flags bits in the CPSR. */ + #define ARMG_CC_SHIFT_N 31 + #define ARMG_CC_SHIFT_Z 30 + #define ARMG_CC_SHIFT_C 29 + #define ARMG_CC_SHIFT_V 28 + /* Single-bit masks built from the shifts above. NOTE(review): + ARMG_CC_MASK_N left-shifts a signed int 1 by 31, whose result is + not representable where int is 32 bits; 1U would avoid that, but + check how the masks are used before changing their type. */ + #define ARMG_CC_MASK_N (1 << ARMG_CC_SHIFT_N) + #define ARMG_CC_MASK_Z (1 << ARMG_CC_SHIFT_Z) + #define ARMG_CC_MASK_V (1 << ARMG_CC_SHIFT_V) + #define ARMG_CC_MASK_C (1 << ARMG_CC_SHIFT_C) + + /* Flag thunk descriptors. A three-word thunk is used to record + details of the most recent flag-setting operation, so the flags can + be computed later if needed. + + The three words are: + + CC_OP, which describes the operation. + + CC_DEP1 and CC_DEP2. These are arguments to the operation. + We want Memcheck to believe that the resulting flags are + data-dependent on both CC_DEP1 and CC_DEP2, hence the + name DEP. + + When building the thunk, it is always necessary to write words into + CC_DEP1 and CC_DEP2, even if those args are not used given the + CC_OP field.
/* Kinds of flags thunk.  The value stored in CC_OP says how CC_DEP1
   and CC_DEP2 are to be interpreted when the flags are computed. */
enum {
   ARMG_CC_OP_COPY,    /* DEP1 = current flags, DEP2 = 0 */
                       /* just copy DEP1 to output */

   ARMG_CC_OP_LOGIC,   /* DEP1 = result, DEP2 = shifter_carry_out */

   ARMG_CC_OP_SUB,     /* DEP1 = arg1(Rn), DEP2 = arg2 (shifter_op) */
   ARMG_CC_OP_ADD,     /* DEP1 = arg1(Rn), DEP2 = arg2 (shifter_op) */

   ARMG_CC_OP_NUMBER   /* number of thunk kinds; not itself a kind */
};

/* requires further study */


/* Defines conditions which we can ask for (ARM ARM 2e page A3-6).
   The values are the 4-bit condition field of an ARM instruction.
   Conditions come in even/odd pairs in which the odd member is the
   negation of the even one; armg_calculate_condition relies on this
   by using bit 0 of the value as an "invert" flag. */

typedef
   enum {
      ARMCondEQ     = 0,  /* equal                         : Z=1 */
      ARMCondNE     = 1,  /* not equal                     : Z=0 */

      ARMCondHS     = 2,  /* >=u (higher or same)          : C=1 */
      ARMCondLO     = 3,  /* <u  (lower)                   : C=0 */

      ARMCondMI     = 4,  /* minus (negative)              : N=1 */
      ARMCondPL     = 5,  /* plus (zero or +ve)            : N=0 */

      ARMCondVS     = 6,  /* overflow                      : V=1 */
      ARMCondVC     = 7,  /* no overflow                   : V=0 */

      ARMCondHI     = 8,  /* >u  (higher)                  : C=1 && Z=0 */
      ARMCondLS     = 9,  /* <=u (lower or same)           : C=0 || Z=1 */

      ARMCondGE     = 10, /* >=s (signed greater or equal) : N=V */
      ARMCondLT     = 11, /* <s  (signed less than)        : N!=V */

      ARMCondGT     = 12, /* >s  (signed greater)          : Z=0 && N=V */
      ARMCondLE     = 13, /* <=s (signed less or equal)    : Z=1 || N!=V */

      ARMCondAL     = 14, /* always (unconditional)        : */
      ARMCondNV     = 15  /* never (basically undefined meaning) : */
      /* NB: ARM have deprecated the use of the NV condition code
         - you are now supposed to use MOV R0,R0 as a noop
           rather than MOVNV R0,R0 as was previously recommended.
         Future processors may have the NV condition code reused to
         do other things. */
   }
   ARMCondcode;
/* Placeholders; both currently expand to an empty block. */
#define BORROWFROM()    \
{                       \
}
#define OVERFLOWFROM()  \
{                       \
}


/* The ACTIONS_* macros below are bodies for the cases of the switch
   in armg_calculate_flags_all.  Each one reads the enclosing
   function's cc_dep1_formal / cc_dep2_formal parameters and returns
   (from that function) a word holding N, Z, C and V at their CPSR
   bit positions (ARMG_CC_SHIFT_*).  All arithmetic is done on UInt so
   that wraparound is well defined. */

/*-------------------------------------------------------------*/
/*
  LOGIC: EOR, AND, TST, TEQ, MOV, ORR, MVN, BIC
  ----------------
  n: Rd[31]
  z: Rd==0 ? 1:0
  c: shifter_carry_out
  v: unaffected

  DEP1 is the result, DEP2 is the shifter carry-out (0 or 1).
  The thunk does not carry the previous V, so V is reported as 0.
*/
#define ACTIONS_LOGIC()                                          \
{                                                                \
   { UInt nf, zf, cf, vf;                                        \
     UInt oldV = 0; /* CAB: vf unaffected: what todo? */         \
     nf = cc_dep1_formal & ARMG_CC_MASK_N;                       \
     /* Z must land in bit 30, where callers look for it. */     \
     zf = (UInt)(cc_dep1_formal == 0) << ARMG_CC_SHIFT_Z;        \
     cf = (cc_dep2_formal << ARMG_CC_SHIFT_C) & ARMG_CC_MASK_C;  \
     vf = oldV & ARMG_CC_MASK_V;                                 \
     return nf | zf | cf | vf;                                   \
   }                                                             \
}

/*-------------------------------------------------------------*/
/*
  ADD: ADD, CMN
  ----------------
  n: Rd[31]
  z: Rd==0 ? 1:0
  c: CarryFrom(Rn + shifter_op)
  v: OverflowFrom(Rn + shifter_op)

  Carry out of a 32-bit add happened iff the wrapped sum is smaller
  than an operand.  Signed overflow happened iff both operands have
  the same sign and the sum's sign differs from it; that condition is
  bit 31 of (~(L^R) & (L^res)), which is then moved down to bit 28.
*/
#define ACTIONS_ADD()                                            \
{                                                                \
   { UInt nf, zf, cf, vf;                                        \
     UInt argL, argR, res;                                       \
     argL = cc_dep1_formal;                                      \
     argR = cc_dep2_formal;                                      \
     res  = argL + argR;                                         \
     nf = res & ARMG_CC_MASK_N;                                  \
     zf = (UInt)(res == 0) << ARMG_CC_SHIFT_Z;                   \
     cf = (UInt)(res < argL) << ARMG_CC_SHIFT_C;                 \
     vf = ((~(argL ^ argR) & (argL ^ res))                       \
              >> (31 - ARMG_CC_SHIFT_V)) & ARMG_CC_MASK_V;       \
     return nf | zf | cf | vf;                                   \
   }                                                             \
}

/*-------------------------------------------------------------*/
/*
  SUB: SUB, CMP, RSB
  ----------------
  n: Rd[31]
  z: Rd==0 ? 1:0
  c: NOT BorrowFrom(Rn - shifter_op)
  v: OverflowFrom(Rn - shifter_op)

  ARM's C after a subtract is the inverse of borrow, i.e. set iff
  L >= R unsigned (CAB queried this against ARM ARM A4-99).  Signed
  overflow happened iff the operands have different signs and the
  result's sign differs from L's: bit 31 of ((L^R) & (L^res)).
*/
#define ACTIONS_SUB()                                            \
{                                                                \
   { UInt nf, zf, cf, vf;                                        \
     UInt argL, argR, res;                                       \
     argL = cc_dep1_formal;                                      \
     argR = cc_dep2_formal;                                      \
     res  = argL - argR;                                         \
     nf = res & ARMG_CC_MASK_N;                                  \
     zf = (UInt)(res == 0) << ARMG_CC_SHIFT_Z;                   \
     cf = (UInt)(argL >= argR) << ARMG_CC_SHIFT_C;               \
     vf = (((argL ^ argR) & (argL ^ res))                        \
              >> (31 - ARMG_CC_SHIFT_V)) & ARMG_CC_MASK_V;       \
     return nf | zf | cf | vf;                                   \
   }                                                             \
}


/*-------------------------------------------------------------*/
/*
  ADC  (not yet implemented)
  ----------------
  n: Rd[31]
  z: Rd==0 ? 1:0
  c: CarryFrom(Rn + shifter_op + C Flag)
  v: OverflowFrom(Rn + shifter_op + C Flag)
*/
1:0 + c: NOT BorrowFrom(shifter_op - Rn - NOT(C Flag)) + v: OverflowFrom(shifter_op - Rn - NOT(C Flag)) + */ + + /*-------------------------------------------------------------*/ + /* + SBC + ---------------- + n: Rd[31] + z: Rd==0 ? 1:0 + c: NOT BorrowFrom(Rn - shifter_op - NOT(C Flag)) + v: OverflowFrom(Rn - shifter_op - NOT(C Flag)) + */ + + + + + + + + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate all the 4 flags from the supplied thunk parameters. */ + UInt armg_calculate_flags_all ( UInt cc_op, + UInt cc_dep1_formal, + UInt cc_dep2_formal ) + { + switch (cc_op) { + case ARMG_CC_OP_LOGIC: ACTIONS_LOGIC(); + case ARMG_CC_OP_ADD: ACTIONS_ADD(); + case ARMG_CC_OP_SUB: ACTIONS_SUB(); + + default: + /* shouldn't really make these calls from generated code */ + vex_printf("armg_calculate_flags_all(ARM)( %u, 0x%x, 0x%x )\n", + cc_op, cc_dep1_formal, cc_dep2_formal ); + vpanic("armg_calculate_flags_all(ARM)"); + } + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate just the carry flag from the supplied thunk parameters. */ + UInt armg_calculate_flags_c ( UInt cc_op, + UInt cc_dep1, + UInt cc_dep2 ) + { + /* Fast-case some common ones. 
*/ + switch (cc_op) { + default: + break; + } + return armg_calculate_flags_all(cc_op,cc_dep1,cc_dep2) & ARMG_CC_MASK_C; + } + + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* returns 1 or 0 */ + /*static*/ + UInt armg_calculate_condition ( UInt/*ARMCondcode*/ cond, + UInt cc_op, + UInt cc_dep1, + UInt cc_dep2 ) + { + UInt nf,zf,vf,cf; + UInt inv = cond & 1; + + UInt nzvc = armg_calculate_flags_all(cc_op, cc_dep1, cc_dep2); + + switch (cond) { + case ARMCondEQ: // Z=1 => z + case ARMCondNE: // Z=0 + zf = nzvc >> ARMG_CC_SHIFT_Z; + return 1 & (inv ^ zf); + + case ARMCondHS: // C=1 => c + case ARMCondLO: // C=0 + cf = nzvc >> ARMG_CC_SHIFT_C; + return 1 & (inv ^ cf); + + case ARMCondMI: // N=1 => n + case ARMCondPL: // N=0 + nf = nzvc >> ARMG_CC_SHIFT_N; + return 1 & (inv ^ nf); + + case ARMCondVS: // V=1 => v + case ARMCondVC: // V=0 + vf = nzvc >> ARMG_CC_SHIFT_V; + return 1 & (inv ^ vf); + + case ARMCondHI: // C=1 && Z=0 => c & ~z + case ARMCondLS: // C=0 || Z=1 + cf = nzvc >> ARMG_CC_SHIFT_C; + zf = nzvc >> ARMG_CC_SHIFT_Z; + return 1 & (inv ^ (cf & ~zf)); + + case ARMCondGE: // N=V => ~(n^v) + case ARMCondLT: // N!=V + nf = nzvc >> ARMG_CC_SHIFT_N; + vf = nzvc >> ARMG_CC_SHIFT_V; + return 1 & (inv ^ ~(nf ^ vf)); + + case ARMCondGT: // Z=0 && N=V => (~z & ~(n^v) => ~(z | (n^v) + case ARMCondLE: // Z=1 || N!=V + nf = nzvc >> ARMG_CC_SHIFT_N; + vf = nzvc >> ARMG_CC_SHIFT_V; + zf = nzvc >> ARMG_CC_SHIFT_Z; + return 1 & (inv ^ ~(zf | (nf ^ vf))); + + case ARMCondAL: // should never get here: Always => no flags to calc + case ARMCondNV: // should never get here: Illegal instr + default: + /* shouldn't really make these calls from generated code */ + vex_printf("armg_calculate_condition(ARM)( %u, %u, 0x%x, 0x%x )\n", + cond, cc_op, cc_dep1, cc_dep2 ); + vpanic("armg_calculate_condition(ARM)"); + } + } + + + /* Used by the optimiser to try specialisations. Returns an + equivalent expression, or NULL if none. 
*/ + + #if 0 + /* temporarily unused */ + static Bool isU32 ( IRExpr* e, UInt n ) + { + return (e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U32 + && e->Iex.Const.con->Ico.U32 == n); + } + #endif + IRExpr* guest_arm_spechelper ( HChar* function_name, + IRExpr** args ) + { + return NULL; + } + + + /*----------------------------------------------*/ + /*--- The exported fns .. ---*/ + /*----------------------------------------------*/ + + /* VISIBLE TO LIBVEX CLIENT */ + #if 0 + void LibVEX_GuestARM_put_flags ( UInt flags_native, + /*OUT*/VexGuestARMState* vex_state ) + { + vassert(0); // FIXME + + /* Mask out everything except N Z V C. */ + flags_native + &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C); + + vex_state->guest_CC_OP = ARMG_CC_OP_COPY; + vex_state->guest_CC_DEP1 = flags_native; + vex_state->guest_CC_DEP2 = 0; + } + #endif + + /* VISIBLE TO LIBVEX CLIENT */ + UInt LibVEX_GuestARM_get_flags ( /*IN*/VexGuestARMState* vex_state ) + { + UInt flags; + vassert(0); // FIXME + + flags = armg_calculate_flags_all( + vex_state->guest_CC_OP, + vex_state->guest_CC_DEP1, + vex_state->guest_CC_DEP2 + ); + return flags; + } + + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state ) + { + vex_state->guest_R0 = 0; + vex_state->guest_R1 = 0; + vex_state->guest_R2 = 0; + vex_state->guest_R3 = 0; + vex_state->guest_R4 = 0; + vex_state->guest_R5 = 0; + vex_state->guest_R6 = 0; + vex_state->guest_R7 = 0; + vex_state->guest_R8 = 0; + vex_state->guest_R9 = 0; + vex_state->guest_R10 = 0; + vex_state->guest_R11 = 0; + vex_state->guest_R12 = 0; + vex_state->guest_R13 = 0; + vex_state->guest_R14 = 0; + vex_state->guest_R15 = 0; + + // CAB: Want this? + //vex_state->guest_SYSCALLNO = 0; + + vex_state->guest_CC_OP = 0;// CAB: ? ARMG_CC_OP_COPY; + vex_state->guest_CC_DEP1 = 0; + vex_state->guest_CC_DEP2 = 0; + + // CAB: Want this? 
+ //vex_state->guest_EMWARN = 0; + + vex_state->guest_SYSCALLNO = 0; + } + + + /*-----------------------------------------------------------*/ + /*--- Describing the arm guest state, for the benefit ---*/ + /*--- of iropt and instrumenters. ---*/ + /*-----------------------------------------------------------*/ + + /* Figure out if any part of the guest state contained in minoff + .. maxoff requires precise memory exceptions. If in doubt return + True (but this is generates significantly slower code). + + We enforce precise exns for guest %ESP and %EIP only. + */ + Bool guest_arm_state_requires_precise_mem_exns ( Int minoff, + Int maxoff) + { + return True; // FIXME (also comment above) + #if 0 + Int esp_min = offsetof(VexGuestX86State, guest_ESP); + Int esp_max = esp_min + 4 - 1; + Int eip_min = offsetof(VexGuestX86State, guest_EIP); + Int eip_max = eip_min + 4 - 1; + + if (maxoff < esp_min || minoff > esp_max) { + /* no overlap with esp */ + } else { + return True; + } + + if (maxoff < eip_min || minoff > eip_max) { + /* no overlap with eip */ + } else { + return True; + } + + return False; + #endif + } + + + + #define ALWAYSDEFD(field) \ + { offsetof(VexGuestARMState, field), \ + (sizeof ((VexGuestARMState*)0)->field) } + + VexGuestLayout + armGuest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestARMState), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestARMState,guest_R13), + .sizeof_SP = 4, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestARMState,guest_R15), + .sizeof_IP = 4, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. */ + .n_alwaysDefd = 2, + /* flags thunk: OP is always defd, whereas DEP1 and DEP2 + have to be tracked. See detailed comment in gdefs.h on + meaning of thunk fields. 
*/ + + .alwaysDefd + = { /* 0 */ ALWAYSDEFD(guest_CC_OP), + /* 1 */ ALWAYSDEFD(guest_SYSCALLNO) + } + }; + + + /*---------------------------------------------------------------*/ + /*--- end guest_arm_helpers.c ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_arm_toIR.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_arm_toIR.c Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,2192 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_arm_toIR.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. 
In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Translates ARM(v4) code to IR. */ + + #include "libvex_basictypes.h" + #include "libvex_ir.h" + #include "libvex.h" + #include "libvex_guest_arm.h" + + #include "main_util.h" + #include "main_globals.h" + #include "guest_arm_defs.h" + + + /*------------------------------------------------------------*/ + /*--- Globals ---*/ + /*------------------------------------------------------------*/ + + /* These are set at the start of the translation of a BB, so that we + don't have to pass them around endlessly. CONST means does not + change during translation of a bb. + */ + + /* We need to know this to do sub-register accesses correctly. */ + /* CONST */ + static Bool host_is_bigendian; + + /* Pointer to the guest code area. */ + /* CONST */ + static UChar* guest_code; + + /* The guest address corresponding to guest_code[0]. */ + /* CONST */ + static Addr32 guest_pc_bbstart; + + /* The IRSB* into which we're generating code. */ + static IRSB* irsb; + + + /*------------------------------------------------------------*/ + /*--- Debugging output ---*/ + /*------------------------------------------------------------*/ + + #define DIP(format, args...) 
\ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + + #define DIS(buf, format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ## args) + + + + + /*------------------------------------------------------------*/ + /*--- Offsets of various parts of the arm guest state. ---*/ + /*------------------------------------------------------------*/ + + #define OFFB_R0 offsetof(VexGuestARMState,guest_R0) + #define OFFB_R1 offsetof(VexGuestARMState,guest_R1) + #define OFFB_R2 offsetof(VexGuestARMState,guest_R2) + #define OFFB_R3 offsetof(VexGuestARMState,guest_R3) + #define OFFB_R4 offsetof(VexGuestARMState,guest_R4) + #define OFFB_R5 offsetof(VexGuestARMState,guest_R5) + #define OFFB_R6 offsetof(VexGuestARMState,guest_R6) + #define OFFB_R7 offsetof(VexGuestARMState,guest_R7) + #define OFFB_R8 offsetof(VexGuestARMState,guest_R8) + #define OFFB_R9 offsetof(VexGuestARMState,guest_R9) + #define OFFB_R10 offsetof(VexGuestARMState,guest_R10) + #define OFFB_R11 offsetof(VexGuestARMState,guest_R11) + #define OFFB_R12 offsetof(VexGuestARMState,guest_R12) + #define OFFB_R13 offsetof(VexGuestARMState,guest_R13) + #define OFFB_R14 offsetof(VexGuestARMState,guest_R14) + #define OFFB_R15 offsetof(VexGuestARMState,guest_R15) + + // CAB: ? guest_SYSCALLNO; + + #define OFFB_CC_OP offsetof(VexGuestARMState,guest_CC_OP) + #define OFFB_CC_DEP1 offsetof(VexGuestARMState,guest_CC_DEP1) + #define OFFB_CC_DEP2 offsetof(VexGuestARMState,guest_CC_DEP2) + + // CAB: ? guest_EMWARN; + + + /*------------------------------------------------------------*/ + /*--- Disassemble an entire basic block ---*/ + /*------------------------------------------------------------*/ + + /* The results of disassembling an instruction. There are three + possible outcomes. For Dis_Resteer, the disassembler _must_ + continue at the specified address. For Dis_StopHere, the + disassembler _must_ terminate the BB. 
For Dis_Continue, we may at + our option either disassemble the next insn, or terminate the BB; + but in the latter case we must set the bb's ->next field to point + to the next instruction. */ + + typedef + enum { + Dis_StopHere, /* this insn terminates the BB; we must stop. */ + Dis_Continue, /* we can optionally continue into the next insn */ + Dis_Resteer /* followed a branch; continue at the spec'd addr */ + } + DisResult; + + + /* forward decls .. */ + static IRExpr* mkU32 ( UInt i ); + static void stmt ( IRStmt* st ); + + + /* disInstr disassembles an instruction located at &guest_code[delta], + and sets *size to its size. If the returned value is Dis_Resteer, + the next guest address is assigned to *whereNext. disInstr is not + permitted to return Dis_Resteer if either (1) resteerOK is False, + or (2) resteerOkFn, when applied to the address which it wishes to + resteer into, returns False. */ + + static DisResult disInstr ( /*IN*/ Bool resteerOK, + /*IN*/ Bool (*resteerOkFn) ( Addr64 ), + /*IN*/ Long delta, + /*OUT*/ Int* size, + /*OUT*/ Addr64* whereNext ); + + + /* This is the main (only, in fact) entry point for this module. */ + + /* Disassemble a complete basic block, starting at guest_pc_start, and + dumping the IR into global irsb. Returns the size, in bytes, of + the basic block. + */ + IRSB* bbToIR_ARM ( UChar* armCode, + Addr64 guest_pc_start, + VexGuestExtents* vge, + Bool (*byte_accessible)(Addr64), + Bool (*chase_into_ok)(Addr64), + Bool host_bigendian, + VexArchInfo* archinfo_guest ) + { + Long delta; + Int i, n_instrs, size, first_stmt_idx; + Addr64 guest_next; + Bool resteerOK; + DisResult dres; + static Int n_resteers = 0; + Int d_resteers = 0; + + /* check sanity .. 
*/ + vassert(vex_control.guest_max_insns >= 1); + vassert(vex_control.guest_max_insns < 500); + vassert(vex_control.guest_chase_thresh >= 0); + vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns); + + vassert(archinfo_guest->hwcaps == 0); + + /* Start a new, empty extent. */ + vge->n_used = 1; + vge->base[0] = guest_pc_start; + vge->len[0] = 0; + + /* Set up globals. */ + host_is_bigendian = host_bigendian; + guest_code = armCode; + guest_pc_bbstart = (Addr32)guest_pc_start; + irsb = emptyIRSB(); + + vassert((guest_pc_start >> 32) == 0); + + /* Delta keeps track of how far along the armCode array we + have so far gone. */ + delta = 0; + n_instrs = 0; + + while (True) { + vassert(n_instrs < vex_control.guest_max_insns); + + guest_next = 0; + resteerOK = toBool(n_instrs < vex_control.guest_chase_thresh); + first_stmt_idx = irsb->stmts_used; + + if (n_instrs > 0) { + /* for the first insn, the dispatch loop will have set + R15, but for all the others we have to do it ourselves. */ + stmt( IRStmt_Put( OFFB_R15, mkU32(toUInt(guest_pc_bbstart + delta))) ); + } + + dres = disInstr( resteerOK, chase_into_ok, + delta, &size, &guest_next ); + + /* Print the resulting IR, if needed. 
*/ + if (vex_traceflags & VEX_TRACE_FE) { + for (i = first_stmt_idx; i < irsb->stmts_used; i++) { + vex_printf(" "); + ppIRStmt(irsb->stmts[i]); + vex_printf("\n"); + } + } + + if (dres == Dis_StopHere) { + vassert(irsb->next != NULL); + if (vex_traceflags & VEX_TRACE_FE) { + vex_printf(" "); + vex_printf( "goto {"); + ppIRJumpKind(irsb->jumpkind); + vex_printf( "} "); + ppIRExpr( irsb->next ); + vex_printf( "\n"); + } + } + + delta += size; + vge->len[vge->n_used-1] = toUShort(vge->len[vge->n_used-1] + size); + n_instrs++; + DIP("\n"); + + vassert(size > 0 && size <= 18); + if (!resteerOK) + vassert(dres != Dis_Resteer); + if (dres != Dis_Resteer) + vassert(guest_next == 0); + + switch (dres) { + case Dis_Continue: + vassert(irsb->next == NULL); + if (n_instrs < vex_control.guest_max_insns) { + /* keep going */ + } else { + irsb->next = mkU32(toUInt(guest_pc_start+delta)); + return irsb; + } + break; + case Dis_StopHere: + vassert(irsb->next != NULL); + return irsb; + case Dis_Resteer: + vpanic("bbToIR_ARM: Dis_Resteer: fixme"); + /* need to add code here to start a new extent ... */ + vassert(irsb->next == NULL); + /* figure out a new delta to continue at. */ + vassert(chase_into_ok(guest_next)); + delta = guest_next - guest_pc_start; + n_resteers++; + d_resteers++; + if (0 && (n_resteers & 0xFF) == 0) + vex_printf("resteer[%d,%d] to %p (delta = %lld)\n", + n_resteers, d_resteers, + ULong_to_Ptr(guest_next), delta); + break; + } + } + } + + + /*------------------------------------------------------------*/ + /*--- Helper bits and pieces for deconstructing the ---*/ + /*--- ARM insn stream. ---*/ + /*------------------------------------------------------------*/ + + /* Add a statement to the list held by "irsb". */ + static void stmt ( IRStmt* st ) + { + addStmtToIRSB( irsb, st ); + } + + /* Generate a new temporary of the given type. 
*/ + static IRTemp newTemp ( IRType ty ) + { + vassert(isPlausibleIRType(ty)); + return newIRTemp( irsb->tyenv, ty ); + } + + #if 0 + /* Bomb out if we can't handle something. */ + __attribute__ ((noreturn)) + static void unimplemented ( Char* str ) + { + vex_printf("armToIR: unimplemented feature\n"); + vpanic(str); + } + #endif + + /* Various simple conversions */ + + #if 0 + static UInt extend_s_8to32( UInt x ) + { + return (UInt)((((Int)x) << 24) >> 24); + } + + static UInt extend_s_16to32 ( UInt x ) + { + return (UInt)((((Int)x) << 16) >> 16); + } + #endif + + static UInt extend_s_24to32 ( UInt x ) + { + return (UInt)((((Int)x) << 8) >> 8); + } + + #if 0 + /* Fetch a byte from the guest insn stream. */ + static UChar getIByte ( UInt delta ) + { + return guest_code[delta]; + } + #endif + + /* Get a 8/16/32-bit unsigned value out of the insn stream. */ + + #if 0 + static UInt getUChar ( UInt delta ) + { + UInt v = guest_code[delta+0]; + return v & 0xFF; + } + #endif + + #if 0 + static UInt getUDisp16 ( UInt delta ) + { + UInt v = guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v & 0xFFFF; + } + #endif + + #if 0 + static UInt getUDisp32 ( UInt delta ) + { + UInt v = guest_code[delta+3]; v <<= 8; + v |= guest_code[delta+2]; v <<= 8; + v |= guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v; + } + #endif + + #if 0 + static UInt getUDisp ( Int size, UInt delta ) + { + switch (size) { + case 4: return getUDisp32(delta); + case 2: return getUDisp16(delta); + case 1: return getUChar(delta); + default: vpanic("getUDisp(ARM)"); + } + return 0; /*notreached*/ + } + #endif + + #if 0 + /* Get a byte value out of the insn stream and sign-extend to 32 + bits. 
*/ + static UInt getSDisp8 ( UInt delta ) + { + return extend_s_8to32( (UInt) (guest_code[delta]) ); + } + #endif + + #if 0 + static UInt getSDisp16 ( UInt delta0 ) + { + UChar* eip = (UChar*)(&guest_code[delta0]); + UInt d = *eip++; + d |= ((*eip++) << 8); + return extend_s_16to32(d); + } + #endif + + #if 0 + static UInt getSDisp ( Int size, UInt delta ) + { + switch (size) { + case 4: return getUDisp32(delta); + case 2: return getSDisp16(delta); + case 1: return getSDisp8(delta); + default: vpanic("getSDisp(ARM)"); + } + return 0; /*notreached*/ + } + #endif + + + /*------------------------------------------------------------*/ + /*--- Helpers for constructing IR. ---*/ + /*------------------------------------------------------------*/ + + /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit + register references, we need to take the host endianness into + account. Supplied value is 0 .. 7 and in the Intel instruction + encoding. */ + + #if 0 + static IRType szToITy ( Int n ) + { + switch (n) { + case 1: return Ity_I8; + case 2: return Ity_I16; + case 4: return Ity_I32; + default: vpanic("szToITy(ARM)"); + } + } + #endif + + static Int integerGuestRegOffset ( UInt archreg ) + { + vassert(archreg < 16); + + vassert(!host_is_bigendian); //TODO: is this necessary? 
+ // jrs: probably not; only matters if we reference sub-parts + // of the arm registers, but that isn't the case + switch (archreg) { + case 0: return offsetof(VexGuestARMState,guest_R0); + case 1: return offsetof(VexGuestARMState,guest_R1); + case 2: return offsetof(VexGuestARMState,guest_R2); + case 3: return offsetof(VexGuestARMState,guest_R3); + case 4: return offsetof(VexGuestARMState,guest_R4); + case 5: return offsetof(VexGuestARMState,guest_R5); + case 6: return offsetof(VexGuestARMState,guest_R6); + case 7: return offsetof(VexGuestARMState,guest_R7); + case 8: return offsetof(VexGuestARMState,guest_R8); + case 9: return offsetof(VexGuestARMState,guest_R9); + case 10: return offsetof(VexGuestARMState,guest_R10); + case 11: return offsetof(VexGuestARMState,guest_R11); + case 12: return offsetof(VexGuestARMState,guest_R12); + case 13: return offsetof(VexGuestARMState,guest_R13); + case 14: return offsetof(VexGuestARMState,guest_R14); + case 15: return offsetof(VexGuestARMState,guest_R15); + } + + vpanic("integerGuestRegOffset(arm,le)"); /*notreached*/ + } + + static IRExpr* getIReg ( UInt archreg ) + { + vassert(archreg < 16); + return IRExpr_Get( integerGuestRegOffset(archreg), Ity_I32 ); + } + + /* Ditto, but write to a reg instead. 
*/ + static void putIReg ( UInt archreg, IRExpr* e ) + { + vassert(archreg < 16); + stmt( IRStmt_Put(integerGuestRegOffset(archreg), e) ); + } + + static void assign ( IRTemp dst, IRExpr* e ) + { + stmt( IRStmt_WrTmp(dst, e) ); + } + + static void storeLE ( IRExpr* addr, IRExpr* data ) + { + stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) ); + } + + static IRExpr* unop ( IROp op, IRExpr* a ) + { + return IRExpr_Unop(op, a); + } + + static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) + { + return IRExpr_Binop(op, a1, a2); + } + + static IRExpr* mkexpr ( IRTemp tmp ) + { + return IRExpr_RdTmp(tmp); + } + + static IRExpr* mkU8 ( UChar i ) + { + return IRExpr_Const(IRConst_U8(i)); + } + + #if 0 + static IRExpr* mkU16 ( UInt i ) + { + vassert(i < 65536); + return IRExpr_Const(IRConst_U16(i)); + } + #endif + + static IRExpr* mkU32 ( UInt i ) + { + return IRExpr_Const(IRConst_U32(i)); + } + + #if 0 + static IRExpr* mkU ( IRType ty, UInt i ) + { + if (ty == Ity_I8) return mkU8(i); + if (ty == Ity_I16) return mkU16(i); + if (ty == Ity_I32) return mkU32(i); + /* If this panics, it usually means you passed a size (1,2,4) + value as the IRType, rather than a real IRType. */ + vpanic("mkU(ARM)"); + } + #endif + + static IRExpr* loadLE ( IRType ty, IRExpr* data ) + { + return IRExpr_Load(False, Iend_LE, ty, data); + } + + #if 0 + static IROp mkSizedOp ( IRType ty, IROp op8 ) + { + Int adj; + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 + || op8 == Iop_Mul8 + || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 + || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 + || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 + || op8 == Iop_Not8 ); + adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); + return adj + op8; + } + #endif + + #if 0 + static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd ) + { + if (szSmall == 1 && szBig == 4) { + return signd ? 
Iop_8Sto32 : Iop_8Uto32;
+   }
+   if (szSmall == 1 && szBig == 2) {
+      return signd ? Iop_8Sto16 : Iop_8Uto16;
+   }
+   if (szSmall == 2 && szBig == 4) {
+      return signd ? Iop_16Sto32 : Iop_16Uto32;
+   }
+   vpanic("mkWidenOp(ARM,guest)");
+}
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for %flags.                                  ---*/
+/*------------------------------------------------------------*/
+
+/* -------------- Evaluating the flags-thunk. -------------- */
+
+#if 0
+/* Build IR to calculate all the flags from stored
+   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.
+   Returns an expression :: Ity_I32. */
+static IRExpr* mk_armg_calculate_flags_all ( void )
+{
+   IRExpr** args
+      = mkIRExprVec_3( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32) );
+   IRExpr* call
+      = mkIRExprCCall(
+           Ity_I32,
+           0/*regparm*/,
+           "armg_calculate_flags_all", &armg_calculate_flags_all,
+           args
+        );
+
+   /* Exclude OP from definedness checking.  We're only
+      interested in DEP1 and DEP2. */
+   call->Iex.CCall.cee->mcx_mask = 1;
+   return call;
+}
+#endif
+
+/* Build IR to calculate just the carry flag from the stored thunk.
+   Only CC_OP, CC_DEP1 and CC_DEP2 are read and passed to the
+   armg_calculate_flags_c helper (no CC_NDEP field is passed).
+   Returns an expression :: Ity_I32; callers in this file shift it
+   right by ARMG_CC_SHIFT_C to bring the carry down. */
+static IRExpr* mk_armg_calculate_flags_c ( void )
+{
+   IRExpr** args
+      = mkIRExprVec_3( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32) );
+   IRExpr* call
+      = mkIRExprCCall(
+           Ity_I32,
+           0/*regparm*/,
+           "armg_calculate_flags_c", &armg_calculate_flags_c,
+           args
+        );
+   /* Exclude OP from definedness checking.  We're only
+      interested in DEP1 and DEP2.  (Mask bit 0 is the first
+      argument, i.e. CC_OP.) */
+   call->Iex.CCall.cee->mcx_mask = 1;
+   return call;
+}
+
+
+/* Build IR to calculate some particular condition from stored
+   CC_OP/CC_DEP1/CC_DEP2.  The helper call itself is typed Ity_I32;
+   the result is narrowed with Iop_32to1, so the returned expression
+   is of type Ity_I1.
+*/
+static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
+{
+   IRExpr** args
+      = mkIRExprVec_4( mkU32(cond),
+                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32) );
+   IRExpr* call
+      = mkIRExprCCall(
+           Ity_I32,
+           0/*regparm*/,
+           "armg_calculate_condition", &armg_calculate_condition,
+           args
+        );
+
+   /* Exclude the requested condition and OP from definedness
+      checking.  We're only interested in DEP1 and DEP2.
+      (Mask bits 0 and 1 are the first two arguments.) */
+   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1);
+   return unop(Iop_32to1, call);
+}
+
+
+
+
+
+
+
+/* -------------- Building the flags-thunk. -------------- */
+
+/* The machinery in this section builds the flag-thunk following a
+   flag-setting operation.  Hence the various setFlags_* functions.
+*/
+
+#if 0
+static Bool isAddSub ( IROp op8 )
+{
+   return op8 == Iop_Add8 || op8 == Iop_Sub8;
+}
+#endif
+
+#if 0
+static Bool isLogic ( IROp op8 )
+{
+   return op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8;
+}
+#endif
+
+/* U-widen 8/16/32 bit int expr to 32.  An Ity_I32 expression is
+   returned unchanged; any type other than I8/I16/I32 panics. */
+static IRExpr* widenUto32 ( IRExpr* e )
+{
+   switch (typeOfIRExpr(irsb->tyenv,e)) {
+      case Ity_I32: return e;
+      case Ity_I16: return unop(Iop_16Uto32,e);
+      case Ity_I8:  return unop(Iop_8Uto32,e);
+      default: vpanic("widenUto32");
+   }
+}
+
+#if 0
+/* S-widen 8/16/32 bit int expr to 32. */
+static IRExpr* widenSto32 ( IRExpr* e )
+{
+   switch (typeOfIRExpr(irsb->tyenv,e)) {
+      case Ity_I32: return e;
+      case Ity_I16: return unop(Iop_16Sto32,e);
+      case Ity_I8:  return unop(Iop_8Sto32,e);
+      default: vpanic("widenSto32");
+   }
+}
+#endif
+
+/* Narrow 8/16/32 bit int expr to 8/16/32.  Clearly only some
+   of these combinations make sense.  Handled here: identical
+   source and destination types (e returned unchanged), I32 -> I16
+   and I32 -> I8.  Anything else prints both types and panics. */
+static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
+{
+   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
+   if (src_ty == dst_ty)
+      return e;
+   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
+      return unop(Iop_32to16, e);
+   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
+      return unop(Iop_32to8, e);
+
+   vex_printf("\nsrc, dst tys are: ");
+   ppIRType(src_ty);
+   vex_printf(", ");
+   ppIRType(dst_ty);
+   vex_printf("\n");
+   vpanic("narrowTo(ARM)");
+}
+
+
+/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
+   auto-sized up to the real op.  dep1 and dep2 are zero-widened to
+   32 bits before being written. */
+
+static
+void setFlags_DEP1_DEP2 ( IROp op, IRTemp dep1, IRTemp dep2 )
+{
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(op)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
+}
+
+
+/* Set the OP and DEP1 fields only, and write zero to DEP2. */
+
+#if 0
+static
+void setFlags_DEP1 ( IROp op, IRTemp dep1 )
+{
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(op)) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
+}
+#endif
+
+#if 0
+/* For shift operations, we put in the result and the undershifted
+   result.  Except if the shift amount is zero, the thunk is left
+   unchanged. */
+
+static void setFlags_DEP1_DEP2_shift ( IROp    op,
+                                       IRTemp  res,
+                                       IRTemp  resUS,
+                                       IRTemp  guard )
+{
+   vassert(guard);
+
+   /* DEP1 contains the result, DEP2 contains the undershifted value. */
+   stmt( IRStmt_Put( OFFB_CC_OP,
+                     IRExpr_Mux0X( mkexpr(guard),
+                                   IRExpr_Get(OFFB_CC_OP,Ity_I32),
+                                   mkU32(op))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1,
+                     IRExpr_Mux0X( mkexpr(guard),
+                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
+                                   widenUto32(mkexpr(res)))) );
+   stmt( IRStmt_Put( OFFB_CC_DEP2,
+                     IRExpr_Mux0X( mkexpr(guard),
+                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
+                                   widenUto32(mkexpr(resUS)))) );
+}
+#endif
+
+
+
+
+#if 0
+/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
+   two arguments. */
+
+static
+void setFlags_MUL ( IRTemp arg1, IRTemp arg2, UInt op )
+{
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(op) ) );
+   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
+   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
+}
+#endif
+
+
+
+
+
+
+
+
+
+/* -------------- Condition codes. -------------- */
+
+/* Condition codes, using the ARM encoding.  Returns the mnemonic
+   suffix wrapped in braces for use in disassembly output; the
+   "always" condition yields the empty string.  Panics on a value
+   outside the enumeration. */
+
+static HChar* name_ARMCondcode ( ARMCondcode cond )
+{
+   switch (cond) {
+      case ARMCondEQ:  return "{eq}";
+      case ARMCondNE:  return "{ne}";
+      case ARMCondHS:  return "{hs}";  // or 'cs'
+      case ARMCondLO:  return "{lo}";  // or 'cc'
+      case ARMCondMI:  return "{mi}";
+      case ARMCondPL:  return "{pl}";
+      case ARMCondVS:  return "{vs}";
+      case ARMCondVC:  return "{vc}";
+      case ARMCondHI:  return "{hi}";
+      case ARMCondLS:  return "{ls}";
+      case ARMCondGE:  return "{ge}";
+      case ARMCondLT:  return "{lt}";
+      case ARMCondGT:  return "{gt}";
+      case ARMCondLE:  return "{le}";
+      case ARMCondAL:  return "";  // {al}: default, doesn't need specifying
+      case ARMCondNV:  return "{nv}";
+      default: vpanic("name_ARMCondcode");
+   }
+}
+
+#if 0
+static
+ARMCondcode positiveIse_ARMCondcode ( ARMCondcode  cond,
+                                      Bool*     needInvert )
+{
+   vassert(cond >= ARMCondEQ && cond <= ARMCondNV);
+   if (cond & 1) {
+      *needInvert = True;
+      return cond-1;
+   } else {
+      *needInvert = False;
+      return cond;
+   }
+}
+#endif
+
+
+/* Addressing Mode 1 - DP ops
+   Addressing Mode 2 - Load/Store word/ubyte (scaled)
+
+   Name of the shift selected by shift_op.  The accepted values are
+   the ones listed in the cases below (they match an instr[7:4]
+   nibble as extracted by dis_shift); any other value panics.
+   imm_val is consulted only for 0x6, where a zero immediate means
+   "rrx" rather than "ror".
+ */
+static HChar* name_ARMShiftOp ( UChar shift_op, UChar imm_val )
+{
+   switch (shift_op) {
+      case 0x0: case 0x1: case 0x8: return "lsl";
+      case 0x2: case 0x3: case 0xA: return "lsr";
+      case 0x4: case 0x5: case 0xC: return "asr";
+      case 0x6: return (imm_val==0) ? "rrx" : "ror";
+      case 0x7: case 0xE: return "ror";
+      default: vpanic("name_ARMShiftcode");
+   }
+}
+
+
+/* Addressing Mode 4 - Load/Store Multiple.
+   mode is the 2-bit value (P<<1)|U; values above 3 panic. */
+static HChar* name_ARMAddrMode4 ( UChar mode )
+{
+   /* See ARM ARM A5-55 for alternative names for stack operations
+      ldmfa (full ascending), etc. */
+   switch (mode) {
+      case 0x0: return "da"; // Decrement after
+      case 0x1: return "ia"; // Increment after
+      case 0x2: return "db"; // Decrement before
+      case 0x3: return "ib"; // Increment before
+      default: vpanic("name_ARMAddrMode4");
+   }
+}
+
+/* Data Processing ops: mnemonic for the 4-bit opcode field.
+   Values above 0xF panic. */
+static HChar* name_ARMDataProcOp ( UChar opc )
+{
+   switch (opc) {
+      case 0x0: return "and";
+      case 0x1: return "eor";
+      case 0x2: return "sub";
+      case 0x3: return "rsb";
+      case 0x4: return "add";
+      case 0x5: return "adc";
+      case 0x6: return "sbc";
+      case 0x7: return "rsc";
+      case 0x8: return "tst";
+      case 0x9: return "teq";
+      case 0xA: return "cmp";
+      case 0xB: return "cmn";
+      case 0xC: return "orr";
+      case 0xD: return "mov";
+      case 0xE: return "bic";
+      case 0xF: return "mvn";
+      default: vpanic("name_ARMDataProcOp");
+   }
+}
+
+
+
+/*
+   Addressing mode 4 - LOAD/STORE multiple, LDM|STM
+   ARM ARM A5-48
+
+   Emits IR for one LDM/STM instruction and returns True if the
+   instruction was decoded, False otherwise.
+*/
+static
+Bool dis_loadstore_mult ( UInt theInstr )
+{
+   UChar flags   = toUChar((theInstr >> 20) & 0x1F); // theInstr[24:20]
+   UChar Rn_addr = toUChar((theInstr >> 16) & 0xF);
+   IRTemp Rn = newTemp(Ity_I32);
+   IRTemp Rn_orig = newTemp(Ity_I32);
+   UInt reg_list = theInstr & 0xFFFF;  // each bit addresses a register: R15 to R0
+
+   // Load(1) | Store(0)
+   UChar L  = toUChar((flags >> 0) & 1);
+   // (W)riteback Rn (incr(U=1) | decr(U=0) by n_bytes)
+   UChar W  = toUChar((flags >> 1) & 1);
+   // Privileged mode flag - *** CAB TODO ***
+   UChar S  = toUChar((flags >> 2) & 1);
+   // Txfr ctl: Direction = upwards(1) | downwards(0)
+   UChar U  = toUChar((flags >> 3) & 1);
+   // Txfr ctl: Rn within(P=1) | outside(P=0) accessed mem.
+   // PU is instr[24:23]: P in bit 1, U in bit 0.
+   UChar PU = toUChar((flags >> 3) & 3);
+
+   IRTemp start_addr = newTemp(Ity_I32);
+ IRTemp end_addr = newTemp(Ity_I32); + IRTemp data=0; + UInt n_bytes=0; + UInt tmp_reg = reg_list; + UInt reg_idx, offset; + Bool decode_ok = True; + + HChar* cond_name = name_ARMCondcode( (theInstr >> 28) & 0xF ); + HChar reg_names[70]; + UInt buf_offset; + + while (tmp_reg > 0) { // Count num bits in reg_list => num_bytes + if (tmp_reg & 1) { n_bytes += 4; } + tmp_reg = tmp_reg >> 1; + } + + assign( Rn, getIReg(Rn_addr) ); + assign( Rn_orig, mkexpr(Rn) ); + + switch (PU) { // + case 0x0: // Decrement after (DA) + assign( start_addr, binop( Iop_Add32, mkexpr(Rn), mkU32(n_bytes + 4) ) ); + assign( end_addr, mkexpr(Rn) ); + break; + + case 0x1: // Increment after (IA) + assign( start_addr, mkexpr(Rn) ); + assign( end_addr, binop( Iop_Add32, mkexpr(Rn), mkU32(n_bytes - 4) ) ); + break; + + case 0x2: // Decrement before (DB) + assign( start_addr, binop( Iop_Sub32, mkexpr(Rn), mkU32(n_bytes) ) ); + assign( end_addr, binop( Iop_Sub32, mkexpr(Rn), mkU32(4) ) ); + break; + + case 0x3: // Increment before (IB) + assign( start_addr, binop( Iop_Add32, mkexpr(Rn), mkU32(4) ) ); + assign( end_addr, binop( Iop_Add32, mkexpr(Rn), mkU32(n_bytes) ) ); + break; + + default: + vex_printf("dis_loadstore_mult(ARM): No such case: 0x%x", PU); + return False; + } + + if (W==1) { + if (U==1) { // upwards + putIReg( Rn_addr, binop( Iop_Add32, mkexpr(Rn), mkU32(n_bytes) ) ); + } else { // downwards + putIReg( Rn_addr, binop( Iop_Sub32, mkexpr(Rn), mkU32(n_bytes) ) ); + } + } + + + /* + Loop through register list, LOAD/STORE indicated registers + Lowest numbered reg -> lowest address, so start with lowest register + reg_idx: guest register address + offset : current mem offset from start_addr + */ + reg_names[0] = '\0'; + buf_offset=0; + offset=0; + for (reg_idx=0; reg_idx < 16; reg_idx++) { + if (( reg_list >> reg_idx ) & 1) { // reg_list[i] == 1? 
+ + if (L==1) { // LOAD Ri, (start_addr + offset) + + if (Rn_addr == reg_idx && W==1) { // Undefined - ARM ARM A4-31 + decode_ok=False; + break; + } + + assign( data, loadLE(Ity_I32, binop(Iop_Add32, + mkexpr(start_addr), + mkU32(offset))) ); + if (reg_idx == 15) { + // assuming architecture < 5: See ARM ARM A4-31 + putIReg( reg_idx, binop(Iop_And32, mkexpr(data), mkU32(0xFFFFFFFC)) ); + } else { + putIReg( reg_idx, mkexpr(data) ); + } + } else { // STORE Ri, (start_addr + offset) + + // ARM ARM A4-85 (Operand restrictions) + if (reg_idx == Rn_addr && W==1) { // Rn in reg_list && writeback + if (offset != 0) { // Undefined - See ARM ARM A4-85 + decode_ok=False; + break; + } + // is lowest reg in reg_list: store Rn_orig + storeLE( mkexpr(start_addr), mkexpr(Rn_orig) ); + } else { + storeLE( binop(Iop_Add32, mkexpr(start_addr), mkU32(offset) ), + getIReg(reg_idx) ); + } + } + offset += 4; + + reg_names[buf_offset++] = 'R'; + if (reg_idx > 9) { + reg_names[buf_offset++] = '1'; + reg_names[buf_offset++] = (HChar)toUChar(38 + reg_idx); + } else { + reg_names[buf_offset++] = (HChar)toUChar(48 + reg_idx); + } + reg_names[buf_offset++] = ','; + // CAB: Eugh! Where's strcpy?! + } + } + if (buf_offset > 0) { + reg_names[buf_offset-1] = '\0'; + } + DIP("%s%s%s R%d%s, {%s}%s\n", (L==1) ? "ldm":"stm", cond_name, + name_ARMAddrMode4( PU ), Rn_addr, (W==1) ? "!" : "", + reg_names, (S==1) ? 
"^" : ""); + + // CAB TODO: + // IR assert( end_addr == (start_addr + offset) - 8 ) + + if (offset == 0) { // Unpredictable - ARM ARM A5-21 + vex_printf("dis_loadstore_mult(arm): Unpredictable - offset==0\n"); + decode_ok = False; + } + + return decode_ok; + } + + + + + + static + Bool dis_loadstore_w_ub_address ( UInt theInstr, IRTemp* address, HChar* buf ) + { + UChar is_reg = toUChar((theInstr >> 25) & 0x1); + // immediate | register offset/index + UInt flags = (theInstr >> 20) & 0x3F; // theInstr[25:20] + UChar Rn_addr = toUChar((theInstr >> 16) & 0xF); + UChar Rm_addr = toUChar((theInstr >> 00) & 0xF); + UChar shift_op = toUChar((theInstr >> 04) & 0xFF); + UInt offset_12 = (theInstr >> 00) & 0xFFF; + IRTemp Rn = newTemp(Ity_I32); + IRTemp Rm = newTemp(Ity_I32); + UChar shift_imm, shift; + + UChar W = toUChar((flags >> 1) & 1); // base register writeback flag - See *Note + UChar U = toUChar((flags >> 3) & 1); // offset is added(1)|subtracted(0) from the base + UChar P = toUChar((flags >> 4) & 1); // addressing mode flag - See *Note + /* *Note + P==0: post-indexed addressing: addr -> Rn + W==0: normal mem access + W==1: unprivileged mem access + P==1: W==0: offset addressing: Rn not updated - ARM ARM A5-20 + W==1: pre-indexed addressing: addr -> Rn + */ + + IRTemp scaled_index = newTemp(Ity_I32); + IRTemp reg_offset = newTemp(Ity_I32); + + IRTemp oldFlagC = newTemp(Ity_I32); + + HChar buf2[30]; + HChar buf3[20]; + buf3[0] = '\0'; + + if (Rn_addr == 15) { + if (P==1 && W==0) { // offset addressing + // CAB: This right? + assign( Rn, binop(Iop_And32, getIReg(15), mkU32(8)) ); + } else { // Unpredictable - ARM ARM A5-25,29... 
+         vex_printf("dis_loadstore_w_ub_address(arm): Unpredictable - Rn_addr==15\n");
+         return False;
+      }
+   } else {
+      assign( Rn, getIReg(Rn_addr) );
+   }
+
+   /*
+     Retrieve / Calculate reg_offset
+   */
+   if (is_reg) {
+      if (Rm_addr == 15) {               // Unpredictable - ARM ARM A5-21
+         vex_printf("dis_loadstore_w_ub_address(arm): Unpredictable - Rm_addr==15\n");
+         return False;
+      }
+      if (P==0 || W==1) {  // pre|post-indexed addressing
+         if (Rm_addr == Rn_addr) {      // Unpredictable - ARM ARM A5-25
+            vex_printf("dis_loadstore_w_ub_address(arm): Unpredictable - Rm_addr==Rn_addr\n");
+            return False;
+         }
+      }
+      assign( Rm, getIReg(Rm_addr) );
+
+      if (shift_op == 0) {  // Register addressing
+         assign( reg_offset, mkexpr(Rm) );
+      } else {              // Scaled Register addressing
+         shift_imm = toUChar((shift_op >> 3) & 0x1F);   // instr[11:7]
+         shift     = toUChar((shift_op >> 1) & 0x3);    // instr[6:5]
+
+         /* IR shift amounts are 8-bit values, so every amount below is
+            built with mkU8 and folded at translation time. */
+         switch (shift) {
+         case 0x0: // LSL
+            assign( scaled_index, binop(Iop_Shl32, mkexpr(Rm), mkU8(shift_imm)) );
+            break;
+
+         case 0x1: // LSR
+            if (shift_imm) {
+               assign( scaled_index, binop(Iop_Shr32, mkexpr(Rm), mkU8(shift_imm)) );
+            } else {   // LSR #32
+               assign( scaled_index, mkU32(0) );
+            }
+            break;
+
+         case 0x2: // ASR
+            /* shift_imm == 0 encodes ASR #32, i.e. 32 copies of
+               Rm[31]; an arithmetic shift by 31 yields the same. */
+            assign( scaled_index,
+                    binop(Iop_Sar32, mkexpr(Rm),
+                          mkU8(shift_imm ? shift_imm : 31)) );
+            break;
+
+         case 0x3: // ROR|RRX
+            assign( oldFlagC, binop(Iop_Shr32,
+                                    mk_armg_calculate_flags_c(),
+                                    mkU8(ARMG_CC_SHIFT_C)) );
+
+            if (shift_imm == 0) { // RRX (ARM ARM A5-17)
+               // 33 bit ROR using carry flag as the 33rd bit
+               // op = Rm >> 1, carry flag replacing vacated bit position.
+               // scaled_index = (c_flag << 31) | (Rm >> 1)
+               assign( scaled_index, binop(Iop_Or32,
+                                           binop(Iop_Shl32, mkexpr(oldFlagC), mkU8(31)),
+                                           binop(Iop_Shr32, mkexpr(Rm), mkU8(1))) );
+
+            } else { // ROR
+               // scaled_index = Rm ROR shift_imm
+               //              = (Rm >> shift_imm) | (Rm << (32-shift_imm))
+               assign( scaled_index,
+                       binop(Iop_Or32,
+                             binop(Iop_Shr32, mkexpr(Rm), mkU8(shift_imm)),
+                             binop(Iop_Shl32, mkexpr(Rm),
+                                   mkU8(toUChar(32 - shift_imm)))) );
+            }
+            break;
+
+         default:
+            vex_printf("dis_loadstore_w_ub(ARM): No such case: 0x%x", shift);
+            return False;
+         }
+         assign( reg_offset, mkexpr(scaled_index) );
+
+         /* name_ARMShiftOp takes 0x0/0x2/0x4/0x6 for lsl/lsr/asr/ror,
+            which is the 2-bit shift type times two.  Passing the raw
+            8-bit shift_op field could reach its vpanic. */
+         if (shift == 0x3 && shift_imm == 0) {
+            DIS(buf3, ", %s", name_ARMShiftOp(toUChar(shift << 1), shift_imm));
+         } else {
+            DIS(buf3, ", %s #%d",
+                name_ARMShiftOp(toUChar(shift << 1), shift_imm),
+                shift_imm);
+         }
+      }
+      DIS(buf2, "%cR%d%s", (U==1) ? '+' : '-', Rm_addr, buf3);
+   } else { // immediate
+      assign( reg_offset, mkU32(offset_12) );
+
+      DIS(buf2, "#%c%u", (U==1) ? '+' : '-', offset_12);
+   }
+   DIS(buf, "[R%d%s, %s%s", Rn_addr,
+       (P==0) ? "]" : "", buf2,
+       (P==1) ? ((W==1) ? "]!" : "]") : "");
+
+   /*
+     Depending on P,U,W, write to Rn and set address to load/store
+   */
+   if (P==1) {       // offset | pre-indexed addressing
+      if (U == 1) { // - increment
+         assign( *address, binop(Iop_Add32, mkexpr(Rn), mkexpr(reg_offset)) );
+      } else {      // - decrement
+         assign( *address, binop(Iop_Sub32, mkexpr(Rn), mkexpr(reg_offset)) );
+      }
+      if (W == 1) { // pre-indexed addressing, base register writeback
+         putIReg( Rn_addr, mkexpr(*address) );
+      }
+   } else {          // post-indexed addressing
+      assign( *address, mkexpr(Rn) );
+      if (U == 1) { // - increment
+         putIReg( Rn_addr, binop( Iop_Add32, mkexpr(Rn), mkexpr(reg_offset) ) );
+      } else {      // - decrement
+         putIReg( Rn_addr, binop( Iop_Sub32, mkexpr(Rn), mkexpr(reg_offset) ) );
+      }
+   }
+   return True;
+}
+
+
+
+
+/*
+   Addressing mode 2 - LOAD/STORE word or unsigned byte
+   ARM ARM A5-18
+
+   Emits IR for LDR/LDRB/STR/STRB.  Returns True if the instruction
+   was decoded, False for encodings rejected as Unpredictable or
+   Implementation Defined.
+*/
+static
+Bool dis_loadstore_w_ub ( UInt theInstr )
+{
+   UInt  flags   = (theInstr >> 20) & 0x3F;   // theInstr[25:20]
+   UChar Rn_addr = toUChar((theInstr >> 16) & 0xF);
+   UChar Rd_addr = toUChar((theInstr >> 12) & 0xF);
+   IRTemp address = newTemp(Ity_I32);
+
+   UChar L  = toUChar((flags >> 0) & 1); // Load(1) | Store(0)
+   UChar W  = toUChar((flags >> 1) & 1); // base register writeback
+   UChar B  = toUChar((flags >> 2) & 1); // access = unsigned byte(1) | word(0)
+
+   IRTemp value      = newTemp(Ity_I32);
+   IRTemp data       = newTemp(Ity_I32);
+   IRTemp data_ror8  = newTemp(Ity_I32);
+   IRTemp data_ror16 = newTemp(Ity_I32);
+   IRTemp data_ror24 = newTemp(Ity_I32);
+   IRExpr* expr_addr_10;
+   HChar* cond_name = name_ARMCondcode( (theInstr >> 28) & 0xF );
+   HChar dis_buf[50];
+
+
+   vassert(((theInstr >> 26) & 0x3) == 0x1);
+
+   // Get the address to load/store
+   if (!dis_loadstore_w_ub_address(theInstr, &address, dis_buf)) { return False; }
+
+   DIP("%s%s%s R%d, %s\n", (L==1) ? "ldr" : "str", cond_name,
+       (B==1) ? "b" : "", Rd_addr, dis_buf);
+
+   // NOTE(review): post-indexed forms (P==0) also write Rn back but
+   // are not covered by this W-only test - confirm against ARM ARM.
+   if (Rd_addr == Rn_addr && W==1) {  // Unpredictable - ARM ARM A4-39,41,89,91
+      vex_printf("dis_loadstore_w_ub(arm): Unpredictable - Rd_addr==Rn_addr\n");
+      return False;
+   }
+
+   /*
+     LOAD/STORE Rd, address
+   */
+   if (L==1) { // LOAD
+      if (B==1) {  // unsigned byte (LDRB): ARM ARM A4-40
+         if (Rd_addr == 15) {  // Unpredictable - ARM ARM A4-40
+            vex_printf("dis_loadstore_w_ub(arm): Unpredictable - Rd_addr==15\n");
+            return False;
+         }
+         /* Registers are 32 bits wide: zero-extend the loaded byte. */
+         putIReg( Rd_addr, unop(Iop_8Uto32, loadLE( Ity_I8, mkexpr( address ) )) );
+      }
+      else {       // word (LDR): ARM ARM A4-38
+         expr_addr_10 = binop(Iop_And32, mkexpr(address), mkU32(0x3));
+
+         /*
+           CAB TODO
+           if (Rd_addr == 15 && address[1:0] == 0) => Unpredictable
+           How to bomb out using IR?
+         */
+
+         /* LOAD memory data (4 bytes).  The access ignores
+            address[1:0]; those bits only select the rotation applied
+            below, so the load is made from the word-aligned address. */
+         assign( data, loadLE( Ity_I32,
+                               binop(Iop_And32, mkexpr(address),
+                                     mkU32(0xFFFFFFFC)) ) );
+
+         /* Each rotation is a single assignment with constant 8-bit
+            shift amounts: x ROR n == (x >> n) | (x << (32-n)). */
+         // data ROR 8
+         assign( data_ror8,
+                 binop( Iop_Or32,
+                        binop( Iop_Shr32, mkexpr(data), mkU8(8) ),
+                        binop( Iop_Shl32, mkexpr(data), mkU8(24) )));
+         // data ROR 16
+         assign( data_ror16,
+                 binop( Iop_Or32,
+                        binop( Iop_Shr32, mkexpr(data), mkU8(16) ),
+                        binop( Iop_Shl32, mkexpr(data), mkU8(16) )));
+
+         // data ROR 24
+         assign( data_ror24,
+                 binop( Iop_Or32,
+                        binop( Iop_Shr32, mkexpr(data), mkU8(24) ),
+                        binop( Iop_Shl32, mkexpr(data), mkU8(8) )));
+
+         /* switch (address[1:0]) {
+            0x0: value = data;
+            0x1: value = data ROR 8;
+            0x2: value = data ROR 16;
+            0x3: value = data ROR 24; }
+            Mux0X(c,a,b) yields a when c == 0 and b otherwise, and
+            needs an 8-bit condition, hence the 1Uto8 widening. */
+         assign( value, IRExpr_Mux0X(
+                           unop(Iop_1Uto8,
+                                binop(Iop_CmpEQ32, expr_addr_10, mkU32(0x0))),
+                           IRExpr_Mux0X(
+                              unop(Iop_1Uto8,
+                                   binop(Iop_CmpEQ32, expr_addr_10, mkU32(0x1))),
+                              IRExpr_Mux0X(
+                                 unop(Iop_1Uto8,
+                                      binop(Iop_CmpEQ32, expr_addr_10, mkU32(0x2))),
+                                 mkexpr(data_ror24),
+                                 mkexpr(data_ror16) ),
+                              mkexpr(data_ror8) ),
+                           mkexpr(data) ) );
+
+         if (Rd_addr == 15) {
+            // assuming architecture < 5:
See ARM ARM A4-28 + putIReg( Rd_addr, binop(Iop_And32, mkexpr(value), mkU32(0xFFFFFFFC)) ); + + // CAB: Need to tell vex we're doing a jump here? + // irsb->jumpkind = Ijk_Boring; + // irsb->next = mkexpr(value); + } else { + putIReg( Rd_addr, mkexpr(value) ); + } + + } + } else { // STORE: ARM ARM A4-88 + if (B==1) { // unsigned byte + if (Rd_addr == 15) { // Unpredictable - ARM ARM A4-90 + vex_printf("dis_loadstore_w_ub(arm): Unpredictable - Rd_addr==15\n"); + return False; + } + storeLE( mkexpr(address), unop(Iop_32to8, getIReg(Rd_addr)) ); // Rd[7:0] + } else { // word + + if (Rd_addr == 15) { // Implementation Defined - ARM ARM A4-88 + vex_printf("dis_loadstore_w_ub(arm): Implementation Defined - Rd_addr==15\n"); + return False; + // CAB TODO: What to do here? + } + storeLE( mkexpr(address), getIReg(Rd_addr) ); + } + } + return True; + } + + + + + + + /* + ARMG_CC_OP_LSL, ARMG_CC_OP_LSR, ARMG_CC_OP_ASR + ARM ARM A5-9... + + carry = carry_out[0] + */ + static + IRExpr* dis_shift( Bool* decode_ok, UInt theInstr, IRTemp* carry_out, HChar* buf ) + { + UChar Rn_addr = toUChar((theInstr >> 16) & 0xF); + UChar Rd_addr = toUChar((theInstr >> 12) & 0xF); + UChar Rs_addr = toUChar((theInstr >> 8) & 0xF); + UChar Rm_addr = toUChar((theInstr >> 0) & 0xF); + UChar by_reg = toUChar((theInstr >> 4) & 0x1); // instr[4] + UChar shift_imm = toUChar((theInstr >> 7) & 0x1F); // instr[11:7] + UChar shift_op = toUChar((theInstr >> 4) & 0xF); // instr[7:4] + IRTemp Rm = newTemp(Ity_I32); + IRTemp Rs = newTemp(Ity_I32); + IRTemp shift_amt = newTemp(Ity_I8); + IRTemp carry_shift = newTemp(Ity_I8); + IRTemp oldFlagC = newTemp(Ity_I32); + IRTemp mux_false = newTemp(Ity_I32); + IRExpr* expr; + IROp op; + + assign( oldFlagC, binop(Iop_Shr32, + mk_armg_calculate_flags_c(), + mkU8(ARMG_CC_SHIFT_C)) ); + + switch (shift_op) { + case 0x0: case 0x8: case 0x1: op = Iop_Shl32; break; + case 0x2: case 0xA: case 0x3: op = Iop_Shr32; break; + case 0x4: case 0xC: case 0x5: op = Iop_Sar32; break; + 
default: + vex_printf("dis_shift(arm): No such case: 0x%x\n", shift_op); + *decode_ok = False; + return mkU32(0); + } + + + if (by_reg) { // Register Shift + assign( Rm, getIReg(Rm_addr) ); + + if (Rd_addr == 15 || Rm_addr == 15 || + Rn_addr == 15 || Rs_addr == 15) { // Unpredictable (ARM ARM A5-10) + vex_printf("dis_shift(arm): Unpredictable - Rd|Rm|Rn|Rs == R15\n"); + *decode_ok = False; + return mkU32(0); + } + + assign( Rs, getIReg((theInstr >> 8) & 0xF) ); // instr[11:8] + + // shift_amt = shift_expr & 31 => Rs[5:0] + assign( shift_amt, + narrowTo(Ity_I8, binop( Iop_And32, mkexpr(Rs), mkU32(0x1F)) ) ); + + // CAB TODO: support for >31 shift ? (Rs[7:0]) + + switch (shift_op) { + case 0x1: // LSL(reg) + assign( mux_false, mkU32(0) ); + assign( carry_shift, binop(Iop_Add8, mkU8(32), mkexpr(shift_amt)) ); + break; + + case 0x3: // LSR(reg) + assign( mux_false, mkU32(0) ); + assign( carry_shift, binop(Iop_Sub8, mkexpr(shift_amt), mkU8(1)) ); + break; + + case 0x5: // ASR(reg) + // Rs[31] == 0 ? 0x0 : 0xFFFFFFFF + assign( mux_false, + IRExpr_Mux0X( + binop(Iop_CmpLT32U, mkexpr(Rs), mkU32(0x80000000)), + mkU32(0xFFFFFFFF), mkU32(0) ) ); + assign( carry_shift, + binop(Iop_Sub8, mkexpr(shift_amt), mkU8(1)) ); + break; + + default: + vex_printf("dis_shift(arm): Reg shift: No such case: 0x%x\n", shift_op); + *decode_ok = False; + return mkU32(0); + } + + expr = IRExpr_Mux0X( + binop(Iop_CmpLT32U, widenUto32(mkexpr(shift_amt)), mkU32(32)), + mkexpr(mux_false), + binop(op, mkexpr(Rm), mkexpr(shift_amt)) ); + + // shift_amt == 0 ? old_flag_c : Rm >> x + assign( *carry_out, + IRExpr_Mux0X( + binop(Iop_CmpEQ8, mkexpr(shift_amt), mkU8(0)), + binop(Iop_Shr32, mkexpr(Rm), mkexpr(carry_shift)), + mkexpr(oldFlagC) ) ); + + DIS(buf, "R%d, %s R%d", Rm_addr, name_ARMShiftOp(shift_op, 0), Rs_addr); + } + else { // Immediate shift + + // CAB: This right? 
+ // "the value used is the address of the current intruction plus 8" + if (Rm_addr == 15 || Rn_addr == 15) { // ARM ARM A5-9 + assign( Rm, binop(Iop_Add32, getIReg(15), mkU32(8)) ); + } else { + assign( Rm, getIReg(Rm_addr) ); + } + + if (shift_imm == 0) { + switch (shift_op) { + case 0x0: case 0x8: // LSL(imm) + expr = mkexpr(Rm); + assign( *carry_out, mkexpr(oldFlagC) ); + break; + + case 0x2: case 0xA: // LSR(imm) + expr = mkexpr(0); + // Rm >> 31: carry = R[0] + assign( *carry_out, binop(Iop_Shr32, mkexpr(Rm), mkU8(31)) ); + break; + + case 0x4: case 0xC: // ASR(imm) + // Rs[31] == 0 ? 0x0 : 0xFFFFFFFF + expr = IRExpr_Mux0X( + binop(Iop_CmpLT32U, mkexpr(Rs), mkU32(0x80000000)), + mkU32(0xFFFFFFFF), mkU32(0) ); + // Rm >> 31: carry = R[0] + assign( *carry_out, binop(Iop_Shr32, mkexpr(Rm), mkU8(31)) ); + break; + + default: + vex_printf("dis_shift(arm): Imm shift: No such case: 0x%x\n", shift_op); + *decode_ok = False; + return mkU32(0); + } + DIS(buf, "R%d", Rm_addr); + } else { + expr = binop(op, mkexpr(Rm), mkU8(shift_imm)); + assign( *carry_out, binop(op, mkexpr(Rm), + binop(Iop_Sub32, mkU32(shift_imm), mkU32(1)) ) ); + + DIS(buf, "R%d, %s #%d", Rm_addr, name_ARMShiftOp(shift_op, 0), shift_imm); + } + } + return expr; + } + + + + + /* + ARMG_CC_OP_ROR + ARM ARM A5-15,16,17 + */ + static + IRExpr* dis_rotate ( Bool* decode_ok, UInt theInstr, IRTemp* carry_out, HChar* buf ) + { + UChar Rn_addr = toUChar((theInstr >> 16) & 0xF); + UChar Rd_addr = toUChar((theInstr >> 12) & 0xF); + UChar Rs_addr = toUChar((theInstr >> 8) & 0xF); + UChar Rm_addr = toUChar((theInstr >> 0) & 0xF); + UChar by_reg = toUChar((theInstr >> 4) & 0x1); // instr[4] + UChar rot_imm = toUChar((theInstr >> 7) & 0x1F); // instr[11:7] + IRTemp Rm = newTemp(Ity_I32); + IRTemp Rs = newTemp(Ity_I32); + IRTemp rot_amt = newTemp(Ity_I8); // Rs[7:0] + IRTemp oldFlagC = newTemp(Ity_I32); + IRExpr* expr=0; + + assign( oldFlagC, binop(Iop_Shr32, + mk_armg_calculate_flags_c(), + mkU8(ARMG_CC_SHIFT_C)) ); 
+
+   /* Two forms follow.  By register: rotate Rm right by Rs[4:0].  By
+      immediate: rotate by rot_imm, where rot_imm == 0 encodes RRX
+      (rotate right by one through the carry flag).  *carry_out gets
+      the shifter carry in bit 0. */
+   if (by_reg) {  // Register rotate
+      assign( Rm, getIReg(Rm_addr) );
+
+      if (Rd_addr == 15 || Rm_addr == 15 ||
+          Rn_addr == 15 || Rs_addr == 15) {   // Unpredictable (ARM ARM A5-10)
+         vex_printf("dis_rotate(arm): Unpredictable - Rd|Rm|Rn|Rs == R15\n");
+         *decode_ok = False;
+         return mkU32(0);
+      }
+
+      assign( Rs, getIReg((theInstr >> 8) & 0xF) );  // instr[11:8]
+      // Rs[4:0]
+      assign( rot_amt, narrowTo(Ity_I8,
+                                binop(Iop_And32, mkexpr(Rs), mkU32(0x1F))) );
+
+      // Rs[7:0] == 0 ? oldFlagC : (Rs[4:0] == 0 ? Rm >> 31 : Rm >> rot-1 )
+      /* Mux0X(c,a,b) yields a when c == 0 and b otherwise; c must be
+         an 8-bit value, hence the 1Uto8 widening.  The rot-1 amount
+         is masked to 0..31 so that the arm discarded when
+         rot_amt == 0 is still a legal shift. */
+      assign( *carry_out,
+              IRExpr_Mux0X(
+                 unop(Iop_1Uto8,
+                      binop(Iop_CmpNE32, mkU32(0),
+                            binop(Iop_And32, mkexpr(Rs), mkU32(0xFF)))),
+                 mkexpr(oldFlagC),
+                 IRExpr_Mux0X(
+                    unop(Iop_1Uto8,
+                         binop(Iop_CmpEQ8, mkexpr(rot_amt), mkU8(0))),
+                    binop(Iop_Shr32, mkexpr(Rm),
+                          binop(Iop_And8,
+                                binop(Iop_Sub8, mkexpr(rot_amt), mkU8(1)),
+                                mkU8(31))),
+                    binop(Iop_Shr32, mkexpr(Rm), mkU8(31)) ) ) );
+
+
+      /* expr = (dst0 >> rot_amt) | (dst0 << (wordsize-rot_amt))
+         The left amount is taken modulo 32 so that rot_amt == 0
+         gives Rm | Rm == Rm instead of a shift by 32. */
+      expr = binop(Iop_Or32,
+                   binop(Iop_Shr32, mkexpr(Rm), mkexpr(rot_amt)),
+                   binop(Iop_Shl32, mkexpr(Rm),
+                         binop(Iop_And8,
+                               binop(Iop_Sub8, mkU8(32), mkexpr(rot_amt)),
+                               mkU8(31))));
+
+      DIS(buf, "R%d, ror R%d", Rm_addr, Rs_addr);
+   }
+   else {  // Immediate rotate
+
+      // "the value used is the address of the current instruction plus 8"
+      // Applies to the rotated register only: Rn == R15 must not
+      // change which value is rotated.  (ARM ARM A5-9)
+      if (Rm_addr == 15) {
+         assign( Rm, binop(Iop_Add32, getIReg(15), mkU32(8)) );
+      } else {
+         assign( Rm, getIReg(Rm_addr) );
+      }
+
+      if (rot_imm == 0) { // RRX (ARM ARM A5-17)
+         // 33 bit ROR using carry flag as the 33rd bit
+         // op = Rm >> 1, carry flag replacing vacated bit position.
+
+         // carry = Rm[0], which is already bit 0 of Rm
+         assign( *carry_out, mkexpr(Rm) );
+
+         expr = binop(Iop_Or32,
+                      binop(Iop_Shl32, mkexpr(oldFlagC), mkU8(31)),
+                      binop(Iop_Shr32, mkexpr(Rm), mkU8(1)));
+         DIS(buf, "R%d, rrx", Rm_addr);
+      } else {
+         // Rm >> rot-1: carry = R[0]
+         assign( *carry_out, binop(Iop_Shr32, mkexpr(Rm),
+                                   mkU8(toUChar(rot_imm - 1))) );
+
+         expr = binop(Iop_Or32,
+                      binop(Iop_Shr32, mkexpr(Rm), mkU8(rot_imm)),
+                      binop(Iop_Shl32, mkexpr(Rm),
+                            mkU8(toUChar(32 - rot_imm))));
+
+         DIS(buf, "R%d, ror #%u", Rm_addr, (UInt)rot_imm);
+      }
+   }
+   return expr;
+}
+
+
+
+
+/*
+  CAB TODO:
+   - Not all shifts by 0 leave c_flag unchanged, so guard_expr is more difficult...
+  assign( flags_guard, binop( Iop_CmpEQ32, mkexpr(shift_amt), mkU32(0) ) );
+  setFlags_DEP1_DEP2_shift( ARMG_CC_OP_LSL, Rm, shift_op, flags_guard );
+*/
+
+
+
+
+/* Addressing mode 1 - Data Processing ops
+   General syntax: opcode{cond}{S} Rd, Rn, shifter_operand
+   Returns the shifter-operand expression (:: Ity_I32), assigns the
+   shifter carry-out to *carry_out and formats the operand text into
+   buf.  Register forms are delegated to dis_shift / dis_rotate; on
+   an invalid shift field *decode_ok is set to False.
+*/
+static
+IRExpr* dis_shifter_op ( Bool *decode_ok, UInt theInstr, IRTemp* carry_out, HChar* buf )
+{
+   UChar is_immed  = toUChar((theInstr >> 25) & 1);  // immediate / register shift
+   UChar shift_op  = toUChar((theInstr >> 4) & 0xF); // instr[7:4]
+   UInt immed_8, rot_imm;
+   UInt imm;
+   IRTemp oldFlagC = newTemp(Ity_I32);
+
+   if (is_immed) {  // ARM ARM A5-2
+      // dst = src ROR (rot << 1)
+      //     = (src >> rot) | (src << (32-rot));
+      immed_8 = theInstr & 0xFF;
+      rot_imm = ((theInstr >> 8) & 0xF) << 1;
+      /* A rotation of 0 must be special-cased: shifting a 32-bit
+         value by 32 is undefined behaviour in C. */
+      imm = (rot_imm == 0)
+               ? immed_8
+               : ((immed_8 >> rot_imm) | (immed_8 << (32-rot_imm)));
+
+      if (rot_imm == 0) {
+         assign( oldFlagC, binop(Iop_Shr32,
+                                 mk_armg_calculate_flags_c(),
+                                 mkU8(ARMG_CC_SHIFT_C)) );
+         assign( *carry_out, mkexpr(oldFlagC) );
+      } else {
+         assign( *carry_out, binop(Iop_Shr32, mkU32(imm), mkU8(31)) );
+      }
+      DIS(buf, "#%u", imm);
+      return mkU32(imm);
+   } else {
+
+      // We shouldn't have any 'op' with bits 4=1 and 7=1 : 1xx1
+      switch (shift_op) {
+      case 0x0: case 0x8: case 0x1:
+      case 0x2: case 0xA: case 0x3:
+      case 0x4: case 0xC: case 0x5:
+         return dis_shift( decode_ok, theInstr, carry_out, buf );
+
+      case 0x6: case 0xE: case 0x7:
+         return dis_rotate( decode_ok, theInstr, carry_out, buf );
+
+      default: // Error: Any other value shouldn't be
here. + *decode_ok = False; + vex_printf("dis_shifter_op(arm): shift: No such case: 0x%x\n", shift_op); + return mkU32(0); + } + } + } + + + + + + /* -------------- Helper for DPI's. -------------- + */ + static + Bool dis_dataproc ( UInt theInstr ) + { + UChar opc = toUChar((theInstr >> 21) & 0xF); + UChar set_flags = toUChar((theInstr >> 20) & 1); + UChar Rn_addr = toUChar((theInstr >> 16) & 0xF); + UChar Rd_addr = toUChar((theInstr >> 12) & 0xF); + IRTemp Rn = newTemp(Ity_I32); + IRTemp Rd = newTemp(Ity_I32); + IRTemp alu_out = newTemp(Ity_I32); + IRTemp shifter_op = newTemp(Ity_I32); + IRTemp carry_out = newTemp(Ity_I32); + IROp op_set_flags = ARMG_CC_OP_LOGIC; + Bool testing_instr = False; + Bool decode_ok = True; + HChar* cond_name = name_ARMCondcode( (theInstr >> 28) & 0xF ); + HChar* ch_set_flags = (set_flags == 1) ? "S" : ""; + HChar dis_buf[50]; + + assign( shifter_op, dis_shifter_op( &decode_ok, theInstr, &carry_out, dis_buf ) ); + if (!decode_ok) return False; + + assign( Rd, getIReg(Rd_addr) ); + assign( Rn, getIReg(Rn_addr) ); + + + switch (opc) { + case 0x0: case 0x1: case 0x2: case 0x3: case 0x4: + case 0xC: case 0xE: + DIP("%s%s%s R%d, R%d, %s\n", name_ARMDataProcOp(opc), + cond_name, ch_set_flags, Rd_addr, Rn_addr, dis_buf); + break; + case 0x5: case 0x6: case 0x7: + // CAB: Unimplemented + break; + case 0x8: case 0x9: case 0xA: case 0xB: + DIP("%s%s R%d, %s\n", name_ARMDataProcOp(opc), + cond_name, Rn_addr, dis_buf); + break; + case 0xD: case 0xF: + DIP("%s%s%s R%d, %s\n", name_ARMDataProcOp(opc), + cond_name, ch_set_flags, Rd_addr, dis_buf); + break; + default:break; + } + + + switch (opc) { + case 0x0: // AND + assign( alu_out, binop(Iop_And32, getIReg(Rn_addr), mkexpr(shifter_op)) ); + break; + + case 0x1: // EOR + assign( alu_out, binop(Iop_Xor32, getIReg(Rn_addr), mkexpr(shifter_op)) ); + break; + + case 0x2: // SUB + assign( alu_out, binop( Iop_Sub32, getIReg(Rn_addr), mkexpr(shifter_op) ) ); + op_set_flags = ARMG_CC_OP_SUB; + break; + + 
case 0x3: // RSB + assign( alu_out, binop( Iop_Sub32, mkexpr(shifter_op), getIReg(Rn_addr) ) ); + op_set_flags = ARMG_CC_OP_SUB; + /* set_flags(), below, switches the args for this case */ + break; + + case 0x4: // ADD + assign( alu_out, binop( Iop_Add32, getIReg(Rn_addr), mkexpr(shifter_op) ) ); + op_set_flags = ARMG_CC_OP_ADD; + break; + + case 0x5: // ADC // CAB: Unimplemented + case 0x6: // SBC // CAB: Unimplemented + case 0x7: // RSC // CAB: Unimplemented + goto decode_failure; + + case 0x8: // TST + vassert(set_flags==1); + assign( alu_out, binop(Iop_And32, getIReg(Rn_addr), mkexpr(shifter_op)) ); + testing_instr = True; + break; + + case 0x9: // TEQ + vassert(set_flags==1); + assign( alu_out, binop(Iop_Xor32, getIReg(Rn_addr), mkexpr(shifter_op)) ); + testing_instr = True; + break; + + case 0xA: // CMP + vassert(set_flags==1); + op_set_flags = ARMG_CC_OP_SUB; + testing_instr = True; + break; + + case 0xB: // CMN + vassert(set_flags==1); + op_set_flags = ARMG_CC_OP_ADD; + testing_instr = True; + break; + + case 0xC: // ORR + assign( alu_out, binop(Iop_Or32, getIReg(Rn_addr), mkexpr(shifter_op)) ); + break; + + case 0xD: // MOV + assign( alu_out, mkexpr(shifter_op) ); + break; + + case 0xE: // BIC + assign( alu_out, binop(Iop_And32, getIReg(Rn_addr), + unop( Iop_Not32, mkexpr(shifter_op))) ); + break; + + case 0xF: // MVN + assign( alu_out, unop(Iop_Not32, mkexpr(shifter_op)) ); + break; + + default: + decode_failure: + vex_printf("dis_dataproc(arm): unhandled opcode: 0x%x\n", opc); + return False; + } + + if (!testing_instr) { + if ( Rd_addr == 15) { // dest reg == PC + // CPSR = SPSR: Unpredictable in User | System mode (no SPSR!) + // Unpredictable - We're only supporting user mode... 
+ vex_printf("dis_dataproc(arm): Unpredictable - Rd_addr==15\n"); + return False; + } + putIReg( Rd_addr, mkexpr(alu_out) ); + } + + if (set_flags) { + if (op_set_flags == ARMG_CC_OP_LOGIC) { + setFlags_DEP1_DEP2( op_set_flags, alu_out, carry_out ); + } else { + if (opc == 0x3) { + setFlags_DEP1_DEP2( op_set_flags, shifter_op, Rn ); + } else { + setFlags_DEP1_DEP2( op_set_flags, Rn, shifter_op ); + } + } + } + return decode_ok; + } + + + + + /* -------------- Helper for Branch. -------------- + */ + static + void dis_branch ( UInt theInstr ) + { + UChar link = toUChar((theInstr >> 24) & 1); + UInt signed_immed_24 = theInstr & 0xFFFFFF; + UInt branch_offset; + IRTemp addr = newTemp(Ity_I32); + IRTemp dest = newTemp(Ity_I32); + + if (link) { // LR (R14) = addr of instr after branch instr + assign( addr, binop(Iop_Add32, getIReg(15), mkU32(4)) ); + putIReg( 14, mkexpr(addr) ); + } + + // PC = PC + (SignExtend(signed_immed_24) << 2) + branch_offset = extend_s_24to32( signed_immed_24 ) << 2; + assign( dest, binop(Iop_Add32, getIReg(15), mkU32(branch_offset)) ); + + irsb->jumpkind = link ? Ijk_Call : Ijk_Boring; + irsb->next = mkexpr(dest); + + // Note: Not actually writing to R15 - let the IR stuff do that. + + DIP("b%s%s 0x%x\n", + link ? "l" : "", + name_ARMCondcode( (theInstr >> 28) & 0xF ), + branch_offset); + } + + + + + + + + + + + + + + + + + /*------------------------------------------------------------*/ + /*--- Disassemble a single instruction ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. + Set *size to be the size of the instruction. + If the returned value is Dis_Resteer, + the next guest address is assigned to *whereNext. If resteerOK + is False, disInstr may not return Dis_Resteer. 
*/ + + static DisResult disInstr ( /*IN*/ Bool resteerOK, + /*IN*/ Bool (*resteerOkFn) ( Addr64 ), + /*IN*/ Long delta, + /*OUT*/ Int* size, + /*OUT*/ Addr64* whereNext ) + { + // IRType ty; + // IRTemp addr, t1, t2; + // Int alen; + UChar opc1, opc2, opc_tmp; //, modrm, abyte; + ARMCondcode cond; + // UInt d32; + // UChar dis_buf[50]; + // Int am_sz, d_sz; + DisResult whatNext = Dis_Continue; + UInt theInstr; + + + /* At least this is simple on ARM: insns are all 4 bytes long, and + 4-aligned. So just fish the whole thing out of memory right now + and have done. */ + + /* We will set *size to 4 if the insn is successfully decoded. + Setting it to 0 by default makes bbToIR_ARM abort if we fail the + decode. */ + *size = 0; + + theInstr = *(UInt*)(&guest_code[delta]); + + // vex_printf("START: 0x%x, %,b\n", theInstr, theInstr ); + + DIP("\t0x%x: ", toUInt(guest_pc_bbstart+delta)); + + + + // TODO: fix the client-request stuff, else nothing will work + + /* Spot the client-request magic sequence. */ + // Essentially a v. unlikely sequence of noops that we can catch + { + UInt* code = (UInt*)(guest_code + delta); + + /* Spot this: + E1A00EE0 mov r0, r0, ror #29 + E1A001E0 mov r0, r0, ror #3 + E1A00DE0 mov r0, r0, ror #27 + E1A002E0 mov r0, r0, ror #5 + E1A006E0 mov r0, r0, ror #13 + E1A009E0 mov r0, r0, ror #19 + */ + /* I suspect these will have to be turned the other way round to + work on little-endian arm. */ + if (code[0] == 0xE1A00EE0 && + code[1] == 0xE1A001E0 && + code[2] == 0xE1A00DE0 && + code[3] == 0xE1A002E0 && + code[4] == 0xE1A006E0 && + code[5] == 0xE1A009E0) { + + // uh ... I'll figure this out later. possibly r0 = client_request(r0) */ + DIP("?CAB? = client_request ( ?CAB? 
)\n"); + + *size = 24; + + irsb->next = mkU32(toUInt(guest_pc_bbstart+delta)); + irsb->jumpkind = Ijk_ClientReq; + + whatNext = Dis_StopHere; + goto decode_success; + } + } + + + + + + /* + Deal with condition first + */ + cond = (theInstr >> 28) & 0xF; /* opcode: bits 31:28 */ + // vex_printf("\ndisInstr(arm): cond: 0x%x, %b\n", cond, cond ); + + switch (cond) { + case 0xF: // => Illegal instruction prior to v5 (see ARM ARM A3-5) + vex_printf("disInstr(arm): illegal condition\n"); + goto decode_failure; + + case 0xE: // => Unconditional: go translate the instruction + break; + + default: + // => Valid condition: translate the condition test first + stmt( IRStmt_Exit( mk_armg_calculate_condition(cond), + Ijk_Boring, + IRConst_U32(toUInt(guest_pc_bbstart+delta+4)) ) ); + //irsb->next = mkU32(guest_pc_bbstart+delta+4); + //irsb->jumpkind = Ijk_Boring; + } + + + + /* Primary opcode is roughly bits 27:20 (ARM ARM(v2) A3-2) + secondary opcode is bits 4:0 */ + opc1 = toUChar((theInstr >> 20) & 0xFF); /* opcode1: bits 27:20 */ + opc2 = toUChar((theInstr >> 4 ) & 0xF); /* opcode2: bits 7:4 */ + // vex_printf("disInstr(arm): opcode1: 0x%2x, %,09b\n", opc1, opc1 ); + // vex_printf("disInstr(arm): opcode2: 0x%02x, %,04b\n", opc2, opc2 ); + + switch (opc1 >> 4) { // instr[27:24] + case 0x0: + case 0x1: + /* + Multiplies, extra load/store instructions: ARM ARM A3-3 + */ + if ( (opc1 & 0xE0) == 0x0 && (opc2 & 0x9) == 0x9 ) { // 000xxxxx && 1xx1 + if (opc2 == 0x9) { + if ((opc1 & 0x1C) == 0x00) { // multiply (accumulate) + goto decode_failure; + } + if ((opc1 & 0x18) == 0x08) { // multiply (accumulate) long + goto decode_failure; + } + if ((opc1 & 0x1B) == 0x10) { // swap/swap byte + goto decode_failure; + } + } + if ( opc2 == 0xB ) { + if ((opc1 & 0x04) == 0x00) { // load/store 1/2word reg offset + goto decode_failure; + } else { // load/store 1/2word imm offset + goto decode_failure; + } + } + if ((opc2 & 0xD) == 0xD) { + if ((opc1 & 0x05) == 0x00) { // load/store 2 words reg 
offset + goto decode_failure; + } + if ((opc1 & 0x05) == 0x04) { // load/store 2 words imm offset + goto decode_failure; + } + if ((opc1 & 0x05) == 0x01) { // load/store signed 1/2word/byte reg offset + goto decode_failure; + } + if ((opc1 & 0x05) == 0x05) { // load/store signed 1/2word/byte imm offset + goto decode_failure; + } + } + } /* endif: Multiplies, extra load/store... */ + + /* + 'Misc' Instructions: ARM ARM A3-4 + */ + if ((opc1 & 0xF9) == 0x10) { // 0001 0xx0 + opc_tmp = toUChar((opc1 >> 1) & 0x3); + switch (opc2) { + case 0x0: + if ((opc_tmp & 0x1) == 0x0) { // move stat reg -> reg + goto decode_failure; + } else { // move reg -> stat reg + goto decode_failure; + } + + case 0x1: + if (opc_tmp == 0x1) { // branch/exchange instr set + goto decode_failure; + } + if (opc_tmp == 0x3) { // count leading zeros + goto decode_failure; + } + break; + + case 0x3: + if (opc_tmp == 0x1) { // branch & link/exchange instr set + goto decode_failure; + } + break; + + case 0x5: // enhanced dsp add/subtracts + goto decode_failure; + + case 0x7: + if (opc_tmp == 0x1) { // software breakpoint + if (cond != 0xE) { // Unpredictable - ARM ARM A3-4 + vex_printf("disInstr(arm): Unpredictable instruction\n"); + goto decode_failure; + } + goto decode_failure; + } + break; + + case 0x8: case 0x9: case 0xA: // enhanced dsp multiplies + case 0xB: case 0xC: case 0xD: case 0xE: + goto decode_failure; + + default: break; + } + } /* endif: 'Misc' Instructions... */ + // fall through... 
+ + case 0x2: + case 0x3: + if ((opc1 & 0xFB) == 0x30) goto decode_failure; // Undefined - ARM ARM A3-2 + + /* + A lonely 'MOV imm to status reg': + */ + if ((opc1 & 0xFB) == 0x32) { // 0011 0x10 + goto decode_failure; + } + + /* + Data Processing Instructions + (if we get here, it's a dpi) + */ + if (!dis_dataproc( theInstr )) { goto decode_failure; } + break; + + + /* + Load/Store word | unsigned byte + */ + case 0x6: case 0x7: // LOAD/STORE reg offset + if ((opc2 & 0x1) == 0x1) goto decode_failure; // Undefined - ARM ARM A3-2 + + case 0x4: case 0x5: // LOAD/STORE imm offset + if (!dis_loadstore_w_ub(theInstr)) { goto decode_failure; } + break; + + /* + Load/Store multiple + */ + case 0x8: case 0x9: + if (!dis_loadstore_mult(theInstr)) { goto decode_failure; } + break; + + + /* + Branch, Branch and Link + */ + case 0xA: case 0xB: // B, BL + // B(L): L=1 => return address stored in link register (R14) + dis_branch(theInstr); + whatNext = Dis_StopHere; + break; + + + /* + Co-processor instructions + */ + case 0xC: case 0xD: // co-pro load/store & double reg trxfrs + goto decode_failure; + + case 0xE: + if ((opc2 & 0x1) == 0x0) { // co-pro data processing + goto decode_failure; + } else { // co-pro register transfers + goto decode_failure; + } + + + /* + Software Interrupt + */ + case 0xF: // swi + goto decode_failure; + + default: + decode_failure: + /* All decode failures end up here. */ + vex_printf("disInstr(arm): unhandled instruction: " + "0x%x\n", theInstr); + vpanic("armToIR: unimplemented insn"); + + } /* switch (opc) for the main (primary) opcode switch. */ + + decode_success: + /* All decode successes end up here. 
*/ + // vex_printf("disInstr(arm): success"); + DIP("\n"); + + *size = 4; + return whatNext; + } + + #undef DIP + #undef DIS + + /*--------------------------------------------------------------------*/ + /*--- end guest_arm_toIR.c ---*/ + /*--------------------------------------------------------------------*/ Index: VEX/priv/guest_generic_bb_to_IR.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_generic_bb_to_IR.c Sat May 26 10:11:28 2012 *************** *** 0 **** --- 1,526 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_generic_bb_to_IR.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. 
In no event shall OpenWorks LLP be liable for any
+    direct, indirect, incidental, special, exemplary, or consequential
+    damages (including, but not limited to, procurement of substitute
+    goods or services; loss of use, data, or profits; or business
+    interruption) however caused and on any theory of liability,
+    whether in contract, strict liability, or tort (including
+    negligence or otherwise) arising in any way out of the use of this
+    software, even if advised of the possibility of such damage.
+
+    Neither the names of the U.S. Department of Energy nor the
+    University of California nor the names of its contributors may be
+    used to endorse or promote products derived from this software
+    without prior written permission.
+ */
+
+ #include "libvex_basictypes.h"
+ #include "libvex_ir.h"
+ #include "libvex.h"
+ #include "main_util.h"
+ #include "main_globals.h"
+ #include "guest_generic_bb_to_IR.h"
+
+
+ /* Forward declarations of the two self-check checksum routines,
+    which are defined at the end of this file. */
+ __attribute__((regparm(2)))
+ static UInt genericg_compute_checksum_4al_4plus ( HWord addr, HWord len );
+ __attribute__((regparm(2)))
+ static UInt genericg_compute_checksum_generic ( HWord addr, HWord len );
+
+ /* Small helpers.  const_False is a resteer predicate that rejects
+    every address; both arguments are ignored.  bb_to_IR installs it
+    in place of chase_into_ok whenever chasing is not permitted. */
+ static Bool const_False ( void* callback_opaque, Addr64 a ) {
+    return False;
+ }
+
+ /* Disassemble a complete basic block, starting at guest_IP_bbstart,
+    returning a new IRSB. The disassembler may chase across basic
+    block boundaries if it wishes and if chase_into_ok allows it.
+    The precise guest address ranges from which code has been taken
+    are written into vge. guest_IP_bbstart is taken to be the IP in
+    the guest's address space corresponding to the instruction at
+    &guest_code[0].
+
+    dis_instr_fn is the arch-specific fn to disassemble one
+    instruction; it is this that does the real work.
+
+    do_self_check indicates that the caller needs a self-checking
+    translation.
+
+    preamble_function is a callback which allows the caller to add
+    its own IR preamble (following the self-check, if any).
May be
+    NULL. If non-NULL, the IRSB under construction is handed to
+    this function, which presumably adds IR statements to it. The
+    callback may optionally complete the block and direct bb_to_IR
+    not to disassemble any instructions into it; this is indicated
+    by the callback returning True.
+
+    offB_TISTART and offB_TILEN are the guest-state offsets of the
+    TISTART and TILEN fields. Since this routine has to work for any
+    guest state, without knowing what it is, those offsets have to be
+    passed in.
+
+    callback_opaque is a caller-supplied pointer to data which the
+    callbacks may want to see. Vex has no idea what it is.
+    (In fact it's a VgInstrumentClosure.)
+
+    guest_word_type must be Ity_I32 or Ity_I64 (this is asserted).
+    host_bigendian, arch_guest, archinfo_guest and abiinfo_both are
+    handed on unchanged to dis_instr_fn; abiinfo_both is additionally
+    consulted when building the self-check call.
+
+    The return value is the completed IRSB.
+ */
+
+ IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
+                  /*IN*/ void* callback_opaque,
+                  /*IN*/ DisOneInstrFn dis_instr_fn,
+                  /*IN*/ UChar* guest_code,
+                  /*IN*/ Addr64 guest_IP_bbstart,
+                  /*IN*/ Bool (*chase_into_ok)(void*,Addr64),
+                  /*IN*/ Bool host_bigendian,
+                  /*IN*/ VexArch arch_guest,
+                  /*IN*/ VexArchInfo* archinfo_guest,
+                  /*IN*/ VexAbiInfo* abiinfo_both,
+                  /*IN*/ IRType guest_word_type,
+                  /*IN*/ Bool do_self_check,
+                  /*IN*/ Bool (*preamble_function)(void*,IRSB*),
+                  /*IN*/ Int offB_TISTART,
+                  /*IN*/ Int offB_TILEN )
+ {
+    Long delta;
+    Int i, n_instrs, first_stmt_idx;
+    Bool resteerOK, need_to_put_IP, debug_print;
+    DisResult dres;
+    IRStmt* imark;
+    static Int n_resteers = 0;
+    Int d_resteers = 0;
+    Int selfcheck_idx = 0;
+    IRSB* irsb;
+    Addr64 guest_IP_curr_instr;
+    IRConst* guest_IP_bbstart_IRConst = NULL;
+
+    Bool (*resteerOKfn)(void*,Addr64) = NULL;
+
+    debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
+
+    /* Note: for adler32 to work without % operation for the self
+       check, need to limit length of stuff it scans to 5552 bytes.
+       Therefore limiting the max bb len to 100 insns seems generously
+       conservative.
+       NOTE(review): the checksum routines at the end of this file use
+       a rotate/xor/add scheme rather than adler32, so the 5552-byte
+       rationale looks historical; the < 100 insn limit is still
+       asserted below. */
+
+    /* check sanity .. */
+    vassert(sizeof(HWord) == sizeof(void*));
+    vassert(vex_control.guest_max_insns >= 1);
+    vassert(vex_control.guest_max_insns < 100);
+    vassert(vex_control.guest_chase_thresh >= 0);
+    vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
+    vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
+
+    /* Start a new, empty extent. */
+    vge->n_used = 1;
+    vge->base[0] = guest_IP_bbstart;
+    vge->len[0] = 0;
+
+    /* And a new IR superblock to dump the result into. */
+    irsb = emptyIRSB();
+
+    /* Delta keeps track of how far along the guest_code array we have
+       so far gone. */
+    delta = 0;
+    n_instrs = 0;
+
+    /* The block's start address as an IRConst of the guest word size.
+       Only built (and only used) when a self-check is requested. */
+    if (do_self_check) {
+       guest_IP_bbstart_IRConst
+          = guest_word_type==Ity_I32
+               ? IRConst_U32(toUInt(guest_IP_bbstart))
+               : IRConst_U64(guest_IP_bbstart);
+    }
+
+    /* If asked to make a self-checking translation, leave 5 spaces
+       in which to put the check statements. We'll fill them in later
+       when we know the length and checksum of the area to check. */
+    if (do_self_check) {
+       selfcheck_idx = irsb->stmts_used;
+       addStmtToIRSB( irsb, IRStmt_NoOp() );
+       addStmtToIRSB( irsb, IRStmt_NoOp() );
+       addStmtToIRSB( irsb, IRStmt_NoOp() );
+       addStmtToIRSB( irsb, IRStmt_NoOp() );
+       addStmtToIRSB( irsb, IRStmt_NoOp() );
+    }
+
+    /* If the caller supplied a function to add its own preamble, use
+       it now. */
+    if (preamble_function) {
+       Bool stopNow = preamble_function( callback_opaque, irsb );
+       if (stopNow) {
+          /* The callback has completed the IR block without any guest
+             insns being disassembled into it, so just return it at
+             this point, even if a self-check was requested - as there
+             is nothing to self-check. The five self-check no-ops will
+             still be in place, but they are harmless. */
+          return irsb;
+       }
+    }
+
+    /* Process instructions, one per loop iteration.  The loop is left
+       only via 'goto done'. */
+    while (True) {
+       vassert(n_instrs < vex_control.guest_max_insns);
+
+       /* Regardless of what chase_into_ok says, is chasing permissible
+          at all right now? Set resteerOKfn accordingly. */
+       resteerOK
+          = toBool(
+               n_instrs < vex_control.guest_chase_thresh
+               /* If making self-checking translations, don't chase
+                  .. it makes the checks too complicated. We only want
+                  to scan just one sequence of bytes in the check, not
+                  a whole bunch. */
+               && !do_self_check
+               /* we can't afford to have a resteer once we're on the
+                  last extent slot. */
+               && vge->n_used < 3
+            );
+
+       resteerOKfn
+          = resteerOK ? chase_into_ok : const_False;
+
+       /* This is the IP of the instruction we're just about to deal
+          with. */
+       guest_IP_curr_instr = guest_IP_bbstart + delta;
+
+       /* This is the irsb statement array index of the first stmt in
+          this insn. That will always be the instruction-mark
+          descriptor. */
+       first_stmt_idx = irsb->stmts_used;
+
+       /* Add an instruction-mark statement. We won't know until after
+          disassembling the instruction how long the instruction is, so
+          just put in a zero length and we'll fix it up later. */
+       addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
+
+       /* for the first insn, the dispatch loop will have set
+          %IP, but for all the others we have to do it ourselves. */
+       need_to_put_IP = toBool(n_instrs > 0);
+
+       /* Finally, actually disassemble an instruction. */
+       dres = dis_instr_fn ( irsb,
+                             need_to_put_IP,
+                             resteerOKfn,
+                             callback_opaque,
+                             guest_code,
+                             delta,
+                             guest_IP_curr_instr,
+                             arch_guest,
+                             archinfo_guest,
+                             abiinfo_both,
+                             host_bigendian );
+
+       /* stay sane ... */
+       vassert(dres.whatNext == Dis_StopHere
+               || dres.whatNext == Dis_Continue
+               || dres.whatNext == Dis_Resteer);
+       vassert(dres.len >= 0 && dres.len <= 20);
+       if (dres.whatNext != Dis_Resteer)
+          vassert(dres.continueAt == 0);
+
+       /* Fill in the insn-mark length field. */
+       vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
+       imark = irsb->stmts[first_stmt_idx];
+       vassert(imark);
+       vassert(imark->tag == Ist_IMark);
+       vassert(imark->Ist.IMark.len == 0);
+       imark->Ist.IMark.len = toUInt(dres.len);
+
+       /* Print the resulting IR, if needed.  (This tests the same
+          trace flag that debug_print was computed from.) */
+       if (vex_traceflags & VEX_TRACE_FE) {
+          for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
+             vex_printf(" ");
+             ppIRStmt(irsb->stmts[i]);
+             vex_printf("\n");
+          }
+       }
+
+       /* If dis_instr_fn terminated the BB at this point, check it
+          also filled in the irsb->next field. */
+       if (dres.whatNext == Dis_StopHere) {
+          vassert(irsb->next != NULL);
+          if (debug_print) {
+             vex_printf(" ");
+             vex_printf( "goto {");
+             ppIRJumpKind(irsb->jumpkind);
+             vex_printf( "} ");
+             ppIRExpr( irsb->next );
+             vex_printf( "\n");
+          }
+       }
+
+       /* Update the VexGuestExtents we are constructing. */
+       /* If vex_control.guest_max_insns is required to be < 100 and
+          each insn is at max 20 bytes long, this limit of 5000 then
+          seems reasonable since the max possible extent length will be
+          100 * 20 == 2000. */
+       vassert(vge->len[vge->n_used-1] < 5000);
+       vge->len[vge->n_used-1]
+          = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
+       n_instrs++;
+       if (debug_print)
+          vex_printf("\n");
+
+       /* Advance delta (inconspicuous but very important :-) */
+       delta += (Long)dres.len;
+
+       switch (dres.whatNext) {
+          case Dis_Continue:
+             vassert(irsb->next == NULL);
+             if (n_instrs < vex_control.guest_max_insns) {
+                /* keep going */
+             } else {
+                /* We have to stop: the insn budget is used up, so end
+                   the block with a jump to the next guest insn. */
+                irsb->next
+                   = IRExpr_Const(
+                        guest_word_type == Ity_I32
+                           ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
+                           : IRConst_U64(guest_IP_bbstart+delta)
+                     );
+                goto done;
+             }
+             break;
+          case Dis_StopHere:
+             vassert(irsb->next != NULL);
+             goto done;
+          case Dis_Resteer:
+             /* Check that we actually allowed a resteer .. */
+             vassert(resteerOK);
+             vassert(irsb->next == NULL);
+             /* figure out a new delta to continue at. */
+             vassert(resteerOKfn(callback_opaque,dres.continueAt));
+             delta = dres.continueAt - guest_IP_bbstart;
+             /* we now have to start a new extent slot. */
+             vge->n_used++;
+             vassert(vge->n_used <= 3);
+             vge->base[vge->n_used-1] = dres.continueAt;
+             vge->len[vge->n_used-1] = 0;
+             n_resteers++;
+             d_resteers++;
+             if (0 && (n_resteers & 0xFF) == 0)
+                vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
+                           n_resteers, d_resteers,
+                           dres.continueAt, delta);
+             break;
+          default:
+             vpanic("bb_to_IR");
+       }
+    }
+    /*NOTREACHED*/
+    vassert(0);
+
+   done:
+    /* We're done. The only thing that might need attending to is that
+       a self-checking preamble may need to be created. */
+    if (do_self_check) {
+
+       UInt len2check, expected32;
+       IRTemp tistart_tmp, tilen_tmp;
+       UInt (*checksum_fn)(HWord, HWord) __attribute__((regparm(2)));
+       HWord checksum_fn_entry;
+
+       vassert(vge->n_used == 1);
+       len2check = vge->len[0];
+
+       /* stay sane.
+          NOTE(review): len2check is unsigned, so the '>= 0' half is
+          always true.  The 1000 bound is tighter than the 100 * 20
+          worst case worked out in the loop above; confirm that
+          self-checked blocks can never get that long. */
+       vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
+
+       if (len2check >= 4 && 0 == (((HWord)guest_code) & 3)) {
+          checksum_fn = genericg_compute_checksum_4al_4plus;
+       } else {
+          checksum_fn = genericg_compute_checksum_generic;
+       }
+
+       expected32 = checksum_fn( (HWord)guest_code, len2check );
+
+       /* Set TISTART and TILEN. These will describe to the despatcher
+          the area of guest code to invalidate should we exit with a
+          self-check failure. */
+
+       tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
+       tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
+
+       irsb->stmts[selfcheck_idx+0]
+          = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
+
+       irsb->stmts[selfcheck_idx+1]
+          = IRStmt_WrTmp(tilen_tmp,
+                         guest_word_type==Ity_I32
+                            ? IRExpr_Const(IRConst_U32(len2check))
+                            : IRExpr_Const(IRConst_U64(len2check))
+          );
+
+       irsb->stmts[selfcheck_idx+2]
+          = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
+
+       irsb->stmts[selfcheck_idx+3]
+          = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
+
+       if (abiinfo_both->host_ppc_calls_use_fndescrs) {
+          HWord* fndescr = (HWord*)checksum_fn;
+          checksum_fn_entry = fndescr[0];
+       } else {
+          checksum_fn_entry = (HWord)checksum_fn;
+       }
+
+       irsb->stmts[selfcheck_idx+4]
+          = IRStmt_Exit(
+               IRExpr_Binop(
+                  Iop_CmpNE32,
+                  mkIRExprCCall(
+                     Ity_I32,
+                     2/*regparms*/,
+                     checksum_fn == genericg_compute_checksum_4al_4plus
+                        ? "genericg_compute_checksum_4al_4plus"
+                        : "genericg_compute_checksum_generic",
+                     (void*)checksum_fn_entry,
+                     mkIRExprVec_2(
+                        mkIRExpr_HWord( (HWord)guest_code ),
+                        mkIRExpr_HWord( (HWord)len2check )
+                     )
+                  ),
+                  IRExpr_Const(IRConst_U32(expected32))
+               ),
+               Ijk_TInval,
+               guest_IP_bbstart_IRConst
+            );
+    }
+
+    return irsb;
+ }
+
+
+ /*-------------------------------------------------------------
+    A support routine for doing self-checking translations.
+    -------------------------------------------------------------*/
+
+ /* CLEAN HELPER */
+ /* CALLED FROM GENERATED CODE */
+
+ /* Compute a checksum of host memory at [addr .. addr+len-1], as fast
+    as possible. The _4al_4plus version is assured that the request is
+    for 4-aligned memory and for a block of 4 or more bytes long,
+    whilst the _generic version must be able to handle any alignment,
+    and lengths down to zero too. This fn is called once for every use
+    of a self-checking translation, so it needs to be as fast as
+    possible.
*/
+
+ /* Rotate w left by n bits.  Only meaningful for 1 <= n <= 31: with
+    n == 0 the right shift would be by the full word width.  Both
+    checksum routines below always pass 31. */
+ static inline UInt ROL32 ( UInt w, Int n ) {
+    w = (w << n) | (w >> (32-n));
+    return w;
+ }
+
+ /* Shared body of the two checksum routines: fold the len bytes at
+    addr into the running sums sum1/sum2 and return the final
+    checksum.  Callers arrange that addr is 4-aligned whenever
+    len >= 4, since the first two loops read whole words. */
+ static inline UInt genericg_checksum_fold ( UInt sum1, UInt sum2,
+                                             HWord addr, HWord len )
+ {
+    /* vectorised + unrolled */
+    while (len >= 16) {
+       UInt* p = (UInt*)addr;
+       UInt w;
+       w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       addr += 16;
+       len -= 16;
+       sum1 ^= sum2;
+    }
+    /* vectorised fixup */
+    while (len >= 4) {
+       UInt* p = (UInt*)addr;
+       UInt w = p[0];
+       sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       addr += 4;
+       len -= 4;
+       sum1 ^= sum2;
+    }
+    /* scalar fixup */
+    while (len >= 1) {
+       UChar* p = (UChar*)addr;
+       UInt w = (UInt)p[0];
+       sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+       addr++;
+       len--;
+    }
+    return sum1 + sum2;
+ }
+
+ /* Checksum for any alignment and any length, including zero.  Bytes
+    before the first 4-aligned address are shifted into sum1 one at a
+    time; the rest goes through the shared fold. */
+ __attribute__((regparm(2)))
+ static UInt genericg_compute_checksum_generic ( HWord addr, HWord len )
+ {
+    UInt sum1 = 0;
+    /* pull up to 4-alignment */
+    while ((addr & 3) != 0 && len >= 1) {
+       UChar* p = (UChar*)addr;
+       sum1 = (sum1 << 8) | (UInt)p[0];
+       addr++;
+       len--;
+    }
+    return genericg_checksum_fold( sum1, 0, addr, len );
+ }
+
+ /* Checksum for the case where the caller guarantees addr is
+    4-aligned and len >= 4, so no alignment prologue is needed. */
+ __attribute__((regparm(2)))
+ static UInt genericg_compute_checksum_4al_4plus ( HWord addr, HWord len )
+ {
+    /* vassert(0 == (addr & 3)); */
+    /* vassert(len >= 4); */
+    return genericg_checksum_fold( 0, 0, addr, len );
+ }
+
+
/*--------------------------------------------------------------------*/ + /*--- end guest_generic_bb_to_IR.c ---*/ + /*--------------------------------------------------------------------*/ Index: VEX/priv/guest_generic_bb_to_IR.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_generic_bb_to_IR.h Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,182 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_generic_bb_to_IR.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. 
In no event shall OpenWorks LLP be liable for any
+    direct, indirect, incidental, special, exemplary, or consequential
+    damages (including, but not limited to, procurement of substitute
+    goods or services; loss of use, data, or profits; or business
+    interruption) however caused and on any theory of liability,
+    whether in contract, strict liability, or tort (including
+    negligence or otherwise) arising in any way out of the use of this
+    software, even if advised of the possibility of such damage.
+
+    Neither the names of the U.S. Department of Energy nor the
+    University of California nor the names of its contributors may be
+    used to endorse or promote products derived from this software
+    without prior written permission.
+ */
+
+ #ifndef __VEX_GUEST_GENERIC_BB_TO_IR_H
+ #define __VEX_GUEST_GENERIC_BB_TO_IR_H
+
+
+ /* This defines stuff needed by the guest insn disassemblers.
+    It's a bit circular; it is imported by
+    - the guest-specific toIR.c files (guest-{x86,amd64,ppc,arm}/toIR.c)
+    - the generic disassembly driver (guest_generic_bb_to_IR.c)
+    - vex_main.c
+ */
+
+
+ /* ---------------------------------------------------------------
+    Result of disassembling an instruction
+    --------------------------------------------------------------- */
+
+ /* The results of disassembling an instruction. There are three
+    possible outcomes. For Dis_Resteer, the disassembler _must_
+    continue at the specified address. For Dis_StopHere, the
+    disassembler _must_ terminate the BB. For Dis_Continue, we may at
+    our option either disassemble the next insn, or terminate the BB;
+    but in the latter case we must set the bb's ->next field to point
+    to the next instruction. */
+
+ typedef
+
+    struct {
+
+       /* The disassembled insn has this length, in bytes. Must always
+          be set. */
+       Int len;
+
+       /* What happens next?
+          Dis_StopHere: this insn terminates the BB; we must stop.
+          Dis_Continue: we can optionally continue into the next insn
+          Dis_Resteer: followed a branch; continue at the spec'd addr
+       */
+       enum { Dis_StopHere, Dis_Continue, Dis_Resteer } whatNext;
+
+       /* For Dis_Resteer, this is the guest address we should continue
+          at. Otherwise ignored (should be zero). */
+       Addr64 continueAt;
+
+    }
+
+    DisResult;
+
+
+ /* ---------------------------------------------------------------
+    The type of a function which disassembles one instruction.
+    C's function-type syntax is really astonishingly bizarre.
+    --------------------------------------------------------------- */
+
+ /* A function of this type (DisOneInstrFn) disassembles an instruction
+    located at host address &guest_code[delta], whose guest IP is
+    guest_IP (this may be entirely unrelated to where the insn is
+    actually located in the host's address space). The returned
+    DisResult.len field carries its size. If the returned
+    DisResult.whatNext field is Dis_Resteer then DisResult.continueAt
+    should hold the guest IP of the next insn to disassemble.
+
+    The function is not permitted to return Dis_Resteer if
+    resteerOkFn, when applied to the address which it wishes to
+    resteer into, returns False.
+
+    The resulting IR is added to the end of irbb.
+ */
+
+ typedef
+
+    DisResult (*DisOneInstrFn) (
+
+       /* This is the IRSB to which the resulting IR is to be appended. */
+       /*OUT*/ IRSB* irbb,
+
+       /* Do we need to generate IR to set the guest IP for this insn,
+          or not? */
+       /*IN*/ Bool put_IP,
+
+       /* Return True iff resteering to the given addr is allowed */
+       /*IN*/ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+
+       /* Vex-opaque data passed to all caller (valgrind) supplied
+          callbacks. */
+       /*IN*/ void* callback_opaque,
+
+       /* Where is the guest code? */
+       /*IN*/ UChar* guest_code,
+
+       /* Where is the actual insn? Note: it's at &guest_code[delta] */
+       /*IN*/ Long delta,
+
+       /* What is the guest IP of the insn? */
+       /*IN*/ Addr64 guest_IP,
+
+       /* Info about the guest architecture */
+       /*IN*/ VexArch guest_arch,
+       /*IN*/ VexArchInfo* archinfo,
+
+       /* ABI info for both guest and host */
+       /*IN*/ VexAbiInfo* abiinfo,
+
+       /* Is the host bigendian? */
+       /*IN*/ Bool host_bigendian
+
+    );
+
+
+ /* ---------------------------------------------------------------
+    Top-level BB to IR conversion fn.
+    --------------------------------------------------------------- */
+
+ /* See detailed comment in guest_generic_bb_to_IR.c.  Note that the
+    second parameter, closure_opaque here, is called callback_opaque
+    in the definition. */
+ extern
+ IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
+                  /*IN*/ void* closure_opaque,
+                  /*IN*/ DisOneInstrFn dis_instr_fn,
+                  /*IN*/ UChar* guest_code,
+                  /*IN*/ Addr64 guest_IP_bbstart,
+                  /*IN*/ Bool (*chase_into_ok)(void*,Addr64),
+                  /*IN*/ Bool host_bigendian,
+                  /*IN*/ VexArch arch_guest,
+                  /*IN*/ VexArchInfo* archinfo_guest,
+                  /*IN*/ VexAbiInfo* abiinfo_both,
+                  /*IN*/ IRType guest_word_type,
+                  /*IN*/ Bool do_self_check,
+                  /*IN*/ Bool (*preamble_function)(void*,IRSB*),
+                  /*IN*/ Int offB_TISTART,
+                  /*IN*/ Int offB_TILEN );
+
+
+ #endif /* ndef __VEX_GUEST_GENERIC_BB_TO_IR_H */
+
+ /*--------------------------------------------------------------------*/
+ /*--- end guest_generic_bb_to_IR.h ---*/
+ /*--------------------------------------------------------------------*/
Index: VEX/priv/guest_generic_x87.c
===========================================================================
*** /dev/null Sat May 26 10:11:03 2012
--- VEX/priv/guest_generic_x87.c Sat May 26 10:11:29 2012
***************
*** 0 ****
--- 1,551 ----
+
+ /*---------------------------------------------------------------*/
+ /*--- ---*/
+ /*--- This file (guest_generic_x87.c) is ---*/
+ /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/
+ /*--- ---*/
+ /*---------------------------------------------------------------*/
+
+ /*
+    This file is part of LibVEX, a library for dynamic binary
+    instrumentation and translation.
+
+    Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved.
+ + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* This file contains functions for doing some x87-specific + operations. Both the amd64 and x86 front ends (guests) indirectly + call these functions via guest helper calls. By putting them here, + code duplication is avoided. Some of these functions are tricky + and hard to verify, so there is much to be said for only having one + copy thereof. 
+ */ + + #include "libvex_basictypes.h" + + #include "main_util.h" + #include "guest_generic_x87.h" + + + /* 80 and 64-bit floating point formats: + + 80-bit: + + S 0 0-------0 zero + S 0 0X------X denormals + S 1-7FFE 1X------X normals (all normals have leading 1) + S 7FFF 10------0 infinity + S 7FFF 10X-----X snan + S 7FFF 11X-----X qnan + + S is the sign bit. For runs X----X, at least one of the Xs must be + nonzero. Exponent is 15 bits, fractional part is 63 bits, and + there is an explicitly represented leading 1, and a sign bit, + giving 80 in total. + + 64-bit avoids the confusion of an explicitly represented leading 1 + and so is simpler: + + S 0 0------0 zero + S 0 X------X denormals + S 1-7FE any normals + S 7FF 0------0 infinity + S 7FF 0X-----X snan + S 7FF 1X-----X qnan + + Exponent is 11 bits, fractional part is 52 bits, and there is a + sign bit, giving 64 in total. + */ + + + static inline UInt read_bit_array ( UChar* arr, UInt n ) + { + UChar c = arr[n >> 3]; + c >>= (n&7); + return c & 1; + } + + static inline void write_bit_array ( UChar* arr, UInt n, UInt b ) + { + UChar c = arr[n >> 3]; + c = toUChar( c & ~(1 << (n&7)) ); + c = toUChar( c | ((b&1) << (n&7)) ); + arr[n >> 3] = c; + } + + /* Convert an IEEE754 double (64-bit) into an x87 extended double + (80-bit), mimicing the hardware fairly closely. Both numbers are + stored little-endian. Limitations, all of which could be fixed, + given some level of hassle: + + * Identity of NaNs is not preserved. + + See comments in the code for more details. + */ + void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ) + { + Bool mantissaIsZero; + Int bexp, i, j, shift; + UChar sign; + + sign = toUChar( (f64[7] >> 7) & 1 ); + bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); + bexp &= 0x7FF; + + mantissaIsZero = False; + if (bexp == 0 || bexp == 0x7FF) { + /* We'll need to know whether or not the mantissa (bits 51:0) is + all zeroes in order to handle these cases. So figure it + out. 
*/ + mantissaIsZero + = toBool( + (f64[6] & 0x0F) == 0 + && f64[5] == 0 && f64[4] == 0 && f64[3] == 0 + && f64[2] == 0 && f64[1] == 0 && f64[0] == 0 + ); + } + + /* If the exponent is zero, either we have a zero or a denormal. + Produce a zero. This is a hack in that it forces denormals to + zero. Could do better. */ + if (bexp == 0) { + f80[9] = toUChar( sign << 7 ); + f80[8] = f80[7] = f80[6] = f80[5] = f80[4] + = f80[3] = f80[2] = f80[1] = f80[0] = 0; + + if (mantissaIsZero) + /* It really is zero, so that's all we can do. */ + return; + + /* There is at least one 1-bit in the mantissa. So it's a + potentially denormalised double -- but we can produce a + normalised long double. Count the leading zeroes in the + mantissa so as to decide how much to bump the exponent down + by. Note, this is SLOW. */ + shift = 0; + for (i = 51; i >= 0; i--) { + if (read_bit_array(f64, i)) + break; + shift++; + } + + /* and copy into place as many bits as we can get our hands on. */ + j = 63; + for (i = 51 - shift; i >= 0; i--) { + write_bit_array( f80, j, + read_bit_array( f64, i ) ); + j--; + } + + /* Set the exponent appropriately, and we're done. */ + bexp -= shift; + bexp += (16383 - 1023); + f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); + f80[8] = toUChar( bexp & 0xFF ); + return; + } + + /* If the exponent is 7FF, this is either an Infinity, a SNaN or + QNaN, as determined by examining bits 51:0, thus: + 0 ... 0 Inf + 0X ... X SNaN + 1X ... X QNaN + where at least one of the Xs is not zero. + */ + if (bexp == 0x7FF) { + if (mantissaIsZero) { + /* Produce an appropriately signed infinity: + S 1--1 (15) 1 0--0 (63) + */ + f80[9] = toUChar( (sign << 7) | 0x7F ); + f80[8] = 0xFF; + f80[7] = 0x80; + f80[6] = f80[5] = f80[4] = f80[3] + = f80[2] = f80[1] = f80[0] = 0; + return; + } + /* So it's either a QNaN or SNaN. Distinguish by considering + bit 51. Note, this destroys all the trailing bits + (identity?) of the NaN. 
IEEE754 doesn't require preserving + these (it only requires that there be one QNaN value and one + SNaN value), but x87 does seem to have some ability to + preserve them. Anyway, here, the NaN's identity is + destroyed. Could be improved. */ + if (f64[6] & 8) { + /* QNaN. Make a QNaN: + S 1--1 (15) 1 1--1 (63) + */ + f80[9] = toUChar( (sign << 7) | 0x7F ); + f80[8] = 0xFF; + f80[7] = 0xFF; + f80[6] = f80[5] = f80[4] = f80[3] + = f80[2] = f80[1] = f80[0] = 0xFF; + } else { + /* SNaN. Make a SNaN: + S 1--1 (15) 0 1--1 (63) + */ + f80[9] = toUChar( (sign << 7) | 0x7F ); + f80[8] = 0xFF; + f80[7] = 0x7F; + f80[6] = f80[5] = f80[4] = f80[3] + = f80[2] = f80[1] = f80[0] = 0xFF; + } + return; + } + + /* It's not a zero, denormal, infinity or nan. So it must be a + normalised number. Rebias the exponent and build the new + number. */ + bexp += (16383 - 1023); + + f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); + f80[8] = toUChar( bexp & 0xFF ); + f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78) + | ((f64[5] >> 5) & 7) ); + f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) ); + f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) ); + f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) ); + f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) ); + f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) ); + f80[1] = toUChar( ((f64[0] << 3) & 0xF8) ); + f80[0] = toUChar( 0 ); + } + + + /* Convert an x87 extended double (80-bit) into an IEEE 754 double + (64-bit), mimicking the hardware fairly closely. Both numbers are + stored little-endian. Limitations, both of which could be fixed, + given some level of hassle: + + * Rounding following truncation could be a bit better. + + * Identity of NaNs is not preserved. + + See comments in the code for more details. 
+ */ + void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ) + { + Bool isInf; + Int bexp, i, j; + UChar sign; + + sign = toUChar((f80[9] >> 7) & 1); + bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8]; + bexp &= 0x7FFF; + + /* If the exponent is zero, either we have a zero or a denormal. + But an extended precision denormal becomes a double precision + zero, so in either case, just produce the appropriately signed + zero. */ + if (bexp == 0) { + f64[7] = toUChar(sign << 7); + f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; + return; + } + + /* If the exponent is 7FFF, this is either an Infinity, a SNaN or + QNaN, as determined by examining bits 62:0, thus: + 0 ... 0 Inf + 0X ... X SNaN + 1X ... X QNaN + where at least one of the Xs is not zero. + */ + if (bexp == 0x7FFF) { + isInf = toBool( + (f80[7] & 0x7F) == 0 + && f80[6] == 0 && f80[5] == 0 && f80[4] == 0 + && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 + && f80[0] == 0 + ); + if (isInf) { + if (0 == (f80[7] & 0x80)) + goto wierd_NaN; + /* Produce an appropriately signed infinity: + S 1--1 (11) 0--0 (52) + */ + f64[7] = toUChar((sign << 7) | 0x7F); + f64[6] = 0xF0; + f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; + return; + } + /* So it's either a QNaN or SNaN. Distinguish by considering + bit 62, which is bit 6 of f80[7]. (f80[8] holds only exponent + bits and is always 0xFF on this path, so it must not be used + for this test.) Note, this destroys all the trailing bits + (identity?) of the NaN. IEEE754 doesn't require preserving + these (it only requires that there be one QNaN value and one + SNaN value), but x87 does seem to have some ability to + preserve them. Anyway, here, the NaN's identity is + destroyed. Could be improved. */ + if (f80[7] & 0x40) { + /* QNaN. Make a QNaN: + S 1--1 (11) 1 1--1 (51) + */ + f64[7] = toUChar((sign << 7) | 0x7F); + f64[6] = 0xFF; + f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; + } else { + /* SNaN. 
Make a SNaN: + S 1--1 (11) 0 1--1 (51) + */ + f64[7] = toUChar((sign << 7) | 0x7F); + f64[6] = 0xF7; + f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; + } + return; + } + + /* If it's not a Zero, NaN or Inf, and the integer part (bit 63) is + zero, the x87 FPU appears to consider the number denormalised + and converts it to a QNaN. */ + if (0 == (f80[7] & 0x80)) { + wierd_NaN: + /* Strange hardware QNaN: + S 1--1 (11) 1 0--0 (51) + */ + /* On a PIII, these QNaNs always appear with sign==1. I have + no idea why. */ + f64[7] = (1 /*sign*/ << 7) | 0x7F; + f64[6] = 0xF8; + f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; + return; + } + + /* It's not a zero, denormal, infinity or nan. So it must be a + normalised number. Rebias the exponent and consider. */ + bexp -= (16383 - 1023); + if (bexp >= 0x7FF) { + /* It's too big for a double. Construct an infinity. */ + f64[7] = toUChar((sign << 7) | 0x7F); + f64[6] = 0xF0; + f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; + return; + } + + if (bexp <= 0) { + /* It's too small for a normalised double. First construct a + zero and then see if it can be improved into a denormal. */ + f64[7] = toUChar(sign << 7); + f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; + + if (bexp < -52) + /* Too small even for a denormal. */ + return; + + /* Ok, let's make a denormal. Note, this is SLOW. */ + /* Copy bits 63, 62, 61, etc of the src mantissa into the dst, + indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */ + /* bexp is in range -52 .. 0 inclusive */ + for (i = 63; i >= 0; i--) { + j = i - 12 + bexp; + if (j < 0) break; + /* We shouldn't really call vassert from generated code. */ + vassert(j >= 0 && j < 52); + write_bit_array ( f64, + j, + read_bit_array ( f80, i ) ); + } + /* and now we might have to round ... */ + if (read_bit_array(f80, 10+1 - bexp) == 1) + goto do_rounding; + + return; + } + + /* Ok, it's a normalised number which is representable as a double. 
+ Copy the exponent and mantissa into place. */ + /* + for (i = 0; i < 52; i++) + write_bit_array ( f64, + i, + read_bit_array ( f80, i+11 ) ); + */ + f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) ); + f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) ); + f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) ); + f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) ); + f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) ); + f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) ); + + f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) ); + + f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) ); + + /* Now consider any rounding that needs to happen as a result of + truncating the mantissa. */ + if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ { + + /* If the bottom bits of f80 are "100 0000 0000", then the + infinitely precise value is deemed to be mid-way between the + two closest representable values. Since we're doing + round-to-nearest (the default mode), in that case it is the + bit immediately above which indicates whether we should round + upwards or not -- if 0, we don't. All that is encapsulated + in the following simple test. */ + if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0) + return; + + do_rounding: + /* Round upwards: add 1 to the little-endian 64-bit result, + propagating the carry through as many bytes as necessary. + A carry out of the mantissa into the exponent field is the + correct outcome (next binade, or infinity from the largest + finite value). The carry cannot reach the sign bit, because + the exponent field is at most 0x7FE on every path that gets + here. */ + for (i = 0; i < 8; i++) { + f64[i]++; + if (f64[i] != 0) + break; + } + } + } + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Extract the signed significand or exponent component as per + fxtract. Arg and result are doubles travelling under the guise of + ULongs. Returns significand when getExp is zero and exponent + otherwise. 
*/ + ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp ) + { + ULong uSig, uExp; + /* Long sSig; */ + Int sExp, i; + UInt sign, expExp; + + /* + S 7FF 0------0 infinity + S 7FF 0X-----X snan + S 7FF 1X-----X qnan + */ + const ULong posInf = 0x7FF0000000000000ULL; + const ULong negInf = 0xFFF0000000000000ULL; + const ULong nanMask = 0x7FF0000000000000ULL; + const ULong qNan = 0x7FF8000000000000ULL; + const ULong posZero = 0x0000000000000000ULL; + const ULong negZero = 0x8000000000000000ULL; + const ULong bit51 = 1ULL << 51; + const ULong bit52 = 1ULL << 52; + const ULong sigMask = bit52 - 1; + + /* Mimic PIII behaviour for special cases. */ + if (arg == posInf) + return getExp ? posInf : posInf; + if (arg == negInf) + return getExp ? posInf : negInf; + if ((arg & nanMask) == nanMask) + return qNan; + if (arg == posZero) + return getExp ? negInf : posZero; + if (arg == negZero) + return getExp ? negInf : negZero; + + /* Split into sign, exponent and significand. */ + sign = ((UInt)(arg >> 63)) & 1; + + /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */ + uSig = arg & sigMask; + + /* Get the exponent. */ + sExp = ((Int)(arg >> 52)) & 0x7FF; + + /* Deal with denormals: if the exponent is zero, then the + significand cannot possibly be zero (negZero/posZero are handled + above). Shift the significand left until bit 51 of it becomes + 1, and decrease the exponent accordingly. + */ + if (sExp == 0) { + for (i = 0; i < 52; i++) { + if (uSig & bit51) + break; + uSig <<= 1; + sExp--; + } + uSig <<= 1; + } else { + /* Add the implied leading-1 in the significand. */ + uSig |= bit52; + } + + /* Roll in the sign. */ + /* sSig = uSig; */ + /* if (sign) sSig =- sSig; */ + + /* Convert sig into a double. This should be an exact conversion. + Then divide by 2^52, which should give a value in the range 1.0 + to 2.0-epsilon, at least for normalised args. 
*/ + /* dSig = (Double)sSig; */ + /* dSig /= 67108864.0; */ /* 2^26 */ + /* dSig /= 67108864.0; */ /* 2^26 */ + uSig &= sigMask; + uSig |= 0x3FF0000000000000ULL; + if (sign) + uSig ^= negZero; + + /* Convert exp into a double. Also an exact conversion. */ + /* dExp = (Double)(sExp - 1023); */ + sExp -= 1023; + if (sExp == 0) { + uExp = 0; + } else { + uExp = sExp < 0 ? -sExp : sExp; + expExp = 0x3FF +52; + /* 1 <= uExp <= 1074 */ + /* Skip first 42 iterations of normalisation loop as we know they + will always happen */ + uExp <<= 42; + expExp -= 42; + for (i = 0; i < 52-42; i++) { + if (uExp & bit52) + break; + uExp <<= 1; + expExp--; + } + uExp &= sigMask; + uExp |= ((ULong)expExp) << 52; + if (sExp < 0) uExp ^= negZero; + } + + return getExp ? uExp : uSig; + } + + + /*---------------------------------------------------------------*/ + /*--- end guest_generic_x87.c ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_generic_x87.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_generic_x87.h Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,117 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_generic_x87.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. 
If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* This file contains functions for doing some x87-specific + operations. Both the amd64 and x86 front ends (guests) indirectly + call these functions via guest helper calls. By putting them here, + code duplication is avoided. Some of these functions are tricky + and hard to verify, so there is much to be said for only having one + copy thereof. + */ + + #ifndef __VEX_GUEST_GENERIC_X87_H + #define __VEX_GUEST_GENERIC_X87_H + + #include "libvex_basictypes.h" + + + /* Convert an IEEE754 double (64-bit) into an x87 extended double + (80-bit), mimicing the hardware fairly closely. Both numbers are + stored little-endian. 
Limitations, all of which could be fixed, + given some level of hassle: + + * Identity of NaNs is not preserved. + + See comments in the code for more details. + */ + extern + void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ); + + + /* Convert an x87 extended double (80-bit) into an IEEE 754 double + (64-bit), mimicking the hardware fairly closely. Both numbers are + stored little-endian. Limitations, both of which could be fixed, + given some level of hassle: + + * Rounding following truncation could be a bit better. + + * Identity of NaNs is not preserved. + + See comments in the code for more details. + */ + extern + void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ); + + + /* Layout of the real x87 state. */ + typedef + struct { + UShort env[14]; + UChar reg[80]; + } + Fpu_State; + + /* Offsets, in 16-bit ints, into the FPU environment (env) area. */ + #define FP_ENV_CTRL 0 + #define FP_ENV_STAT 2 + #define FP_ENV_TAG 4 + #define FP_ENV_IP 6 /* and 7 */ + #define FP_ENV_CS 8 + #define FP_ENV_OPOFF 10 /* and 11 */ + #define FP_ENV_OPSEL 12 + #define FP_REG(ii) (10*(7-(ii))) + + + /* Do the computations for x86/amd64 FXTRACT */ + extern ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp ); + + + + #endif /* ndef __VEX_GUEST_GENERIC_X87_H */ + + /*---------------------------------------------------------------*/ + /*--- end guest_generic_x87.h ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_ppc_defs.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_ppc_defs.h Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,167 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_ppc_defs.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. 
---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Only to be used within the guest-ppc directory. 
*/ + + + #ifndef __VEX_GUEST_PPC_DEFS_H + #define __VEX_GUEST_PPC_DEFS_H + + + /*---------------------------------------------------------*/ + /*--- ppc to IR conversion ---*/ + /*---------------------------------------------------------*/ + + /* Convert one ppc insn to IR. See the type DisOneInstrFn in + bb_to_IR.h. */ + extern + DisResult disInstr_PPC ( IRSB* irbb, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian ); + + /* Used by the optimiser to specialise calls to helpers. */ + extern + IRExpr* guest_ppc32_spechelper ( HChar* function_name, + IRExpr** args ); + + extern + IRExpr* guest_ppc64_spechelper ( HChar* function_name, + IRExpr** args ); + + /* Describes to the optimser which part of the guest state require + precise memory exceptions. This is logically part of the guest + state description. */ + extern + Bool guest_ppc32_state_requires_precise_mem_exns ( Int, Int ); + + extern + Bool guest_ppc64_state_requires_precise_mem_exns ( Int, Int ); + + extern + VexGuestLayout ppc32Guest_layout; + + extern + VexGuestLayout ppc64Guest_layout; + + + /* FP Rounding mode - different encoding to IR */ + typedef + enum { + PPCrm_NEAREST = 0, + PPCrm_NegINF = 1, + PPCrm_PosINF = 2, + PPCrm_ZERO = 3 + } PPCRoundingMode; + + /* Floating point comparison values - different encoding to IR */ + typedef + enum { + PPCcr_LT = 0x8, + PPCcr_GT = 0x4, + PPCcr_EQ = 0x2, + PPCcr_UN = 0x1 + } + PPCCmpF64Result; + + /* + Enumeration for xer_ca/ov calculation helper functions + */ + enum { + /* 0 */ PPCG_FLAG_OP_ADD=0, // addc[o], addic + /* 1 */ PPCG_FLAG_OP_ADDE, // adde[o], addme[o], addze[o] + /* 2 */ PPCG_FLAG_OP_DIVW, // divwo + /* 3 */ PPCG_FLAG_OP_DIVWU, // divwuo + /* 4 */ PPCG_FLAG_OP_MULLW, // mullwo + /* 5 */ PPCG_FLAG_OP_NEG, // nego + /* 6 */ PPCG_FLAG_OP_SUBF, // subfo + /* 7 */ 
PPCG_FLAG_OP_SUBFC, // subfc[o] + /* 8 */ PPCG_FLAG_OP_SUBFE, // subfe[o], subfme[o], subfze[o] + /* 9 */ PPCG_FLAG_OP_SUBFI, // subfic + /* 10 */ PPCG_FLAG_OP_SRAW, // sraw + /* 11 */ PPCG_FLAG_OP_SRAWI, // srawi + /* 12 */ PPCG_FLAG_OP_SRAD, // srad + /* 13 */ PPCG_FLAG_OP_SRADI, // sradi + PPCG_FLAG_OP_NUMBER + }; + + + /*---------------------------------------------------------*/ + /*--- ppc guest helpers ---*/ + /*---------------------------------------------------------*/ + + /* --- CLEAN HELPERS --- */ + + /* none, right now */ + + /* --- DIRTY HELPERS --- */ + + extern ULong ppcg_dirtyhelper_MFTB ( void ); + + extern UInt ppc32g_dirtyhelper_MFSPR_268_269 ( UInt ); + + extern UInt ppc32g_dirtyhelper_MFSPR_287 ( void ); + + extern void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst, + UInt vD_idx, UInt sh, + UInt shift_right ); + + extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst, + UInt vD_idx, UInt sh, + UInt shift_right ); + + #endif /* ndef __VEX_GUEST_PPC_DEFS_H */ + + /*---------------------------------------------------------------*/ + /*--- end guest_ppc_defs.h ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_ppc_helpers.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_ppc_helpers.c Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,844 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_ppc_helpers.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. 
+ + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + #include "libvex_basictypes.h" + #include "libvex_emwarn.h" + #include "libvex_guest_ppc32.h" + #include "libvex_guest_ppc64.h" + #include "libvex_ir.h" + #include "libvex.h" + + #include "main_util.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_ppc_defs.h" + + + /* This file contains helper functions for ppc32 and ppc64 guest code. + Calls to these functions are generated by the back end. 
These + calls are of course in the host machine code and this file will be + compiled to host machine code, so that all makes sense. + + Only change the signatures of these helper functions very + carefully. If you change the signature here, you'll have to change + the parameters passed to it in the IR calls constructed by + guest-ppc/toIR.c. + */ + + + /*---------------------------------------------------------------*/ + /*--- Misc integer helpers. ---*/ + /*---------------------------------------------------------------*/ + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-ppc platforms, return 1. */ + /* Reads a complete, consistent 64-bit TB value. */ + ULong ppcg_dirtyhelper_MFTB ( void ) + { + # if defined(__powerpc__) || defined(_AIX) + ULong res; + UInt lo, hi1, hi2; + while (1) { + __asm__ __volatile__ ("\n" + "\tmftbu %0\n" + "\tmftb %1\n" + "\tmftbu %2\n" + : "=r" (hi1), "=r" (lo), "=r" (hi2) + ); + if (hi1 == hi2) break; + } + res = ((ULong)hi1) << 32; + res |= (ULong)lo; + return res; + # else + return 1ULL; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially transparent) */ + UInt ppc32g_dirtyhelper_MFSPR_268_269 ( UInt r269 ) + { + # if defined(__powerpc__) || defined(_AIX) + UInt spr; + if (r269) { + __asm__ __volatile__("mfspr %0,269" : "=b"(spr)); + } else { + __asm__ __volatile__("mfspr %0,268" : "=b"(spr)); + } + return spr; + # else + return 0; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (I'm not really sure what the side effects are) */ + UInt ppc32g_dirtyhelper_MFSPR_287 ( void ) + { + # if defined(__powerpc__) || defined(_AIX) + UInt spr; + __asm__ __volatile__("mfspr %0,287" : "=b"(spr)); + return spr; + # else + return 0; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes one U128 of guest state at byte offset + vD_off from gst). The value stored is the window of the byte + table ref[] starting at index sh, or at 16-sh when shift_right + is set. */ + void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst, + UInt vD_off, UInt sh, 
UInt shift_right ) + { + static + UChar ref[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; + U128* pU128_src; + U128* pU128_dst; + + /* A whole U128 is stored at vD_off, so the last valid offset is + sizeof(state) - sizeof(U128). */ + vassert( vD_off <= sizeof(VexGuestPPC32State)-sizeof(U128) ); + vassert( sh <= 15 ); + vassert( shift_right <= 1 ); + if (shift_right) + sh = 16-sh; + /* else shift left */ + + pU128_src = (U128*)&ref[sh]; + pU128_dst = (U128*)( ((UChar*)gst) + vD_off ); + + (*pU128_dst)[0] = (*pU128_src)[0]; + (*pU128_dst)[1] = (*pU128_src)[1]; + (*pU128_dst)[2] = (*pU128_src)[2]; + (*pU128_dst)[3] = (*pU128_src)[3]; + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes one U128 of guest state at byte offset + vD_off from gst). Same computation as the ppc32 variant, but + for the ppc64 guest state. */ + void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst, + UInt vD_off, UInt sh, UInt shift_right ) + { + static + UChar ref[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; + U128* pU128_src; + U128* pU128_dst; + + /* A whole U128 is stored at vD_off, so the last valid offset is + sizeof(state) - sizeof(U128). */ + vassert( vD_off <= sizeof(VexGuestPPC64State)-sizeof(U128) ); + vassert( sh <= 15 ); + vassert( shift_right <= 1 ); + if (shift_right) + sh = 16-sh; + /* else shift left */ + + pU128_src = (U128*)&ref[sh]; + pU128_dst = (U128*)( ((UChar*)gst) + vD_off ); + + (*pU128_dst)[0] = (*pU128_src)[0]; + (*pU128_dst)[1] = (*pU128_src)[1]; + (*pU128_dst)[2] = (*pU128_src)[2]; + (*pU128_dst)[3] = (*pU128_src)[3]; + } + + + /* Helper-function specialiser. */ + + IRExpr* guest_ppc32_spechelper ( HChar* function_name, + IRExpr** args ) + { + return NULL; + } + + IRExpr* guest_ppc64_spechelper ( HChar* function_name, + IRExpr** args ) + { + return NULL; + } + + + /*----------------------------------------------*/ + /*--- The exported fns .. 
---*/ + /*----------------------------------------------*/ + + /* VISIBLE TO LIBVEX CLIENT */ + UInt LibVEX_GuestPPC32_get_CR ( /*IN*/VexGuestPPC32State* vex_state ) + { + # define FIELD(_n) \ + ( ( (UInt) \ + ( (vex_state->guest_CR##_n##_321 & (7<<1)) \ + | (vex_state->guest_CR##_n##_0 & 1) \ + ) \ + ) \ + << (4 * (7-(_n))) \ + ) + + return + FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3) + | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7); + + # undef FIELD + } + + + /* VISIBLE TO LIBVEX CLIENT */ + /* Note: %CR is 32 bits even for ppc64 */ + UInt LibVEX_GuestPPC64_get_CR ( /*IN*/VexGuestPPC64State* vex_state ) + { + # define FIELD(_n) \ + ( ( (UInt) \ + ( (vex_state->guest_CR##_n##_321 & (7<<1)) \ + | (vex_state->guest_CR##_n##_0 & 1) \ + ) \ + ) \ + << (4 * (7-(_n))) \ + ) + + return + FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3) + | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7); + + # undef FIELD + } + + + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestPPC32_put_CR ( UInt cr_native, + /*OUT*/VexGuestPPC32State* vex_state ) + { + UInt t; + + # define FIELD(_n) \ + do { \ + t = cr_native >> (4*(7-(_n))); \ + vex_state->guest_CR##_n##_0 = toUChar(t & 1); \ + vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \ + } while (0) + + FIELD(0); + FIELD(1); + FIELD(2); + FIELD(3); + FIELD(4); + FIELD(5); + FIELD(6); + FIELD(7); + + # undef FIELD + } + + + /* VISIBLE TO LIBVEX CLIENT */ + /* Note: %CR is 32 bits even for ppc64 */ + void LibVEX_GuestPPC64_put_CR ( UInt cr_native, + /*OUT*/VexGuestPPC64State* vex_state ) + { + UInt t; + + # define FIELD(_n) \ + do { \ + t = cr_native >> (4*(7-(_n))); \ + vex_state->guest_CR##_n##_0 = toUChar(t & 1); \ + vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \ + } while (0) + + FIELD(0); + FIELD(1); + FIELD(2); + FIELD(3); + FIELD(4); + FIELD(5); + FIELD(6); + FIELD(7); + + # undef FIELD + } + + + /* VISIBLE TO LIBVEX CLIENT */ + UInt LibVEX_GuestPPC32_get_XER ( /*IN*/VexGuestPPC32State* vex_state ) + { + UInt w = 0; + w |= ( 
((UInt)vex_state->guest_XER_BC) & 0xFF ); + w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 ); + w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 ); + w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 ); + return w; + } + + + /* VISIBLE TO LIBVEX CLIENT */ + /* Note: %XER is 32 bits even for ppc64 */ + UInt LibVEX_GuestPPC64_get_XER ( /*IN*/VexGuestPPC64State* vex_state ) + { + UInt w = 0; + w |= ( ((UInt)vex_state->guest_XER_BC) & 0xFF ); + w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 ); + w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 ); + w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 ); + return w; + } + + + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestPPC32_put_XER ( UInt xer_native, + /*OUT*/VexGuestPPC32State* vex_state ) + { + vex_state->guest_XER_BC = toUChar(xer_native & 0xFF); + vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1); + vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1); + vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1); + } + + /* VISIBLE TO LIBVEX CLIENT */ + /* Note: %XER is 32 bits even for ppc64 */ + void LibVEX_GuestPPC64_put_XER ( UInt xer_native, + /*OUT*/VexGuestPPC64State* vex_state ) + { + vex_state->guest_XER_BC = toUChar(xer_native & 0xFF); + vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1); + vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1); + vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1); + } + + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) + { + Int i; + vex_state->guest_GPR0 = 0; + vex_state->guest_GPR1 = 0; + vex_state->guest_GPR2 = 0; + vex_state->guest_GPR3 = 0; + vex_state->guest_GPR4 = 0; + vex_state->guest_GPR5 = 0; + vex_state->guest_GPR6 = 0; + vex_state->guest_GPR7 = 0; + vex_state->guest_GPR8 = 0; + vex_state->guest_GPR9 = 0; + vex_state->guest_GPR10 = 0; + vex_state->guest_GPR11 = 0; + vex_state->guest_GPR12 = 0; + vex_state->guest_GPR13 = 0; + 
vex_state->guest_GPR14 = 0; + vex_state->guest_GPR15 = 0; + vex_state->guest_GPR16 = 0; + vex_state->guest_GPR17 = 0; + vex_state->guest_GPR18 = 0; + vex_state->guest_GPR19 = 0; + vex_state->guest_GPR20 = 0; + vex_state->guest_GPR21 = 0; + vex_state->guest_GPR22 = 0; + vex_state->guest_GPR23 = 0; + vex_state->guest_GPR24 = 0; + vex_state->guest_GPR25 = 0; + vex_state->guest_GPR26 = 0; + vex_state->guest_GPR27 = 0; + vex_state->guest_GPR28 = 0; + vex_state->guest_GPR29 = 0; + vex_state->guest_GPR30 = 0; + vex_state->guest_GPR31 = 0; + + vex_state->guest_FPR0 = 0; + vex_state->guest_FPR1 = 0; + vex_state->guest_FPR2 = 0; + vex_state->guest_FPR3 = 0; + vex_state->guest_FPR4 = 0; + vex_state->guest_FPR5 = 0; + vex_state->guest_FPR6 = 0; + vex_state->guest_FPR7 = 0; + vex_state->guest_FPR8 = 0; + vex_state->guest_FPR9 = 0; + vex_state->guest_FPR10 = 0; + vex_state->guest_FPR11 = 0; + vex_state->guest_FPR12 = 0; + vex_state->guest_FPR13 = 0; + vex_state->guest_FPR14 = 0; + vex_state->guest_FPR15 = 0; + vex_state->guest_FPR16 = 0; + vex_state->guest_FPR17 = 0; + vex_state->guest_FPR18 = 0; + vex_state->guest_FPR19 = 0; + vex_state->guest_FPR20 = 0; + vex_state->guest_FPR21 = 0; + vex_state->guest_FPR22 = 0; + vex_state->guest_FPR23 = 0; + vex_state->guest_FPR24 = 0; + vex_state->guest_FPR25 = 0; + vex_state->guest_FPR26 = 0; + vex_state->guest_FPR27 = 0; + vex_state->guest_FPR28 = 0; + vex_state->guest_FPR29 = 0; + vex_state->guest_FPR30 = 0; + vex_state->guest_FPR31 = 0; + + /* Initialise the vector state. 
*/ + # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0; + + VECZERO(vex_state->guest_VR0 ); + VECZERO(vex_state->guest_VR1 ); + VECZERO(vex_state->guest_VR2 ); + VECZERO(vex_state->guest_VR3 ); + VECZERO(vex_state->guest_VR4 ); + VECZERO(vex_state->guest_VR5 ); + VECZERO(vex_state->guest_VR6 ); + VECZERO(vex_state->guest_VR7 ); + VECZERO(vex_state->guest_VR8 ); + VECZERO(vex_state->guest_VR9 ); + VECZERO(vex_state->guest_VR10); + VECZERO(vex_state->guest_VR11); + VECZERO(vex_state->guest_VR12); + VECZERO(vex_state->guest_VR13); + VECZERO(vex_state->guest_VR14); + VECZERO(vex_state->guest_VR15); + VECZERO(vex_state->guest_VR16); + VECZERO(vex_state->guest_VR17); + VECZERO(vex_state->guest_VR18); + VECZERO(vex_state->guest_VR19); + VECZERO(vex_state->guest_VR20); + VECZERO(vex_state->guest_VR21); + VECZERO(vex_state->guest_VR22); + VECZERO(vex_state->guest_VR23); + VECZERO(vex_state->guest_VR24); + VECZERO(vex_state->guest_VR25); + VECZERO(vex_state->guest_VR26); + VECZERO(vex_state->guest_VR27); + VECZERO(vex_state->guest_VR28); + VECZERO(vex_state->guest_VR29); + VECZERO(vex_state->guest_VR30); + VECZERO(vex_state->guest_VR31); + + # undef VECZERO + + vex_state->guest_CIA = 0; + vex_state->guest_LR = 0; + vex_state->guest_CTR = 0; + + vex_state->guest_XER_SO = 0; + vex_state->guest_XER_OV = 0; + vex_state->guest_XER_CA = 0; + vex_state->guest_XER_BC = 0; + + vex_state->guest_CR0_321 = 0; + vex_state->guest_CR0_0 = 0; + vex_state->guest_CR1_321 = 0; + vex_state->guest_CR1_0 = 0; + vex_state->guest_CR2_321 = 0; + vex_state->guest_CR2_0 = 0; + vex_state->guest_CR3_321 = 0; + vex_state->guest_CR3_0 = 0; + vex_state->guest_CR4_321 = 0; + vex_state->guest_CR4_0 = 0; + vex_state->guest_CR5_321 = 0; + vex_state->guest_CR5_0 = 0; + vex_state->guest_CR6_321 = 0; + vex_state->guest_CR6_0 = 0; + vex_state->guest_CR7_321 = 0; + vex_state->guest_CR7_0 = 0; + + vex_state->guest_FPROUND = (UInt)PPCrm_NEAREST; + + vex_state->guest_VRSAVE = 0; + + vex_state->guest_VSCR = 0x0; // 
Non-Java mode = 0 + + vex_state->guest_EMWARN = EmWarn_NONE; + + vex_state->guest_TISTART = 0; + vex_state->guest_TILEN = 0; + + vex_state->guest_NRADDR = 0; + vex_state->guest_NRADDR_GPR2 = 0; + + vex_state->guest_REDIR_SP = -1; + for (i = 0; i < VEX_GUEST_PPC32_REDIR_STACK_SIZE; i++) + vex_state->guest_REDIR_STACK[i] = 0; + + vex_state->guest_IP_AT_SYSCALL = 0; + vex_state->guest_SPRG3_RO = 0; + } + + + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state ) + { + Int i; + vex_state->guest_GPR0 = 0; + vex_state->guest_GPR1 = 0; + vex_state->guest_GPR2 = 0; + vex_state->guest_GPR3 = 0; + vex_state->guest_GPR4 = 0; + vex_state->guest_GPR5 = 0; + vex_state->guest_GPR6 = 0; + vex_state->guest_GPR7 = 0; + vex_state->guest_GPR8 = 0; + vex_state->guest_GPR9 = 0; + vex_state->guest_GPR10 = 0; + vex_state->guest_GPR11 = 0; + vex_state->guest_GPR12 = 0; + vex_state->guest_GPR13 = 0; + vex_state->guest_GPR14 = 0; + vex_state->guest_GPR15 = 0; + vex_state->guest_GPR16 = 0; + vex_state->guest_GPR17 = 0; + vex_state->guest_GPR18 = 0; + vex_state->guest_GPR19 = 0; + vex_state->guest_GPR20 = 0; + vex_state->guest_GPR21 = 0; + vex_state->guest_GPR22 = 0; + vex_state->guest_GPR23 = 0; + vex_state->guest_GPR24 = 0; + vex_state->guest_GPR25 = 0; + vex_state->guest_GPR26 = 0; + vex_state->guest_GPR27 = 0; + vex_state->guest_GPR28 = 0; + vex_state->guest_GPR29 = 0; + vex_state->guest_GPR30 = 0; + vex_state->guest_GPR31 = 0; + + vex_state->guest_FPR0 = 0; + vex_state->guest_FPR1 = 0; + vex_state->guest_FPR2 = 0; + vex_state->guest_FPR3 = 0; + vex_state->guest_FPR4 = 0; + vex_state->guest_FPR5 = 0; + vex_state->guest_FPR6 = 0; + vex_state->guest_FPR7 = 0; + vex_state->guest_FPR8 = 0; + vex_state->guest_FPR9 = 0; + vex_state->guest_FPR10 = 0; + vex_state->guest_FPR11 = 0; + vex_state->guest_FPR12 = 0; + vex_state->guest_FPR13 = 0; + vex_state->guest_FPR14 = 0; + vex_state->guest_FPR15 = 0; + vex_state->guest_FPR16 = 0; + 
vex_state->guest_FPR17 = 0; + vex_state->guest_FPR18 = 0; + vex_state->guest_FPR19 = 0; + vex_state->guest_FPR20 = 0; + vex_state->guest_FPR21 = 0; + vex_state->guest_FPR22 = 0; + vex_state->guest_FPR23 = 0; + vex_state->guest_FPR24 = 0; + vex_state->guest_FPR25 = 0; + vex_state->guest_FPR26 = 0; + vex_state->guest_FPR27 = 0; + vex_state->guest_FPR28 = 0; + vex_state->guest_FPR29 = 0; + vex_state->guest_FPR30 = 0; + vex_state->guest_FPR31 = 0; + + /* Initialise the vector state. */ + # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0; + + VECZERO(vex_state->guest_VR0 ); + VECZERO(vex_state->guest_VR1 ); + VECZERO(vex_state->guest_VR2 ); + VECZERO(vex_state->guest_VR3 ); + VECZERO(vex_state->guest_VR4 ); + VECZERO(vex_state->guest_VR5 ); + VECZERO(vex_state->guest_VR6 ); + VECZERO(vex_state->guest_VR7 ); + VECZERO(vex_state->guest_VR8 ); + VECZERO(vex_state->guest_VR9 ); + VECZERO(vex_state->guest_VR10); + VECZERO(vex_state->guest_VR11); + VECZERO(vex_state->guest_VR12); + VECZERO(vex_state->guest_VR13); + VECZERO(vex_state->guest_VR14); + VECZERO(vex_state->guest_VR15); + VECZERO(vex_state->guest_VR16); + VECZERO(vex_state->guest_VR17); + VECZERO(vex_state->guest_VR18); + VECZERO(vex_state->guest_VR19); + VECZERO(vex_state->guest_VR20); + VECZERO(vex_state->guest_VR21); + VECZERO(vex_state->guest_VR22); + VECZERO(vex_state->guest_VR23); + VECZERO(vex_state->guest_VR24); + VECZERO(vex_state->guest_VR25); + VECZERO(vex_state->guest_VR26); + VECZERO(vex_state->guest_VR27); + VECZERO(vex_state->guest_VR28); + VECZERO(vex_state->guest_VR29); + VECZERO(vex_state->guest_VR30); + VECZERO(vex_state->guest_VR31); + + # undef VECZERO + + vex_state->guest_CIA = 0; + vex_state->guest_LR = 0; + vex_state->guest_CTR = 0; + + vex_state->guest_XER_SO = 0; + vex_state->guest_XER_OV = 0; + vex_state->guest_XER_CA = 0; + vex_state->guest_XER_BC = 0; + + vex_state->guest_CR0_321 = 0; + vex_state->guest_CR0_0 = 0; + vex_state->guest_CR1_321 = 0; + vex_state->guest_CR1_0 = 0; + 
vex_state->guest_CR2_321 = 0; + vex_state->guest_CR2_0 = 0; + vex_state->guest_CR3_321 = 0; + vex_state->guest_CR3_0 = 0; + vex_state->guest_CR4_321 = 0; + vex_state->guest_CR4_0 = 0; + vex_state->guest_CR5_321 = 0; + vex_state->guest_CR5_0 = 0; + vex_state->guest_CR6_321 = 0; + vex_state->guest_CR6_0 = 0; + vex_state->guest_CR7_321 = 0; + vex_state->guest_CR7_0 = 0; + + vex_state->guest_FPROUND = (UInt)PPCrm_NEAREST; + + vex_state->guest_VRSAVE = 0; + + vex_state->guest_VSCR = 0x0; // Non-Java mode = 0 + + vex_state->guest_EMWARN = EmWarn_NONE; + + vex_state->padding = 0; + + vex_state->guest_TISTART = 0; + vex_state->guest_TILEN = 0; + + vex_state->guest_NRADDR = 0; + vex_state->guest_NRADDR_GPR2 = 0; + + vex_state->guest_REDIR_SP = -1; + for (i = 0; i < VEX_GUEST_PPC64_REDIR_STACK_SIZE; i++) + vex_state->guest_REDIR_STACK[i] = 0; + + vex_state->guest_IP_AT_SYSCALL = 0; + vex_state->guest_SPRG3_RO = 0; + + vex_state->padding2 = 0; + } + + + /*-----------------------------------------------------------*/ + /*--- Describing the ppc guest state, for the benefit ---*/ + /*--- of iropt and instrumenters. ---*/ + /*-----------------------------------------------------------*/ + + /* Figure out if any part of the guest state contained in minoff + .. maxoff requires precise memory exceptions. If in doubt return + True (but this generates significantly slower code). + + By default we enforce precise exns for guest R1 (stack pointer), + CIA (current insn address) and LR (link register). These are the + minimum needed to extract correct stack backtraces from ppc + code. 
[[NB: not sure if keeping LR up to date is actually + necessary.]] + */ + Bool guest_ppc32_state_requires_precise_mem_exns ( Int minoff, + Int maxoff ) + { + Int lr_min = offsetof(VexGuestPPC32State, guest_LR); + Int lr_max = lr_min + 4 - 1; + Int r1_min = offsetof(VexGuestPPC32State, guest_GPR1); + Int r1_max = r1_min + 4 - 1; + Int cia_min = offsetof(VexGuestPPC32State, guest_CIA); + Int cia_max = cia_min + 4 - 1; + + if (maxoff < lr_min || minoff > lr_max) { + /* no overlap with LR */ + } else { + return True; + } + + if (maxoff < r1_min || minoff > r1_max) { + /* no overlap with R1 */ + } else { + return True; + } + + if (maxoff < cia_min || minoff > cia_max) { + /* no overlap with CIA */ + } else { + return True; + } + + return False; + } + + Bool guest_ppc64_state_requires_precise_mem_exns ( Int minoff, + Int maxoff ) + { + /* Given that R2 is a Big Deal in the ELF ppc64 ABI, it seems + prudent to be conservative with it, even though thus far there + is no evidence to suggest that it actually needs to be kept up + to date wrt possible exceptions. 
*/ + Int lr_min = offsetof(VexGuestPPC64State, guest_LR); + Int lr_max = lr_min + 8 - 1; + Int r1_min = offsetof(VexGuestPPC64State, guest_GPR1); + Int r1_max = r1_min + 8 - 1; + Int r2_min = offsetof(VexGuestPPC64State, guest_GPR2); + Int r2_max = r2_min + 8 - 1; + Int cia_min = offsetof(VexGuestPPC64State, guest_CIA); + Int cia_max = cia_min + 8 - 1; + + if (maxoff < lr_min || minoff > lr_max) { + /* no overlap with LR */ + } else { + return True; + } + + if (maxoff < r1_min || minoff > r1_max) { + /* no overlap with R1 */ + } else { + return True; + } + + if (maxoff < r2_min || minoff > r2_max) { + /* no overlap with R2 */ + } else { + return True; + } + + if (maxoff < cia_min || minoff > cia_max) { + /* no overlap with CIA */ + } else { + return True; + } + + return False; + } + + + #define ALWAYSDEFD32(field) \ + { offsetof(VexGuestPPC32State, field), \ + (sizeof ((VexGuestPPC32State*)0)->field) } + + VexGuestLayout + ppc32Guest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestPPC32State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestPPC32State,guest_GPR1), + .sizeof_SP = 4, + + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestPPC32State,guest_GPR1), + .sizeof_FP = 4, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestPPC32State,guest_CIA), + .sizeof_IP = 4, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. 
*/ + .n_alwaysDefd = 11, + + .alwaysDefd + = { /* 0 */ ALWAYSDEFD32(guest_CIA), + /* 1 */ ALWAYSDEFD32(guest_EMWARN), + /* 2 */ ALWAYSDEFD32(guest_TISTART), + /* 3 */ ALWAYSDEFD32(guest_TILEN), + /* 4 */ ALWAYSDEFD32(guest_VSCR), + /* 5 */ ALWAYSDEFD32(guest_FPROUND), + /* 6 */ ALWAYSDEFD32(guest_NRADDR), + /* 7 */ ALWAYSDEFD32(guest_NRADDR_GPR2), + /* 8 */ ALWAYSDEFD32(guest_REDIR_SP), + /* 9 */ ALWAYSDEFD32(guest_REDIR_STACK), + /* 10 */ ALWAYSDEFD32(guest_IP_AT_SYSCALL) + } + }; + + #define ALWAYSDEFD64(field) \ + { offsetof(VexGuestPPC64State, field), \ + (sizeof ((VexGuestPPC64State*)0)->field) } + + VexGuestLayout + ppc64Guest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestPPC64State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestPPC64State,guest_GPR1), + .sizeof_SP = 8, + + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestPPC64State,guest_GPR1), + .sizeof_FP = 8, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestPPC64State,guest_CIA), + .sizeof_IP = 8, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. 
*/ + .n_alwaysDefd = 11, + + .alwaysDefd + = { /* 0 */ ALWAYSDEFD64(guest_CIA), + /* 1 */ ALWAYSDEFD64(guest_EMWARN), + /* 2 */ ALWAYSDEFD64(guest_TISTART), + /* 3 */ ALWAYSDEFD64(guest_TILEN), + /* 4 */ ALWAYSDEFD64(guest_VSCR), + /* 5 */ ALWAYSDEFD64(guest_FPROUND), + /* 6 */ ALWAYSDEFD64(guest_NRADDR), + /* 7 */ ALWAYSDEFD64(guest_NRADDR_GPR2), + /* 8 */ ALWAYSDEFD64(guest_REDIR_SP), + /* 9 */ ALWAYSDEFD64(guest_REDIR_STACK), + /* 10 */ ALWAYSDEFD64(guest_IP_AT_SYSCALL) + } + }; + + /*---------------------------------------------------------------*/ + /*--- end guest_ppc_helpers.c ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_ppc_toIR.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_ppc_toIR.c Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,9891 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_ppc_toIR.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. 
Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* TODO 18/Nov/05: + + Spot rld... cases which are simply left/right shifts and emit + Shl64/Shr64 accordingly. + + Altivec + - datastream insns + - lvxl,stvxl: load/store with 'least recently used' hint + - vexptefp, vlogefp + + LIMITATIONS: + + Various, including: + + - Some invalid forms of lswi and lswx are accepted when they should + not be. + + - Floating Point: + - All exceptions disabled in FPSCR + - condition codes not set in FPSCR + + - Altivec floating point: + - vmaddfp, vnmsubfp + Because we're using Java/IEEE mode (FPSCR[NJ]), rather than the + system default of Non-Java mode, we get some small errors + (lowest bit only). + This is because Non-Java mode brutally hacks denormalised results + to zero, whereas we keep maximum accuracy. However, using + Non-Java mode would give us more inaccuracy, as our intermediate + results would then be zeroed, too. 
+ - AbiHints for the stack red zone are only emitted for + unconditional calls and returns (bl, blr). They should also be + emitted for conditional calls and returns, but we don't have a + way to express that right now. Ah well. + */ + + /* "Special" instructions. + + This instruction decoder can decode four special instructions + which mean nothing natively (are no-ops as far as regs/mem are + concerned) but have meaning for supporting Valgrind. A special + instruction is flagged by a 16-byte preamble: + + 32-bit mode: 54001800 54006800 5400E800 54009800 + (rlwinm 0,0,3,0,0; rlwinm 0,0,13,0,0; + rlwinm 0,0,29,0,0; rlwinm 0,0,19,0,0) + + 64-bit mode: 78001800 78006800 7800E802 78009802 + (rotldi 0,0,3; rotldi 0,0,13; + rotldi 0,0,61; rotldi 0,0,51) + + Following that, one of the following 4 is allowed + (standard interpretation in parentheses): + + 7C210B78 (or 1,1,1) %R3 = client_request ( %R4 ) + 7C421378 (or 2,2,2) %R3 = guest_NRADDR + 7C631B78 (or 3,3,3) branch-and-link-to-noredir %R11 + 7C842378 (or 4,4,4) %R3 = guest_NRADDR_GPR2 + + Any other bytes following the 16-byte preamble are illegal and + constitute a failure in instruction decoding. This all assumes + that the preamble will never occur except in specific code + fragments designed for Valgrind to catch. + */ + + + /* Translates PPC32/64 code to IR. 
*/ + + /* References + + #define PPC32 + "PowerPC Microprocessor Family: + The Programming Environments Manual for 32-Bit Microprocessors" + 02/21/2000 + http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + + #define PPC64 + "PowerPC Microprocessor Family: + Programming Environments Manual for 64-Bit Microprocessors" + 06/10/2003 + http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/F7E732FF811F783187256FDD004D3797 + + #define AV + "PowerPC Microprocessor Family: + AltiVec(TM) Technology Programming Environments Manual" + 07/10/2003 + http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D + */ + + #include "libvex_basictypes.h" + #include "libvex_ir.h" + #include "libvex.h" + #include "libvex_guest_ppc32.h" + #include "libvex_guest_ppc64.h" + + #include "main_util.h" + #include "main_globals.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_ppc_defs.h" + + + /*------------------------------------------------------------*/ + /*--- Globals ---*/ + /*------------------------------------------------------------*/ + + /* These are set at the start of the translation of an insn, right + down in disInstr_PPC, so that we don't have to pass them around + endlessly. They are all constant during the translation of any + given insn. */ + + /* We need to know this to do sub-register accesses correctly. */ + static Bool host_is_bigendian; + + /* Pointer to the guest code area. */ + static UChar* guest_code; + + /* The guest address corresponding to guest_code[0]. */ + static Addr64 guest_CIA_bbstart; + + /* The guest address for the instruction currently being + translated. */ + static Addr64 guest_CIA_curr_instr; + + /* The IRSB* into which we're generating code. */ + static IRSB* irsb; + + /* Is our guest binary 32 or 64bit? Set at each call to + disInstr_PPC below. 
*/ + static Bool mode64 = False; + + // Given a pointer to a function as obtained by "& functionname" in C, + // produce a pointer to the actual entry point for the function. For + // most platforms it's the identity function. Unfortunately, on + // ppc64-linux it isn't (sigh) and ditto for ppc32-aix5 and + // ppc64-aix5. + static void* fnptr_to_fnentry( VexAbiInfo* vbi, void* f ) + { + if (vbi->host_ppc_calls_use_fndescrs) { + /* f is a pointer to a 3-word function descriptor, of which the + first word is the entry address. */ + /* note, this is correct even with cross-jitting, since this is + purely a host issue, not a guest one. */ + HWord* fdescr = (HWord*)f; + return (void*)(fdescr[0]); + } else { + /* Simple; "& f" points directly at the code for f. */ + return f; + } + } + + + /*------------------------------------------------------------*/ + /*--- Debugging output ---*/ + /*------------------------------------------------------------*/ + + #define DIP(format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + + #define DIS(buf, format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ## args) + + + /*------------------------------------------------------------*/ + /*--- Offsets of various parts of the ppc32/64 guest state ---*/ + /*------------------------------------------------------------*/ + + #define offsetofPPCGuestState(_x) \ + (mode64 ? 
offsetof(VexGuestPPC64State, _x) : \ + offsetof(VexGuestPPC32State, _x)) + + #define OFFB_CIA offsetofPPCGuestState(guest_CIA) + #define OFFB_IP_AT_SYSCALL offsetofPPCGuestState(guest_IP_AT_SYSCALL) + #define OFFB_SPRG3_RO offsetofPPCGuestState(guest_SPRG3_RO) + #define OFFB_LR offsetofPPCGuestState(guest_LR) + #define OFFB_CTR offsetofPPCGuestState(guest_CTR) + #define OFFB_XER_SO offsetofPPCGuestState(guest_XER_SO) + #define OFFB_XER_OV offsetofPPCGuestState(guest_XER_OV) + #define OFFB_XER_CA offsetofPPCGuestState(guest_XER_CA) + #define OFFB_XER_BC offsetofPPCGuestState(guest_XER_BC) + #define OFFB_FPROUND offsetofPPCGuestState(guest_FPROUND) + #define OFFB_VRSAVE offsetofPPCGuestState(guest_VRSAVE) + #define OFFB_VSCR offsetofPPCGuestState(guest_VSCR) + #define OFFB_EMWARN offsetofPPCGuestState(guest_EMWARN) + #define OFFB_TISTART offsetofPPCGuestState(guest_TISTART) + #define OFFB_TILEN offsetofPPCGuestState(guest_TILEN) + #define OFFB_NRADDR offsetofPPCGuestState(guest_NRADDR) + #define OFFB_NRADDR_GPR2 offsetofPPCGuestState(guest_NRADDR_GPR2) + + + /*------------------------------------------------------------*/ + /*--- Extract instruction fields --- */ + /*------------------------------------------------------------*/ + + /* Extract field from insn, given idx (zero = lsb) and field length */ + #define IFIELD( insn, idx, len ) ((insn >> idx) & ((1< end. 
+ begin->end works from right to left, 0=lsb + */ + static UInt MASK32( UInt begin, UInt end ) + { + UInt m1, m2, mask; + vassert(begin < 32); + vassert(end < 32); + m1 = ((UInt)(-1)) << begin; + m2 = ((UInt)(-1)) << end << 1; + mask = m1 ^ m2; + if (begin > end) mask = ~mask; // wrap mask + return mask; + } + + /* ditto for 64bit mask */ + static ULong MASK64( UInt begin, UInt end ) + { + ULong m1, m2, mask; + vassert(begin < 64); + vassert(end < 64); + m1 = ((ULong)(-1)) << begin; + m2 = ((ULong)(-1)) << end << 1; + mask = m1 ^ m2; + if (begin > end) mask = ~mask; // wrap mask + return mask; + } + + static Addr64 nextInsnAddr( void ) + { + return guest_CIA_curr_instr + 4; + } + + + /*------------------------------------------------------------*/ + /*--- Helper bits and pieces for deconstructing the ---*/ + /*--- ppc32/64 insn stream. ---*/ + /*------------------------------------------------------------*/ + + /* Add a statement to the list held by "irsb". */ + static void stmt ( IRStmt* st ) + { + addStmtToIRSB( irsb, st ); + } + + /* Generate a new temporary of the given type. */ + static IRTemp newTemp ( IRType ty ) + { + vassert(isPlausibleIRType(ty)); + return newIRTemp( irsb->tyenv, ty ); + } + + /* Various simple conversions */ + + static UChar extend_s_5to8 ( UChar x ) + { + return toUChar((((Int)x) << 27) >> 27); + } + + static UInt extend_s_8to32( UChar x ) + { + return (UInt)((((Int)x) << 24) >> 24); + } + + static UInt extend_s_16to32 ( UInt x ) + { + return (UInt)((((Int)x) << 16) >> 16); + } + + static ULong extend_s_16to64 ( UInt x ) + { + return (ULong)((((Long)x) << 48) >> 48); + } + + static ULong extend_s_26to64 ( UInt x ) + { + return (ULong)((((Long)x) << 38) >> 38); + } + + static ULong extend_s_32to64 ( UInt x ) + { + return (ULong)((((Long)x) << 32) >> 32); + } + + /* Do a big-endian load of a 32-bit word, regardless of the endianness + of the underlying host. 
*/ + static UInt getUIntBigendianly ( UChar* p ) + { + UInt w = 0; + w = (w << 8) | p[0]; + w = (w << 8) | p[1]; + w = (w << 8) | p[2]; + w = (w << 8) | p[3]; + return w; + } + + + /*------------------------------------------------------------*/ + /*--- Helpers for constructing IR. ---*/ + /*------------------------------------------------------------*/ + + static void assign ( IRTemp dst, IRExpr* e ) + { + stmt( IRStmt_WrTmp(dst, e) ); + } + + /* This generates a normal (non store-conditional) store. */ + static void storeBE ( IRExpr* addr, IRExpr* data ) + { + IRType tyA = typeOfIRExpr(irsb->tyenv, addr); + vassert(tyA == Ity_I32 || tyA == Ity_I64); + stmt( IRStmt_Store(Iend_BE, IRTemp_INVALID, addr, data) ); + } + + static IRExpr* unop ( IROp op, IRExpr* a ) + { + return IRExpr_Unop(op, a); + } + + static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) + { + return IRExpr_Binop(op, a1, a2); + } + + static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) + { + return IRExpr_Triop(op, a1, a2, a3); + } + + static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2, + IRExpr* a3, IRExpr* a4 ) + { + return IRExpr_Qop(op, a1, a2, a3, a4); + } + + static IRExpr* mkexpr ( IRTemp tmp ) + { + return IRExpr_RdTmp(tmp); + } + + static IRExpr* mkU8 ( UChar i ) + { + return IRExpr_Const(IRConst_U8(i)); + } + + static IRExpr* mkU16 ( UInt i ) + { + return IRExpr_Const(IRConst_U16(i)); + } + + static IRExpr* mkU32 ( UInt i ) + { + return IRExpr_Const(IRConst_U32(i)); + } + + static IRExpr* mkU64 ( ULong i ) + { + return IRExpr_Const(IRConst_U64(i)); + } + + /* This generates a normal (non load-linked) load. */ + static IRExpr* loadBE ( IRType ty, IRExpr* data ) + { + return IRExpr_Load(False, Iend_BE, ty, data); + } + + /* And this, a linked load. 
*/
+ static IRExpr* loadlinkedBE ( IRType ty, IRExpr* data )
+ {
+    /* Only I32 may be load-linked in 32-bit mode; 64-bit mode also
+       permits I64. */
+    if (!mode64) {
+       vassert(ty == Ity_I32);
+    } else {
+       vassert(ty == Ity_I32 || ty == Ity_I64);
+    }
+    return IRExpr_Load( True, Iend_BE, ty, data );
+ }
+
+ /* OR of two I1 expressions: both are widened to I32, combined with
+    Or32, and the result narrowed back to I1. */
+ static IRExpr* mkOR1 ( IRExpr* arg1, IRExpr* arg2 )
+ {
+    IRExpr* wide1;
+    IRExpr* wide2;
+    vassert(typeOfIRExpr(irsb->tyenv, arg1) == Ity_I1);
+    vassert(typeOfIRExpr(irsb->tyenv, arg2) == Ity_I1);
+    wide1 = unop(Iop_1Uto32, arg1);
+    wide2 = unop(Iop_1Uto32, arg2);
+    return unop(Iop_32to1, binop(Iop_Or32, wide1, wide2));
+ }
+
+ /* AND of two I1 expressions, by the same widen/combine/narrow route
+    as mkOR1 but using And32. */
+ static IRExpr* mkAND1 ( IRExpr* arg1, IRExpr* arg2 )
+ {
+    IRExpr* wide1;
+    IRExpr* wide2;
+    vassert(typeOfIRExpr(irsb->tyenv, arg1) == Ity_I1);
+    vassert(typeOfIRExpr(irsb->tyenv, arg2) == Ity_I1);
+    wide1 = unop(Iop_1Uto32, arg1);
+    wide2 = unop(Iop_1Uto32, arg2);
+    return unop(Iop_32to1, binop(Iop_And32, wide1, wide2));
+ }
+
+ /* expand V128_8Ux16 to 2x V128_16Ux8's.
+    vIn must be a V128 expression; *vEvn and *vOdd must be IRTemp_INVALID
+    on entry and receive freshly allocated V128 temporaries.  *vOdd is
+    assigned MullEven8Ux16(ones, vIn) and *vEvn the same product taken
+    on vIn shifted right by 8 bits, where 'ones' is 0x01 in every byte. */
+ static void expand8Ux16( IRExpr* vIn,
+                          /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+ {
+    IRTemp ones8x16 = newTemp(Ity_V128);
+    IRExpr* shifted;
+
+    vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+    vassert(vEvn && *vEvn == IRTemp_INVALID);
+    vassert(vOdd && *vOdd == IRTemp_INVALID);
+    *vEvn = newTemp(Ity_V128);
+    *vOdd = newTemp(Ity_V128);
+
+    assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+    assign( *vOdd, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), vIn) );
+    shifted = binop(Iop_ShrV128, vIn, mkU8(8));
+    assign( *vEvn, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), shifted) );
+ }
+
+ /* expand V128_8Sx16 to 2x V128_16Sx8's.
+    Same contract as expand8Ux16, using the signed MullEven8Sx16. */
+ static void expand8Sx16( IRExpr* vIn,
+                          /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+ {
+    IRTemp ones8x16 = newTemp(Ity_V128);
+    IRExpr* shifted;
+
+    vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+    vassert(vEvn && *vEvn == IRTemp_INVALID);
+    vassert(vOdd && *vOdd == IRTemp_INVALID);
+    *vEvn = newTemp(Ity_V128);
+    *vOdd = newTemp(Ity_V128);
+
+    assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+    assign( *vOdd, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), vIn) );
+    shifted = binop(Iop_ShrV128, vIn, mkU8(8));
+    assign( *vEvn, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), shifted) );
+ }
+
+ /* expand V128_16Ux8 to 2x V128_32Ux4's */
+ 
static void expand16Ux8( IRExpr* vIn, + /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) + { + IRTemp ones16x8 = newTemp(Ity_V128); + + vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128); + vassert(vEvn && *vEvn == IRTemp_INVALID); + vassert(vOdd && *vOdd == IRTemp_INVALID); + *vEvn = newTemp(Ity_V128); + *vOdd = newTemp(Ity_V128); + + assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) ); + assign( *vOdd, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), vIn) ); + assign( *vEvn, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), + binop(Iop_ShrV128, vIn, mkU8(16))) ); + } + + /* expand V128_16Sx8 to 2x V128_32Sx4's */ + static void expand16Sx8( IRExpr* vIn, + /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd ) + { + IRTemp ones16x8 = newTemp(Ity_V128); + + vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128); + vassert(vEvn && *vEvn == IRTemp_INVALID); + vassert(vOdd && *vOdd == IRTemp_INVALID); + *vEvn = newTemp(Ity_V128); + *vOdd = newTemp(Ity_V128); + + assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) ); + assign( *vOdd, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), vIn) ); + assign( *vEvn, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), + binop(Iop_ShrV128, vIn, mkU8(16))) ); + } + + /* break V128 to 4xI32's, then sign-extend to I64's */ + static void breakV128to4x64S( IRExpr* t128, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + + vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128); + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + *t0 = newTemp(Ity_I64); + *t1 = newTemp(Ity_I64); + *t2 = newTemp(Ity_I64); + *t3 = newTemp(Ity_I64); + + assign( hi64, unop(Iop_V128HIto64, t128) ); + assign( lo64, unop(Iop_V128to64, t128) ); + assign( *t3, unop(Iop_32Sto64, unop(Iop_64HIto32, mkexpr(hi64))) ); + assign( *t2, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(hi64))) ); + assign( *t1, unop(Iop_32Sto64, unop(Iop_64HIto32, 
mkexpr(lo64))) ); + assign( *t0, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(lo64))) ); + } + + /* break V128 to 4xI32's, then zero-extend to I64's */ + static void breakV128to4x64U ( IRExpr* t128, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + + vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128); + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + *t0 = newTemp(Ity_I64); + *t1 = newTemp(Ity_I64); + *t2 = newTemp(Ity_I64); + *t3 = newTemp(Ity_I64); + + assign( hi64, unop(Iop_V128HIto64, t128) ); + assign( lo64, unop(Iop_V128to64, t128) ); + assign( *t3, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(hi64))) ); + assign( *t2, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(hi64))) ); + assign( *t1, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(lo64))) ); + assign( *t0, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(lo64))) ); + } + + /* Signed saturating narrow 64S to 32 */ + static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 ) + { + IRTemp hi32 = newTemp(Ity_I32); + IRTemp lo32 = newTemp(Ity_I32); + + vassert(typeOfIRExpr(irsb->tyenv, t64) == Ity_I64); + + assign( hi32, unop(Iop_64HIto32, t64)); + assign( lo32, unop(Iop_64to32, t64)); + + return IRExpr_Mux0X( + /* if (hi32 == (lo32 >>s 31)) */ + unop(Iop_1Uto8, + binop(Iop_CmpEQ32, mkexpr(hi32), + binop( Iop_Sar32, mkexpr(lo32), mkU8(31)))), + /* else: sign dep saturate: 1->0x80000000, 0->0x7FFFFFFF */ + binop(Iop_Add32, mkU32(0x7FFFFFFF), + binop(Iop_Shr32, mkexpr(hi32), mkU8(31))), + /* then: within signed-32 range: lo half good enough */ + mkexpr(lo32) ); + } + + /* Unsigned saturating narrow 64U to 32 */ + static IRExpr* mkQNarrow64Uto32 ( IRExpr* t64 ) + { + IRTemp hi32 = newTemp(Ity_I32); + IRTemp lo32 = newTemp(Ity_I32); + + vassert(typeOfIRExpr(irsb->tyenv, t64) == Ity_I64); + + assign( hi32, unop(Iop_64HIto32, t64)); + assign( 
lo32, unop(Iop_64to32, t64)); + + return IRExpr_Mux0X( + /* if (top 32 bits of t64 are 0) */ + unop(Iop_1Uto8, binop(Iop_CmpEQ32, mkexpr(hi32), mkU32(0))), + /* else: positive saturate -> 0xFFFFFFFF */ + mkU32(0xFFFFFFFF), + /* then: within unsigned-32 range: lo half good enough */ + mkexpr(lo32) ); + } + + /* Signed saturate narrow 64->32, combining to V128 */ + static IRExpr* mkV128from4x64S ( IRExpr* t3, IRExpr* t2, + IRExpr* t1, IRExpr* t0 ) + { + vassert(typeOfIRExpr(irsb->tyenv, t3) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t2) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t1) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t0) == Ity_I64); + return binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkQNarrow64Sto32( t3 ), + mkQNarrow64Sto32( t2 )), + binop(Iop_32HLto64, + mkQNarrow64Sto32( t1 ), + mkQNarrow64Sto32( t0 ))); + } + + /* Unsigned saturate narrow 64->32, combining to V128 */ + static IRExpr* mkV128from4x64U ( IRExpr* t3, IRExpr* t2, + IRExpr* t1, IRExpr* t0 ) + { + vassert(typeOfIRExpr(irsb->tyenv, t3) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t2) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t1) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv, t0) == Ity_I64); + return binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkQNarrow64Uto32( t3 ), + mkQNarrow64Uto32( t2 )), + binop(Iop_32HLto64, + mkQNarrow64Uto32( t1 ), + mkQNarrow64Uto32( t0 ))); + } + + /* Simulate irops Iop_MullOdd*, since we don't have them */ + #define MK_Iop_MullOdd8Ux16( expr_vA, expr_vB ) \ + binop(Iop_MullEven8Ux16, \ + binop(Iop_ShrV128, expr_vA, mkU8(8)), \ + binop(Iop_ShrV128, expr_vB, mkU8(8))) + + #define MK_Iop_MullOdd8Sx16( expr_vA, expr_vB ) \ + binop(Iop_MullEven8Sx16, \ + binop(Iop_ShrV128, expr_vA, mkU8(8)), \ + binop(Iop_ShrV128, expr_vB, mkU8(8))) + + #define MK_Iop_MullOdd16Ux8( expr_vA, expr_vB ) \ + binop(Iop_MullEven16Ux8, \ + binop(Iop_ShrV128, expr_vA, mkU8(16)), \ + binop(Iop_ShrV128, expr_vB, mkU8(16))) + + #define MK_Iop_MullOdd16Sx8( 
expr_vA, expr_vB ) \ + binop(Iop_MullEven16Sx8, \ + binop(Iop_ShrV128, expr_vA, mkU8(16)), \ + binop(Iop_ShrV128, expr_vB, mkU8(16))) + + static IRExpr* /* :: Ity_I64 */ mk64lo32Sto64 ( IRExpr* src ) + { + vassert(typeOfIRExpr(irsb->tyenv, src) == Ity_I64); + return unop(Iop_32Sto64, unop(Iop_64to32, src)); + } + + static IRExpr* /* :: Ity_I64 */ mk64lo32Uto64 ( IRExpr* src ) + { + vassert(typeOfIRExpr(irsb->tyenv, src) == Ity_I64); + return unop(Iop_32Uto64, unop(Iop_64to32, src)); + } + + static IROp mkSzOp ( IRType ty, IROp op8 ) + { + Int adj; + vassert(ty == Ity_I8 || ty == Ity_I16 || + ty == Ity_I32 || ty == Ity_I64); + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 || op8 == Iop_Mul8 || + op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 || + op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 || + op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 || + op8 == Iop_Not8 ); + adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : (ty==Ity_I32 ? 2 : 3)); + return adj + op8; + } + + /* Make sure we get valid 32 and 64bit addresses */ + static Addr64 mkSzAddr ( IRType ty, Addr64 addr ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ( ty == Ity_I64 ? + (Addr64)addr : + (Addr64)extend_s_32to64( toUInt(addr) ) ); + } + + /* sz, ULong -> IRExpr */ + static IRExpr* mkSzImm ( IRType ty, ULong imm64 ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? mkU64(imm64) : mkU32((UInt)imm64); + } + + /* sz, ULong -> IRConst */ + static IRConst* mkSzConst ( IRType ty, ULong imm64 ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ( ty == Ity_I64 ? + IRConst_U64(imm64) : + IRConst_U32((UInt)imm64) ); + } + + /* Sign extend imm16 -> IRExpr* */ + static IRExpr* mkSzExtendS16 ( IRType ty, UInt imm16 ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ( ty == Ity_I64 ? 
+ mkU64(extend_s_16to64(imm16)) : + mkU32(extend_s_16to32(imm16)) ); + } + + /* Sign extend imm32 -> IRExpr* */ + static IRExpr* mkSzExtendS32 ( IRType ty, UInt imm32 ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ( ty == Ity_I64 ? + mkU64(extend_s_32to64(imm32)) : + mkU32(imm32) ); + } + + /* IR narrows I32/I64 -> I8/I16/I32 */ + static IRExpr* mkNarrowTo8 ( IRType ty, IRExpr* src ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? unop(Iop_64to8, src) : unop(Iop_32to8, src); + } + + static IRExpr* mkNarrowTo16 ( IRType ty, IRExpr* src ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? unop(Iop_64to16, src) : unop(Iop_32to16, src); + } + + static IRExpr* mkNarrowTo32 ( IRType ty, IRExpr* src ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? unop(Iop_64to32, src) : src; + } + + /* Signed/Unsigned IR widens I8/I16/I32 -> I32/I64 */ + static IRExpr* mkWidenFrom8 ( IRType ty, IRExpr* src, Bool sined ) + { + IROp op; + vassert(ty == Ity_I32 || ty == Ity_I64); + if (sined) op = (ty==Ity_I32) ? Iop_8Sto32 : Iop_8Sto64; + else op = (ty==Ity_I32) ? Iop_8Uto32 : Iop_8Uto64; + return unop(op, src); + } + + static IRExpr* mkWidenFrom16 ( IRType ty, IRExpr* src, Bool sined ) + { + IROp op; + vassert(ty == Ity_I32 || ty == Ity_I64); + if (sined) op = (ty==Ity_I32) ? Iop_16Sto32 : Iop_16Sto64; + else op = (ty==Ity_I32) ? Iop_16Uto32 : Iop_16Uto64; + return unop(op, src); + } + + static IRExpr* mkWidenFrom32 ( IRType ty, IRExpr* src, Bool sined ) + { + vassert(ty == Ity_I32 || ty == Ity_I64); + if (ty == Ity_I32) + return src; + return (sined) ? unop(Iop_32Sto64, src) : unop(Iop_32Uto64, src); + } + + + static Int integerGuestRegOffset ( UInt archreg ) + { + vassert(archreg < 32); + + // jrs: probably not necessary; only matters if we reference sub-parts + // of the ppc registers, but that isn't the case + // later: this might affect Altivec though? 
+ vassert(host_is_bigendian); + + switch (archreg) { + case 0: return offsetofPPCGuestState(guest_GPR0); + case 1: return offsetofPPCGuestState(guest_GPR1); + case 2: return offsetofPPCGuestState(guest_GPR2); + case 3: return offsetofPPCGuestState(guest_GPR3); + case 4: return offsetofPPCGuestState(guest_GPR4); + case 5: return offsetofPPCGuestState(guest_GPR5); + case 6: return offsetofPPCGuestState(guest_GPR6); + case 7: return offsetofPPCGuestState(guest_GPR7); + case 8: return offsetofPPCGuestState(guest_GPR8); + case 9: return offsetofPPCGuestState(guest_GPR9); + case 10: return offsetofPPCGuestState(guest_GPR10); + case 11: return offsetofPPCGuestState(guest_GPR11); + case 12: return offsetofPPCGuestState(guest_GPR12); + case 13: return offsetofPPCGuestState(guest_GPR13); + case 14: return offsetofPPCGuestState(guest_GPR14); + case 15: return offsetofPPCGuestState(guest_GPR15); + case 16: return offsetofPPCGuestState(guest_GPR16); + case 17: return offsetofPPCGuestState(guest_GPR17); + case 18: return offsetofPPCGuestState(guest_GPR18); + case 19: return offsetofPPCGuestState(guest_GPR19); + case 20: return offsetofPPCGuestState(guest_GPR20); + case 21: return offsetofPPCGuestState(guest_GPR21); + case 22: return offsetofPPCGuestState(guest_GPR22); + case 23: return offsetofPPCGuestState(guest_GPR23); + case 24: return offsetofPPCGuestState(guest_GPR24); + case 25: return offsetofPPCGuestState(guest_GPR25); + case 26: return offsetofPPCGuestState(guest_GPR26); + case 27: return offsetofPPCGuestState(guest_GPR27); + case 28: return offsetofPPCGuestState(guest_GPR28); + case 29: return offsetofPPCGuestState(guest_GPR29); + case 30: return offsetofPPCGuestState(guest_GPR30); + case 31: return offsetofPPCGuestState(guest_GPR31); + default: break; + } + vpanic("integerGuestRegOffset(ppc,be)"); /*notreached*/ + } + + static IRExpr* getIReg ( UInt archreg ) + { + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + vassert(archreg < 32); + return IRExpr_Get( integerGuestRegOffset(archreg), ty ); + } + + /* Write E to integer register ARCHREG; E must be Ity_I64 in mode64, else Ity_I32. */ + static void putIReg ( UInt archreg, IRExpr* e ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(archreg < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == ty ); + stmt( IRStmt_Put(integerGuestRegOffset(archreg), e) ); + } + + + static Int floatGuestRegOffset ( UInt archreg ) + { + vassert(archreg < 32); + + switch (archreg) { + case 0: return offsetofPPCGuestState(guest_FPR0); + case 1: return offsetofPPCGuestState(guest_FPR1); + case 2: return offsetofPPCGuestState(guest_FPR2); + case 3: return offsetofPPCGuestState(guest_FPR3); + case 4: return offsetofPPCGuestState(guest_FPR4); + case 5: return offsetofPPCGuestState(guest_FPR5); + case 6: return offsetofPPCGuestState(guest_FPR6); + case 7: return offsetofPPCGuestState(guest_FPR7); + case 8: return offsetofPPCGuestState(guest_FPR8); + case 9: return offsetofPPCGuestState(guest_FPR9); + case 10: return offsetofPPCGuestState(guest_FPR10); + case 11: return offsetofPPCGuestState(guest_FPR11); + case 12: return offsetofPPCGuestState(guest_FPR12); + case 13: return offsetofPPCGuestState(guest_FPR13); + case 14: return offsetofPPCGuestState(guest_FPR14); + case 15: return offsetofPPCGuestState(guest_FPR15); + case 16: return offsetofPPCGuestState(guest_FPR16); + case 17: return offsetofPPCGuestState(guest_FPR17); + case 18: return offsetofPPCGuestState(guest_FPR18); + case 19: return offsetofPPCGuestState(guest_FPR19); + case 20: return offsetofPPCGuestState(guest_FPR20); + case 21: return offsetofPPCGuestState(guest_FPR21); + case 22: return offsetofPPCGuestState(guest_FPR22); + case 23: return offsetofPPCGuestState(guest_FPR23); + case 24: return offsetofPPCGuestState(guest_FPR24); + case 25: return offsetofPPCGuestState(guest_FPR25); + case 26: return offsetofPPCGuestState(guest_FPR26); + case 27: return offsetofPPCGuestState(guest_FPR27); + case 28: return 
offsetofPPCGuestState(guest_FPR28); + case 29: return offsetofPPCGuestState(guest_FPR29); + case 30: return offsetofPPCGuestState(guest_FPR30); + case 31: return offsetofPPCGuestState(guest_FPR31); + default: break; + } + vpanic("floatGuestRegOffset(ppc)"); /*notreached*/ + } + + static IRExpr* getFReg ( UInt archreg ) + { + vassert(archreg < 32); + return IRExpr_Get( floatGuestRegOffset(archreg), Ity_F64 ); + } + + /* Write E (which must be Ity_F64) to floating-point register ARCHREG. */ + static void putFReg ( UInt archreg, IRExpr* e ) + { + vassert(archreg < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64); + stmt( IRStmt_Put(floatGuestRegOffset(archreg), e) ); + } + + + static Int vectorGuestRegOffset ( UInt archreg ) + { + vassert(archreg < 32); + + switch (archreg) { + case 0: return offsetofPPCGuestState(guest_VR0); + case 1: return offsetofPPCGuestState(guest_VR1); + case 2: return offsetofPPCGuestState(guest_VR2); + case 3: return offsetofPPCGuestState(guest_VR3); + case 4: return offsetofPPCGuestState(guest_VR4); + case 5: return offsetofPPCGuestState(guest_VR5); + case 6: return offsetofPPCGuestState(guest_VR6); + case 7: return offsetofPPCGuestState(guest_VR7); + case 8: return offsetofPPCGuestState(guest_VR8); + case 9: return offsetofPPCGuestState(guest_VR9); + case 10: return offsetofPPCGuestState(guest_VR10); + case 11: return offsetofPPCGuestState(guest_VR11); + case 12: return offsetofPPCGuestState(guest_VR12); + case 13: return offsetofPPCGuestState(guest_VR13); + case 14: return offsetofPPCGuestState(guest_VR14); + case 15: return offsetofPPCGuestState(guest_VR15); + case 16: return offsetofPPCGuestState(guest_VR16); + case 17: return offsetofPPCGuestState(guest_VR17); + case 18: return offsetofPPCGuestState(guest_VR18); + case 19: return offsetofPPCGuestState(guest_VR19); + case 20: return offsetofPPCGuestState(guest_VR20); + case 21: return offsetofPPCGuestState(guest_VR21); + case 22: return offsetofPPCGuestState(guest_VR22); + case 23: return 
offsetofPPCGuestState(guest_VR23); + case 24: return offsetofPPCGuestState(guest_VR24); + case 25: return offsetofPPCGuestState(guest_VR25); + case 26: return offsetofPPCGuestState(guest_VR26); + case 27: return offsetofPPCGuestState(guest_VR27); + case 28: return offsetofPPCGuestState(guest_VR28); + case 29: return offsetofPPCGuestState(guest_VR29); + case 30: return offsetofPPCGuestState(guest_VR30); + case 31: return offsetofPPCGuestState(guest_VR31); + default: break; + } + vpanic("vectorGuestRegOffset(ppc)"); /*notreached*/ + } + + static IRExpr* getVReg ( UInt archreg ) + { + vassert(archreg < 32); + return IRExpr_Get( vectorGuestRegOffset(archreg), Ity_V128 ); + } + + /* Write E (which must be Ity_V128) to vector register ARCHREG. */ + static void putVReg ( UInt archreg, IRExpr* e ) + { + vassert(archreg < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); + stmt( IRStmt_Put(vectorGuestRegOffset(archreg), e) ); + } + + static Int guestCR321offset ( UInt cr ) + { + switch (cr) { + case 0: return offsetofPPCGuestState(guest_CR0_321 ); + case 1: return offsetofPPCGuestState(guest_CR1_321 ); + case 2: return offsetofPPCGuestState(guest_CR2_321 ); + case 3: return offsetofPPCGuestState(guest_CR3_321 ); + case 4: return offsetofPPCGuestState(guest_CR4_321 ); + case 5: return offsetofPPCGuestState(guest_CR5_321 ); + case 6: return offsetofPPCGuestState(guest_CR6_321 ); + case 7: return offsetofPPCGuestState(guest_CR7_321 ); + default: vpanic("guestCR321offset(ppc)"); + } + } + + static Int guestCR0offset ( UInt cr ) + { + switch (cr) { + case 0: return offsetofPPCGuestState(guest_CR0_0 ); + case 1: return offsetofPPCGuestState(guest_CR1_0 ); + case 2: return offsetofPPCGuestState(guest_CR2_0 ); + case 3: return offsetofPPCGuestState(guest_CR3_0 ); + case 4: return offsetofPPCGuestState(guest_CR4_0 ); + case 5: return offsetofPPCGuestState(guest_CR5_0 ); + case 6: return offsetofPPCGuestState(guest_CR6_0 ); + case 7: return offsetofPPCGuestState(guest_CR7_0 ); + default: 
vpanic("guestCR0offset(ppc)"); + } + } + + // ROTL(src32/64, rot_amt5/6) + static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src, + IRExpr* rot_amt ) + { + IRExpr *mask, *rot; + vassert(typeOfIRExpr(irsb->tyenv,rot_amt) == Ity_I8); + + if (typeOfIRExpr(irsb->tyenv,src) == Ity_I64) { + // rot = (src << rot_amt) | (src >> (64-rot_amt)) + mask = binop(Iop_And8, rot_amt, mkU8(63)); + rot = binop(Iop_Or64, + binop(Iop_Shl64, src, mask), + binop(Iop_Shr64, src, binop(Iop_Sub8, mkU8(64), mask))); + } else { + // rot = (src << rot_amt) | (src >> (32-rot_amt)) + mask = binop(Iop_And8, rot_amt, mkU8(31)); + rot = binop(Iop_Or32, + binop(Iop_Shl32, src, mask), + binop(Iop_Shr32, src, binop(Iop_Sub8, mkU8(32), mask))); + } + /* Note: the Mux0X is not merely an optimisation; it's needed + because otherwise the Shr is a shift by the word size when + mask denotes zero. For rotates by immediates, a lot of + this junk gets folded out. */ + return IRExpr_Mux0X( mask, /* zero rotate */ src, + /* non-zero rotate */ rot ); + } + + /* Standard effective address calc: (rA + rB) */ + static IRExpr* ea_rA_idxd ( UInt rA, UInt rB ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(rA < 32); + vassert(rB < 32); + return binop(mkSzOp(ty, Iop_Add8), getIReg(rA), getIReg(rB)); + } + + /* Standard effective address calc: (rA + simm) */ + static IRExpr* ea_rA_simm ( UInt rA, UInt simm16 ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(rA < 32); + return binop(mkSzOp(ty, Iop_Add8), getIReg(rA), + mkSzExtendS16(ty, simm16)); + } + + /* Standard effective address calc: (rA|0) */ + static IRExpr* ea_rAor0 ( UInt rA ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(rA < 32); + if (rA == 0) { + return mkSzImm(ty, 0); + } else { + return getIReg(rA); + } + } + + /* Standard effective address calc: (rA|0) + rB */ + static IRExpr* ea_rAor0_idxd ( UInt rA, UInt rB ) + { + vassert(rA < 32); + vassert(rB < 32); + return (rA == 0) ? 
getIReg(rB) : ea_rA_idxd( rA, rB ); + } + + /* Standard effective address calc: (rA|0) + simm16 */ + static IRExpr* ea_rAor0_simm ( UInt rA, UInt simm16 ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(rA < 32); + if (rA == 0) { + return mkSzExtendS16(ty, simm16); + } else { + return ea_rA_simm( rA, simm16 ); + } + } + + + /* Align effective address */ + static IRExpr* addr_align( IRExpr* addr, UChar align ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + Long mask; + switch (align) { + case 1: return addr; // byte aligned + case 2: mask = ((Long)-1) << 1; break; // half-word aligned + case 4: mask = ((Long)-1) << 2; break; // word aligned + case 16: mask = ((Long)-1) << 4; break; // quad-word aligned + default: + vex_printf("addr_align: align = %u\n", align); + vpanic("addr_align(ppc)"); + } + + vassert(typeOfIRExpr(irsb->tyenv,addr) == ty); + return binop( mkSzOp(ty, Iop_And8), addr, mkSzImm(ty, mask) ); + } + + + /* Exit the trace if ADDR (intended to be a guest memory address) is + not ALIGN-aligned, generating a request for a SIGBUS followed by a + restart of the current insn. */ + static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align ) + { + vassert(align == 4 || align == 8); + if (mode64) { + vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64); + stmt( + IRStmt_Exit( + binop(Iop_CmpNE64, + binop(Iop_And64, mkexpr(addr), mkU64(align-1)), + mkU64(0)), + Ijk_SigBUS, + IRConst_U64( guest_CIA_curr_instr ) + ) + ); + } else { + vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32); + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, + binop(Iop_And32, mkexpr(addr), mkU32(align-1)), + mkU32(0)), + Ijk_SigBUS, + IRConst_U32( guest_CIA_curr_instr ) + ) + ); + } + } + + + /* Generate AbiHints which mark points at which the ELF or PowerOpen + ABIs say that the stack red zone (viz, -N(r1) .. -1(r1), for some + N) becomes undefined. That is at function calls and returns. ELF + ppc32 doesn't have this "feature" (how fortunate for it). 
nia is + the address of the next instruction to be executed. + */ + static void make_redzone_AbiHint ( VexAbiInfo* vbi, + IRTemp nia, HChar* who ) + { + Int szB = vbi->guest_stack_redzone_size; + if (0) vex_printf("AbiHint: %s\n", who); + vassert(szB >= 0); + if (szB > 0) { + if (mode64) { + vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); + stmt( IRStmt_AbiHint( + binop(Iop_Sub64, getIReg(1), mkU64(szB)), + szB, + mkexpr(nia) + )); + } else { + vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I32); + stmt( IRStmt_AbiHint( + binop(Iop_Sub32, getIReg(1), mkU32(szB)), + szB, + mkexpr(nia) + )); + } + } + } + + + /*------------------------------------------------------------*/ + /*--- Helpers for condition codes. ---*/ + /*------------------------------------------------------------*/ + + /* Condition register layout. + + In the hardware, CR is laid out like this. The leftmost end is the + most significant bit in the register; however the IBM documentation + numbers the bits backwards for some reason. + + CR0 CR1 .......... CR6 CR7 + 0 .. 3 ....................... 28 .. 31 (IBM bit numbering) + 31 28 3 0 (normal bit numbering) + + Each CR field is 4 bits: [<,>,==,SO] + + Hence in IBM's notation, BI=0 is CR0[<], BI=1 is CR0[>], etc. + + Indexing from BI to guest state: + + let n = BI / 4 + off = BI % 4 + this references CR n: + + off==0 -> guest_CRn_321 >> 3 + off==1 -> guest_CRn_321 >> 2 + off==2 -> guest_CRn_321 >> 1 + off==3 -> guest_CRn_0 + + Bear in mind the only significant bit in guest_CRn_0 is bit 0 + (normal notation) and in guest_CRn_321 the significant bits are + 3, 2 and 1 (normal notation). 
+ */ + + static void putCR321 ( UInt cr, IRExpr* e ) + { + vassert(cr < 8); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + stmt( IRStmt_Put(guestCR321offset(cr), e) ); + } + + static void putCR0 ( UInt cr, IRExpr* e ) + { + vassert(cr < 8); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + stmt( IRStmt_Put(guestCR0offset(cr), e) ); + } + + static IRExpr* /* :: Ity_I8 */ getCR0 ( UInt cr ) + { + vassert(cr < 8); + return IRExpr_Get(guestCR0offset(cr), Ity_I8); + } + + static IRExpr* /* :: Ity_I8 */ getCR321 ( UInt cr ) + { + vassert(cr < 8); + return IRExpr_Get(guestCR321offset(cr), Ity_I8); + } + + /* Fetch the specified CR bit (as per IBM/hardware notation) and + return it at the bottom of an I32; the top 31 bits are guaranteed + to be zero. */ + static IRExpr* /* :: Ity_I32 */ getCRbit ( UInt bi ) + { + UInt n = bi / 4; + UInt off = bi % 4; + vassert(bi < 32); + if (off == 3) { + /* Fetch the SO bit for this CR field */ + /* Note: And32 is redundant paranoia iff guest state only has 0 + or 1 in that slot. */ + return binop(Iop_And32, unop(Iop_8Uto32, getCR0(n)), mkU32(1)); + } else { + /* Fetch the <, > or == bit for this CR field */ + return binop( Iop_And32, + binop( Iop_Shr32, + unop(Iop_8Uto32, getCR321(n)), + mkU8(toUChar(3-off)) ), + mkU32(1) ); + } + } + + /* Dually, write the least significant bit of BIT to the specified CR + bit. Indexing as per getCRbit. 
*/ + static void putCRbit ( UInt bi, IRExpr* bit ) + { + UInt n, off; + IRExpr* safe; + vassert(typeOfIRExpr(irsb->tyenv,bit) == Ity_I32); + safe = binop(Iop_And32, bit, mkU32(1)); + n = bi / 4; + off = bi % 4; + vassert(bi < 32); + if (off == 3) { + /* This is the SO bit for this CR field */ + putCR0(n, unop(Iop_32to8, safe)); + } else { + off = 3 - off; + vassert(off == 1 || off == 2 || off == 3); + putCR321( + n, + unop( Iop_32to8, + binop( Iop_Or32, + /* old value with field masked out */ + binop(Iop_And32, unop(Iop_8Uto32, getCR321(n)), + mkU32(~(1 << off))), + /* new value in the right place */ + binop(Iop_Shl32, safe, mkU8(toUChar(off))) + ) + ) + ); + } + } + + /* Fetch the specified CR bit (as per IBM/hardware notation) and + return it somewhere in an I32; it does not matter where, but + whichever bit it is, all other bits are guaranteed to be zero. In + other words, the I32-typed expression will be zero if the bit is + zero and nonzero if the bit is 1. Write into *where the index + of where the bit will be. */ + + static + IRExpr* /* :: Ity_I32 */ getCRbit_anywhere ( UInt bi, Int* where ) + { + UInt n = bi / 4; + UInt off = bi % 4; + vassert(bi < 32); + if (off == 3) { + /* Fetch the SO bit for this CR field */ + /* Note: And32 is redundant paranoia iff guest state only has 0 + or 1 in that slot. */ + *where = 0; + return binop(Iop_And32, unop(Iop_8Uto32, getCR0(n)), mkU32(1)); + } else { + /* Fetch the <, > or == bit for this CR field */ + *where = 3-off; + return binop( Iop_And32, + unop(Iop_8Uto32, getCR321(n)), + mkU32(1 << (3-off)) ); + } + } + + /* Set the CR0 flags following an arithmetic operation. 
+ (Condition Register CR0 Field Definition, PPC32 p60) + */ + static IRExpr* getXER_SO ( void ); + static void set_CR0 ( IRExpr* result ) + { + vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_I32 || + typeOfIRExpr(irsb->tyenv,result) == Ity_I64); + if (mode64) { + putCR321( 0, unop(Iop_64to8, + binop(Iop_CmpORD64S, result, mkU64(0))) ); + } else { + putCR321( 0, unop(Iop_32to8, + binop(Iop_CmpORD32S, result, mkU32(0))) ); + } + putCR0( 0, getXER_SO() ); + } + + + /* Set the CR6 flags following an AltiVec compare operation. */ + static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones ) + { + /* CR6[0:3] = {all_ones, 0, all_zeros, 0} + all_ones = (v[0] && v[1] && v[2] && v[3]) + all_zeros = ~(v[0] || v[1] || v[2] || v[3]) + */ + IRTemp v0 = newTemp(Ity_V128); + IRTemp v1 = newTemp(Ity_V128); + IRTemp v2 = newTemp(Ity_V128); + IRTemp v3 = newTemp(Ity_V128); + IRTemp rOnes = newTemp(Ity_I8); + IRTemp rZeros = newTemp(Ity_I8); + + vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128); + + assign( v0, result ); + assign( v1, binop(Iop_ShrV128, result, mkU8(32)) ); + assign( v2, binop(Iop_ShrV128, result, mkU8(64)) ); + assign( v3, binop(Iop_ShrV128, result, mkU8(96)) ); + + assign( rZeros, unop(Iop_1Uto8, + binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF), + unop(Iop_Not32, + unop(Iop_V128to32, + binop(Iop_OrV128, + binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)), + binop(Iop_OrV128, mkexpr(v2), mkexpr(v3)))) + ))) ); + + if (test_all_ones) { + assign( rOnes, unop(Iop_1Uto8, + binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF), + unop(Iop_V128to32, + binop(Iop_AndV128, + binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)), + binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))) + ))) ); + putCR321( 6, binop(Iop_Or8, + binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)), + binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) ); + } else { + putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) ); + } + putCR0( 6, mkU8(0) ); + } + + + + /*------------------------------------------------------------*/ + /*--- Helpers for XER flags. 
---*/ + /*------------------------------------------------------------*/ + + static void putXER_SO ( IRExpr* e ) + { + IRExpr* so; + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + so = binop(Iop_And8, e, mkU8(1)); + stmt( IRStmt_Put( OFFB_XER_SO, so ) ); + } + + static void putXER_OV ( IRExpr* e ) + { + IRExpr* ov; + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + ov = binop(Iop_And8, e, mkU8(1)); + stmt( IRStmt_Put( OFFB_XER_OV, ov ) ); + } + + static void putXER_CA ( IRExpr* e ) + { + IRExpr* ca; + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + ca = binop(Iop_And8, e, mkU8(1)); + stmt( IRStmt_Put( OFFB_XER_CA, ca ) ); + } + + static void putXER_BC ( IRExpr* e ) + { + IRExpr* bc; + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); + bc = binop(Iop_And8, e, mkU8(0x7F)); + stmt( IRStmt_Put( OFFB_XER_BC, bc ) ); + } + + static IRExpr* /* :: Ity_I8 */ getXER_SO ( void ) + { + return IRExpr_Get( OFFB_XER_SO, Ity_I8 ); + } + + static IRExpr* /* :: Ity_I32 */ getXER_SO32 ( void ) + { + return binop( Iop_And32, unop(Iop_8Uto32, getXER_SO()), mkU32(1) ); + } + + static IRExpr* /* :: Ity_I8 */ getXER_OV ( void ) + { + return IRExpr_Get( OFFB_XER_OV, Ity_I8 ); + } + + static IRExpr* /* :: Ity_I32 */ getXER_OV32 ( void ) + { + return binop( Iop_And32, unop(Iop_8Uto32, getXER_OV()), mkU32(1) ); + } + + static IRExpr* /* :: Ity_I32 */ getXER_CA32 ( void ) + { + IRExpr* ca = IRExpr_Get( OFFB_XER_CA, Ity_I8 ); + return binop( Iop_And32, unop(Iop_8Uto32, ca ), mkU32(1) ); + } + + static IRExpr* /* :: Ity_I8 */ getXER_BC ( void ) + { + return IRExpr_Get( OFFB_XER_BC, Ity_I8 ); + } + + static IRExpr* /* :: Ity_I32 */ getXER_BC32 ( void ) + { + IRExpr* bc = IRExpr_Get( OFFB_XER_BC, Ity_I8 ); + return binop( Iop_And32, unop(Iop_8Uto32, bc), mkU32(0x7F) ); + } + + + /* RES is the result of doing OP on ARGL and ARGR. Set %XER.OV and + %XER.SO accordingly. 
*/
+/* 32-bit worker for set_XER_OV.  Builds an IR expression that is 1 if
+   operation OP with operands ARGL/ARGR and result RES (as passed by
+   the caller) overflowed and 0 otherwise, writes it to XER.OV via
+   putXER_OV, then ORs the new XER.OV into XER.SO via putXER_SO.
+   RES, ARGL and ARGR must all be Ity_I32 and OP must be below
+   PPCG_FLAG_OP_NUMBER (asserted).  An OP with no case in the switch
+   prints a message and panics. */
+static void set_XER_OV_32( UInt op, IRExpr* res,
+                           IRExpr* argL, IRExpr* argR )
+{
+   IRTemp  t64;
+   IRExpr* xer_ov;
+   vassert(op < PPCG_FLAG_OP_NUMBER);
+   vassert(typeOfIRExpr(irsb->tyenv,res)  == Ity_I32);
+   vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I32);
+   vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I32);
+
+#  define INT32_MIN 0x80000000
+
+#  define XOR2(_aa,_bb) \
+      binop(Iop_Xor32,(_aa),(_bb))
+
+#  define XOR3(_cc,_dd,_ee) \
+      binop(Iop_Xor32,binop(Iop_Xor32,(_cc),(_dd)),(_ee))
+
+#  define AND3(_ff,_gg,_hh) \
+      binop(Iop_And32,binop(Iop_And32,(_ff),(_gg)),(_hh))
+
+#define NOT(_jj) \
+      unop(Iop_Not32, (_jj))
+
+   switch (op) {
+   case /* 0 */ PPCG_FLAG_OP_ADD:
+   case /* 1 */ PPCG_FLAG_OP_ADDE:
+      /* (argL^argR^-1) & (argL^res) & (1<<31)  ?1:0 */
+      // i.e. ((both_same_sign) & (sign_changed) & (sign_mask))
+      xer_ov
+         = AND3( XOR3(argL,argR,mkU32(-1)),
+                 XOR2(argL,res),
+                 mkU32(INT32_MIN) );
+      /* xer_ov can only be 0 or 1<<31; shift it down to 0 or 1 */
+      xer_ov
+         = binop(Iop_Shr32, xer_ov, mkU8(31) );
+      break;
+
+   case /* 2 */ PPCG_FLAG_OP_DIVW:
+      /* (argL == INT32_MIN && argR == -1) || argR == 0 */
+      xer_ov
+         = mkOR1(
+              mkAND1(
+                 binop(Iop_CmpEQ32, argL, mkU32(INT32_MIN)),
+                 binop(Iop_CmpEQ32, argR, mkU32(-1))
+              ),
+              binop(Iop_CmpEQ32, argR, mkU32(0) )
+           );
+      xer_ov
+         = unop(Iop_1Uto32, xer_ov);
+      break;
+
+   case /* 3 */ PPCG_FLAG_OP_DIVWU:
+      /* argR == 0 */
+      xer_ov
+         = unop(Iop_1Uto32, binop(Iop_CmpEQ32, argR, mkU32(0)));
+      break;
+
+   case /* 4 */ PPCG_FLAG_OP_MULLW:
+      /* OV true if result can't be represented in 32 bits
+         i.e sHi != sign extension of sLo.  RES is not used here:
+         the full 64-bit signed product is recomputed from argL and
+         argR into the temporary t64. */
+      t64 = newTemp(Ity_I64);
+      assign( t64, binop(Iop_MullS32, argL, argR) );
+      xer_ov
+         = binop( Iop_CmpNE32,
+                  unop(Iop_64HIto32, mkexpr(t64)),
+                  binop( Iop_Sar32,
+                         unop(Iop_64to32, mkexpr(t64)),
+                         mkU8(31))
+                  );
+      xer_ov
+         = unop(Iop_1Uto32, xer_ov);
+      break;
+
+   case /* 5 */ PPCG_FLAG_OP_NEG:
+      /* argL == INT32_MIN (argR and res are not examined) */
+      xer_ov
+         = unop( Iop_1Uto32,
+                 binop(Iop_CmpEQ32, argL, mkU32(INT32_MIN)) );
+      break;
+
+   case /* 6 */ PPCG_FLAG_OP_SUBF:
+   case /* 7 */ PPCG_FLAG_OP_SUBFC:
+   case /* 8 */ PPCG_FLAG_OP_SUBFE:
+      /* ((~argL)^argR^-1) & ((~argL)^res) & (1<<31) ?1:0;
+         i.e. the ADD test above with argL replaced by ~argL */
+      xer_ov
+         = AND3( XOR3(NOT(argL),argR,mkU32(-1)),
+                 XOR2(NOT(argL),res),
+                 mkU32(INT32_MIN) );
+      /* xer_ov can only be 0 or 1<<31; shift it down to 0 or 1 */
+      xer_ov
+         = binop(Iop_Shr32, xer_ov, mkU8(31) );
+      break;
+
+   default:
+      vex_printf("set_XER_OV: op = %u\n", op);
+      vpanic("set_XER_OV(ppc)");
+   }
+
+   /* xer_ov MUST denote either 0 or 1, no other value allowed */
+   putXER_OV( unop(Iop_32to8, xer_ov) );
+
+   /* Update the summary overflow: SO = SO | OV, where OV is read back
+      after the write just above */
+   putXER_SO( binop(Iop_Or8, getXER_SO(), getXER_OV()) );
+
+#  undef INT32_MIN
+#  undef AND3
+#  undef XOR3
+#  undef XOR2
+#  undef NOT
+}
+/* 64-bit worker for set_XER_OV.  Same contract as the 32-bit worker
+   but RES, ARGL and ARGR must all be Ity_I64 (asserted).  Here every
+   case leaves xer_ov as a 1-bit expression (a comparison result or
+   an Iop_64to1 narrowing), which is widened with Iop_1Uto8 before
+   being written to XER.OV. */
+static void set_XER_OV_64( UInt op, IRExpr* res,
+                           IRExpr* argL, IRExpr* argR )
+{
+   IRExpr* xer_ov;
+   vassert(op < PPCG_FLAG_OP_NUMBER);
+   vassert(typeOfIRExpr(irsb->tyenv,res)  == Ity_I64);
+   vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I64);
+   vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I64);
+
+#  define INT64_MIN 0x8000000000000000ULL
+
+#  define XOR2(_aa,_bb) \
+      binop(Iop_Xor64,(_aa),(_bb))
+
+#  define XOR3(_cc,_dd,_ee) \
+      binop(Iop_Xor64,binop(Iop_Xor64,(_cc),(_dd)),(_ee))
+
+#  define AND3(_ff,_gg,_hh) \
+      binop(Iop_And64,binop(Iop_And64,(_ff),(_gg)),(_hh))
+
+#define NOT(_jj) \
+      unop(Iop_Not64, (_jj))
+
+   switch (op) {
+   case /* 0 */ PPCG_FLAG_OP_ADD:
+   case /* 1 */ PPCG_FLAG_OP_ADDE:
+      /* (argL^argR^-1) & (argL^res) & (1<<63)  ? 1:0 */
+      // i.e. ((both_same_sign) & (sign_changed) & (sign_mask))
+      xer_ov
+         = AND3( XOR3(argL,argR,mkU64(-1)),
+                 XOR2(argL,res),
+                 mkU64(INT64_MIN) );
+      /* xer_ov can only be 0 or 1<<63; reduce it to a single bit */
+      xer_ov
+         = unop(Iop_64to1, binop(Iop_Shr64, xer_ov, mkU8(63)));
+      break;
+
+   case /* 2 */ PPCG_FLAG_OP_DIVW:
+      /* (argL == INT64_MIN && argR == -1) || argR == 0
+         NOTE(review): the 64-bit-mode divw translation passes
+         operands sign-extended from 32 bits, which can never equal
+         INT64_MIN, so for divw only the argR == 0 term can fire --
+         confirm whether 0x80000000 / -1 should be flagged there. */
+      xer_ov
+         = mkOR1(
+              mkAND1(
+                 binop(Iop_CmpEQ64, argL, mkU64(INT64_MIN)),
+                 binop(Iop_CmpEQ64, argR, mkU64(-1))
+              ),
+              binop(Iop_CmpEQ64, argR, mkU64(0) )
+           );
+      break;
+
+   case /* 3 */ PPCG_FLAG_OP_DIVWU:
+      /* argR == 0 */
+      xer_ov
+         = binop(Iop_CmpEQ64, argR, mkU64(0));
+      break;
+
+   case /* 4 */ PPCG_FLAG_OP_MULLW: {
+      /* OV true if the 64-bit RES can't be represented in 32 bits,
+         i.e. high 32 bits of res != sign extension of low 32 bits.
+         argL and argR are not examined.
+         NOTE(review): the mulld translation also passes this op
+         with a full 64-bit product, for which a 32-bit range test
+         looks wrong -- confirm. */
+      xer_ov
+         = binop( Iop_CmpNE32,
+                  unop(Iop_64HIto32, res),
+                  binop( Iop_Sar32,
+                         unop(Iop_64to32, res),
+                         mkU8(31))
+                  );
+      break;
+   }
+
+   case /* 5 */ PPCG_FLAG_OP_NEG:
+      /* argL == INT64_MIN (argR and res are not examined) */
+      xer_ov
+         = binop(Iop_CmpEQ64, argL, mkU64(INT64_MIN));
+      break;
+
+   case /* 6 */ PPCG_FLAG_OP_SUBF:
+   case /* 7 */ PPCG_FLAG_OP_SUBFC:
+   case /* 8 */ PPCG_FLAG_OP_SUBFE:
+      /* ((~argL)^argR^-1) & ((~argL)^res) & (1<<63) ?1:0;
+         i.e. the ADD test above with argL replaced by ~argL */
+      xer_ov
+         = AND3( XOR3(NOT(argL),argR,mkU64(-1)),
+                 XOR2(NOT(argL),res),
+                 mkU64(INT64_MIN) );
+      /* xer_ov can only be 0 or 1<<63; reduce it to a single bit */
+      xer_ov
+         = unop(Iop_64to1, binop(Iop_Shr64, xer_ov, mkU8(63)));
+      break;
+
+   default:
+      vex_printf("set_XER_OV: op = %u\n", op);
+      vpanic("set_XER_OV(ppc64)");
+   }
+
+   /* xer_ov MUST denote either 0 or 1, no other value allowed */
+   putXER_OV( unop(Iop_1Uto8, xer_ov) );
+
+   /* Update the summary overflow: SO = SO | OV, where OV is read back
+      after the write just above */
+   putXER_SO( binop(Iop_Or8, getXER_SO(), getXER_OV()) );
+
+#  undef INT64_MIN
+#  undef AND3
+#  undef XOR3
+#  undef XOR2
+#  undef NOT
+}
+/* Set XER.OV and XER.SO for operation OP.  Dispatches on TY: Ity_I32
+   selects the 32-bit worker, anything else the 64-bit worker (whose
+   own assertions then check the operand types). */
+static void set_XER_OV ( IRType ty, UInt op, IRExpr* res,
+                         IRExpr* argL, IRExpr* argR )
+{
+   if (ty == Ity_I32)
+      set_XER_OV_32( op, res, argL, argR );
+   else
+      set_XER_OV_64( op, res, argL, argR );
+}
+
+
+
+/* RES is the
result of doing OP on ARGL and ARGR with the old %XER.CA + value being OLDCA. Set %XER.CA accordingly. */ + + static void set_XER_CA_32 ( UInt op, IRExpr* res, + IRExpr* argL, IRExpr* argR, IRExpr* oldca ) + { + IRExpr* xer_ca; + vassert(op < PPCG_FLAG_OP_NUMBER); + vassert(typeOfIRExpr(irsb->tyenv,res) == Ity_I32); + vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I32); + vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I32); + vassert(typeOfIRExpr(irsb->tyenv,oldca) == Ity_I32); + + /* Incoming oldca is assumed to hold the values 0 or 1 only. This + seems reasonable given that it's always generated by + getXER_CA32(), which masks it accordingly. In any case it being + 0 or 1 is an invariant of the ppc guest state representation; + if it has any other value, that invariant has been violated. */ + + switch (op) { + case /* 0 */ PPCG_FLAG_OP_ADD: + /* res 31 ? */ + unop(Iop_1Uto8, binop(Iop_CmpLT32U, mkU32(31), argR)), + /* no -- be like srawi */ + unop(Iop_1Uto32, binop(Iop_CmpNE32, xer_ca, mkU32(0))), + /* yes -- get sign bit of argL */ + binop(Iop_Shr32, argL, mkU8(31)) + ); + break; + + case /* 11 */ PPCG_FLAG_OP_SRAWI: + /* xer_ca is 1 iff src was negative and bits_shifted_out != + 0. Since the shift amount is known to be in the range + 0 .. 31 inclusive the following seems viable: + xer.ca == 1 iff the following is nonzero: + (argL >>s 31) -- either all 0s or all 1s + & (argL & (1<tyenv,res) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I64); + vassert(typeOfIRExpr(irsb->tyenv,oldca) == Ity_I64); + + /* Incoming oldca is assumed to hold the values 0 or 1 only. This + seems reasonable given that it's always generated by + getXER_CA32(), which masks it accordingly. In any case it being + 0 or 1 is an invariant of the ppc guest state representation; + if it has any other value, that invariant has been violated. */ + + switch (op) { + case /* 0 */ PPCG_FLAG_OP_ADD: + /* res 31 ? 
*/ + unop(Iop_1Uto8, binop(Iop_CmpLT64U, mkU64(31), argR)), + /* no -- be like srawi */ + unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0))), + /* yes -- get sign bit of argL */ + unop(Iop_64to32, binop(Iop_Shr64, argL, mkU8(63))) + ); + break; + + case /* 11 */ PPCG_FLAG_OP_SRAWI: + /* xer_ca is 1 iff src was negative and bits_shifted_out != 0. + Since the shift amount is known to be in the range 0 .. 31 + inclusive the following seems viable: + xer.ca == 1 iff the following is nonzero: + (argL >>s 31) -- either all 0s or all 1s + & (argL & (1< 63 ? */ + unop(Iop_1Uto8, binop(Iop_CmpLT64U, mkU64(63), argR)), + /* no -- be like sradi */ + unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0))), + /* yes -- get sign bit of argL */ + unop(Iop_64to32, binop(Iop_Shr64, argL, mkU8(63))) + ); + break; + + + case /* 13 */ PPCG_FLAG_OP_SRADI: + /* xer_ca is 1 iff src was negative and bits_shifted_out != 0. + Since the shift amount is known to be in the range 0 .. 63 + inclusive, the following seems viable: + xer.ca == 1 iff the following is nonzero: + (argL >>s 63) -- either all 0s or all 1s + & (argL & (1<tyenv,src ); + vassert( reg < PPC_GST_MAX ); + switch (reg) { + case PPC_GST_IP_AT_SYSCALL: + vassert( ty_src == ty ); + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, src ) ); + break; + case PPC_GST_CIA: + vassert( ty_src == ty ); + stmt( IRStmt_Put( OFFB_CIA, src ) ); + break; + case PPC_GST_LR: + vassert( ty_src == ty ); + stmt( IRStmt_Put( OFFB_LR, src ) ); + break; + case PPC_GST_CTR: + vassert( ty_src == ty ); + stmt( IRStmt_Put( OFFB_CTR, src ) ); + break; + case PPC_GST_VRSAVE: + vassert( ty_src == Ity_I32 ); + stmt( IRStmt_Put( OFFB_VRSAVE,src)); + break; + case PPC_GST_VSCR: + vassert( ty_src == Ity_I32 ); + stmt( IRStmt_Put( OFFB_VSCR, + binop(Iop_And32, src, + mkU32(MASK_VSCR_VALID)) ) ); + break; + case PPC_GST_XER: + vassert( ty_src == Ity_I32 ); + putXER_SO( unop(Iop_32to8, binop(Iop_Shr32, src, mkU8(31))) ); + putXER_OV( unop(Iop_32to8, binop(Iop_Shr32, src, 
mkU8(30))) );
+      putXER_CA( unop(Iop_32to8, binop(Iop_Shr32, src, mkU8(29))) );
+      putXER_BC( unop(Iop_32to8, src) );
+      break;
+
+   case PPC_GST_EMWARN:
+      vassert( ty_src == Ity_I32 );
+      stmt( IRStmt_Put( OFFB_EMWARN,src) );
+      break;
+
+   case PPC_GST_TISTART:
+      vassert( ty_src == ty );
+      stmt( IRStmt_Put( OFFB_TISTART, src) );
+      break;
+
+   case PPC_GST_TILEN:
+      vassert( ty_src == ty );
+      stmt( IRStmt_Put( OFFB_TILEN, src) );
+      break;
+
+   default:
+      vex_printf("putGST(ppc): reg = %u", reg);
+      vpanic("putGST(ppc)");
+   }
+}
+
+/* Write masked src to the given reg.  SRC must be Ity_I32 and REG
+   must be below PPC_GST_MAX (both asserted).  Only PPC_GST_FPSCR is
+   handled; any other REG prints a message and panics.  For FPSCR,
+   MASK selects which bits of SRC take effect: the low two bits
+   update the rounding mode stored at OFFB_FPROUND, a mask that
+   overlaps 0xFC raises an EmWarn_PPCexns emulation warning, and all
+   remaining bits are ignored. */
+static void putGST_masked ( PPC_GST reg, IRExpr* src, UInt mask )
+{
+   IRType ty = mode64 ? Ity_I64 : Ity_I32;
+   vassert( reg < PPC_GST_MAX );
+   vassert( typeOfIRExpr(irsb->tyenv,src ) == Ity_I32 );
+
+   switch (reg) {
+   case PPC_GST_FPSCR: {
+      /* Allow writes to Rounding Mode */
+      if (mask & 0x3) {
+         /* construct new fpround from new and old values as per mask:
+            new fpround = (src & (3 & mask)) | (fpround & (3 & ~mask)) */
+         stmt(
+            IRStmt_Put(
+               OFFB_FPROUND,
+               binop(
+                  Iop_Or32,
+                  binop(Iop_And32, src, mkU32(3 & mask)),
+                  binop(
+                     Iop_And32,
+                     IRExpr_Get(OFFB_FPROUND,Ity_I32),
+                     mkU32(3 & ~mask)
+                  )
+               )
+            )
+         );
+      }
+
+      /* Give EmWarn for attempted writes to:
+         - Exception Controls
+         - Non-IEEE Mode
+      */
+      if (mask & 0xFC) {  // Exception Control, Non-IEEE mode
+         VexEmWarn ew = EmWarn_PPCexns;
+
+         /* Record the warning in the guest state, then side-exit to
+            the next insn, reporting the warning, so that Valgrind's
+            dispatcher sees the warning.
+            NOTE(review): the exit guard compares two constants (ew
+            against EmWarn_NONE), so it does not inspect the
+            exception-control bits of src; the exit is emitted
+            whenever mask overlaps 0xFC -- confirm this is intended. */
+         putGST( PPC_GST_EMWARN, mkU32(ew) );
+         stmt(
+            IRStmt_Exit(
+               binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)),
+               Ijk_EmWarn,
+               mkSzConst( ty, nextInsnAddr()) ));
+      }
+
+      /* Ignore all other writes */
+      break;
+   }
+
+   default:
+      vex_printf("putGST_masked(ppc): reg = %u", reg);
+      vpanic("putGST_masked(ppc)");
+   }
+}
+
+/* Write the least significant nibble of src to the specified
+   REG[FLD] (as per IBM/hardware notation).
*/ + static void putGST_field ( PPC_GST reg, IRExpr* src, UInt fld ) + { + UInt shft, mask; + + vassert( typeOfIRExpr(irsb->tyenv,src ) == Ity_I32 ); + vassert( fld < 8 ); + vassert( reg < PPC_GST_MAX ); + + shft = 4*(7-fld); + mask = 0xF< Just another form of adde + assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) ); + min_one = mkSzImm(ty, (Long)-1); + assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA), + binop( mkSzOp(ty, Iop_Add8), + min_one, mkexpr(old_xer_ca)) )); + set_XER_CA( ty, PPCG_FLAG_OP_ADDE, + mkexpr(rD), mkexpr(rA), min_one, + mkexpr(old_xer_ca) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_ADDE, + mkexpr(rD), mkexpr(rA), min_one ); + } + break; + } + + case 0x0CA: { // addze (Add to Zero Extended, PPC32 p355) + IRTemp old_xer_ca = newTemp(ty); + if (rB_addr != 0) { + vex_printf("dis_int_arith(ppc)(addze,rB_addr)\n"); + return False; + } + DIP("addze%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + // rD = rA + (0) + XER[CA] + // => Just another form of adde + assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) ); + assign( rD, binop( mkSzOp(ty, Iop_Add8), + mkexpr(rA), mkexpr(old_xer_ca)) ); + set_XER_CA( ty, PPCG_FLAG_OP_ADDE, + mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0), + mkexpr(old_xer_ca) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_ADDE, + mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0) ); + } + break; + } + + case 0x1EB: // divw (Divide Word, PPC32 p388) + DIP("divw%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? 
".":"", + rD_addr, rA_addr, rB_addr); + if (mode64) { + /* Note: + XER settings are mode independent, and reflect the + overflow of the low-order 32bit result + CR0[LT|GT|EQ] are undefined if flag_rC && mode64 + */ + /* rD[hi32] are undefined: setting them to sign of lo32 + - makes set_CR0 happy */ + IRExpr* dividend = mk64lo32Sto64( mkexpr(rA) ); + IRExpr* divisor = mk64lo32Sto64( mkexpr(rB) ); + assign( rD, mk64lo32Uto64( binop(Iop_DivS64, dividend, + divisor) ) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_DIVW, + mkexpr(rD), dividend, divisor ); + } + } else { + assign( rD, binop(Iop_DivS32, mkexpr(rA), mkexpr(rB)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_DIVW, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + } + /* Note: + if (0x8000_0000 / -1) or (x / 0) + => rD=undef, if(flag_rC) CR7=undef, if(flag_OE) XER_OV=1 + => But _no_ exception raised. */ + break; + + case 0x1CB: // divwu (Divide Word Unsigned, PPC32 p389) + DIP("divwu%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + if (mode64) { + /* Note: + XER settings are mode independent, and reflect the + overflow of the low-order 32bit result + CR0[LT|GT|EQ] are undefined if flag_rC && mode64 + */ + IRExpr* dividend = mk64lo32Uto64( mkexpr(rA) ); + IRExpr* divisor = mk64lo32Uto64( mkexpr(rB) ); + assign( rD, mk64lo32Uto64( binop(Iop_DivU64, dividend, + divisor) ) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_DIVWU, + mkexpr(rD), dividend, divisor ); + } + } else { + assign( rD, binop(Iop_DivU32, mkexpr(rA), mkexpr(rB)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_DIVWU, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + } + /* Note: ditto comment divw, for (x / 0) */ + break; + + case 0x04B: // mulhw (Multiply High Word, PPC32 p488) + if (flag_OE != 0) { + vex_printf("dis_int_arith(ppc)(mulhw,flag_OE)\n"); + return False; + } + DIP("mulhw%s r%u,r%u,r%u\n", flag_rC ? 
".":"", + rD_addr, rA_addr, rB_addr); + if (mode64) { + /* rD[hi32] are undefined: setting them to sign of lo32 + - makes set_CR0 happy */ + assign( rD, binop(Iop_Sar64, + binop(Iop_Mul64, + mk64lo32Sto64( mkexpr(rA) ), + mk64lo32Sto64( mkexpr(rB) )), + mkU8(32)) ); + } else { + assign( rD, unop(Iop_64HIto32, + binop(Iop_MullS32, + mkexpr(rA), mkexpr(rB))) ); + } + break; + + case 0x00B: // mulhwu (Multiply High Word Unsigned, PPC32 p489) + if (flag_OE != 0) { + vex_printf("dis_int_arith(ppc)(mulhwu,flag_OE)\n"); + return False; + } + DIP("mulhwu%s r%u,r%u,r%u\n", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + if (mode64) { + /* rD[hi32] are undefined: setting them to sign of lo32 + - makes set_CR0 happy */ + assign( rD, binop(Iop_Sar64, + binop(Iop_Mul64, + mk64lo32Uto64( mkexpr(rA) ), + mk64lo32Uto64( mkexpr(rB) ) ), + mkU8(32)) ); + } else { + assign( rD, unop(Iop_64HIto32, + binop(Iop_MullU32, + mkexpr(rA), mkexpr(rB))) ); + } + break; + + case 0x0EB: // mullw (Multiply Low Word, PPC32 p491) + DIP("mullw%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + if (mode64) { + /* rD[hi32] are undefined: setting them to sign of lo32 + - set_XER_OV() and set_CR0() depend on this */ + IRExpr *a = unop(Iop_64to32, mkexpr(rA) ); + IRExpr *b = unop(Iop_64to32, mkexpr(rB) ); + assign( rD, binop(Iop_MullS32, a, b) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_MULLW, + mkexpr(rD), + unop(Iop_32Uto64, a), unop(Iop_32Uto64, b) ); + } + } else { + assign( rD, unop(Iop_64to32, + binop(Iop_MullU32, + mkexpr(rA), mkexpr(rB))) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_MULLW, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + } + break; + + case 0x068: // neg (Negate, PPC32 p493) + if (rB_addr != 0) { + vex_printf("dis_int_arith(ppc)(neg,rB_addr)\n"); + return False; + } + DIP("neg%s%s r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? 
".":"", + rD_addr, rA_addr); + // rD = (~rA) + 1 + assign( rD, binop( mkSzOp(ty, Iop_Add8), + unop( mkSzOp(ty, Iop_Not8), mkexpr(rA) ), + mkSzImm(ty, 1)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_NEG, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + + case 0x028: // subf (Subtract From, PPC32 p537) + DIP("subf%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + // rD = rB - rA + assign( rD, binop( mkSzOp(ty, Iop_Sub8), + mkexpr(rB), mkexpr(rA)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_SUBF, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + + case 0x008: // subfc (Subtract from Carrying, PPC32 p538) + DIP("subfc%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + // rD = rB - rA + assign( rD, binop( mkSzOp(ty, Iop_Sub8), + mkexpr(rB), mkexpr(rA)) ); + set_XER_CA( ty, PPCG_FLAG_OP_SUBFC, + mkexpr(rD), mkexpr(rA), mkexpr(rB), + mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_SUBFC, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + + case 0x088: {// subfe (Subtract from Extended, PPC32 p539) + IRTemp old_xer_ca = newTemp(ty); + DIP("subfe%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? 
".":"", + rD_addr, rA_addr, rB_addr); + // rD = (log not)rA + rB + XER[CA] + assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) ); + assign( rD, binop( mkSzOp(ty, Iop_Add8), + unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)), + binop( mkSzOp(ty, Iop_Add8), + mkexpr(rB), mkexpr(old_xer_ca))) ); + set_XER_CA( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), mkexpr(rB), + mkexpr(old_xer_ca) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + } + + case 0x0E8: { // subfme (Subtract from -1 Extended, PPC32 p541) + IRTemp old_xer_ca = newTemp(ty); + IRExpr *min_one; + if (rB_addr != 0) { + vex_printf("dis_int_arith(ppc)(subfme,rB_addr)\n"); + return False; + } + DIP("subfme%s%s r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr); + // rD = (log not)rA + (-1) + XER[CA] + // => Just another form of subfe + assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) ); + min_one = mkSzImm(ty, (Long)-1); + assign( rD, binop( mkSzOp(ty, Iop_Add8), + unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)), + binop( mkSzOp(ty, Iop_Add8), + min_one, mkexpr(old_xer_ca))) ); + set_XER_CA( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), min_one, + mkexpr(old_xer_ca) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), min_one ); + } + break; + } + + case 0x0C8: { // subfze (Subtract from Zero Extended, PPC32 p542) + IRTemp old_xer_ca = newTemp(ty); + if (rB_addr != 0) { + vex_printf("dis_int_arith(ppc)(subfze,rB_addr)\n"); + return False; + } + DIP("subfze%s%s r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? 
".":"", + rD_addr, rA_addr); + // rD = (log not)rA + (0) + XER[CA] + // => Just another form of subfe + assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) ); + assign( rD, binop( mkSzOp(ty, Iop_Add8), + unop( mkSzOp(ty, Iop_Not8), + mkexpr(rA)), mkexpr(old_xer_ca)) ); + set_XER_CA( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0), + mkexpr(old_xer_ca) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_SUBFE, + mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0) ); + } + break; + } + + + /* 64bit Arithmetic */ + case 0x49: // mulhd (Multiply High DWord, PPC64 p539) + if (flag_OE != 0) { + vex_printf("dis_int_arith(ppc)(mulhd,flagOE)\n"); + return False; + } + DIP("mulhd%s r%u,r%u,r%u\n", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + assign( rD, unop(Iop_128HIto64, + binop(Iop_MullS64, + mkexpr(rA), mkexpr(rB))) ); + + break; + + case 0x9: // mulhdu (Multiply High DWord Unsigned, PPC64 p540) + if (flag_OE != 0) { + vex_printf("dis_int_arith(ppc)(mulhdu,flagOE)\n"); + return False; + } + DIP("mulhdu%s r%u,r%u,r%u\n", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + assign( rD, unop(Iop_128HIto64, + binop(Iop_MullU64, + mkexpr(rA), mkexpr(rB))) ); + break; + + case 0xE9: // mulld (Multiply Low DWord, PPC64 p543) + DIP("mulld%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + assign( rD, binop(Iop_Mul64, mkexpr(rA), mkexpr(rB)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_MULLW, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + + case 0x1E9: // divd (Divide DWord, PPC64 p419) + DIP("divd%s%s r%u,r%u,r%u\n", + flag_OE ? "o" : "", flag_rC ? ".":"", + rD_addr, rA_addr, rB_addr); + assign( rD, binop(Iop_DivS64, mkexpr(rA), mkexpr(rB)) ); + if (flag_OE) { + set_XER_OV( ty, PPCG_FLAG_OP_DIVW, + mkexpr(rD), mkexpr(rA), mkexpr(rB) ); + } + break; + /* Note: + if (0x8000_0000_0000_0000 / -1) or (x / 0) + => rD=undef, if(flag_rC) CR7=undef, if(flag_OE) XER_OV=1 + => But _no_ exception raised. 
*/
+
+      case 0x1C9: // divdu  (Divide DWord Unsigned, PPC64 p420)
+         DIP("divdu%s%s r%u,r%u,r%u\n",
+             flag_OE ? "o" : "", flag_rC ? ".":"",
+             rD_addr, rA_addr, rB_addr);
+         assign( rD, binop(Iop_DivU64, mkexpr(rA), mkexpr(rB)) );
+         if (flag_OE) {
+            set_XER_OV( ty, PPCG_FLAG_OP_DIVWU,
+                        mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+         }
+         break;
+         /* Note: ditto comment divd, for (x / 0) */
+
+      default:
+         vex_printf("dis_int_arith(ppc)(opc2)\n");
+         return False;
+      }
+      break;
+
+   default:
+      vex_printf("dis_int_arith(ppc)(opc1)\n");
+      return False;
+   }
+
+   putIReg( rD_addr, mkexpr(rD) );
+
+   if (do_rc && flag_rC) {
+      set_CR0( mkexpr(rD) );
+   }
+   return True;
+}
+
+
+
+/*
+  Integer Compare Instructions
+
+  Translates cmpi and cmpli (D-Form) and cmp and cmpl (X-Form, primary
+  opcode 0x1F).  The comparison result is written to CR field crfD:
+  bits 3..1 come from an Iop_CmpORD* comparison (via putCR321) and
+  bit 0 is a copy of XER.SO (via putCR0).  With L==1 the full 64-bit
+  registers are compared, otherwise both operands are narrowed to
+  32 bits first.
+
+  Returns True if the instruction was translated.  Returns False,
+  after printing a diagnostic, if L==1 in 32-bit mode, if bit 22 is
+  set, if bit 0 is set on an X-Form encoding, or if the primary or
+  secondary opcode is not one handled here.
+*/
+static Bool dis_int_cmp ( UInt theInstr )
+{
+   /* D-Form, X-Form */
+   UChar opc1    = ifieldOPC(theInstr);
+   UChar crfD    = toUChar( IFIELD( theInstr, 23, 3 ) );
+   UChar b22     = toUChar( IFIELD( theInstr, 22, 1 ) );
+   UChar flag_L  = toUChar( IFIELD( theInstr, 21, 1 ) );
+   UChar rA_addr = ifieldRegA(theInstr);
+   UInt  uimm16  = ifieldUIMM16(theInstr);
+   UChar rB_addr = ifieldRegB(theInstr);
+   UInt  opc2    = ifieldOPClo10(theInstr);
+   UChar b0      = ifieldBIT0(theInstr);
+
+   IRType ty = mode64 ? Ity_I64 : Ity_I32;
+   IRExpr *a = getIReg(rA_addr);
+   IRExpr *b;
+
+   if (!mode64 && flag_L==1) {  // L==1 invalid for 32 bit.
+      vex_printf("dis_int_cmp(ppc)(flag_L)\n");
+      return False;
+   }
+
+   if (b22 != 0) {
+      vex_printf("dis_int_cmp(ppc)(b22)\n");
+      return False;
+   }
+
+   switch (opc1) {
+   case 0x0B: // cmpi (Compare Immediate, PPC32 p368)
+      DIP("cmpi cr%u,%u,r%u,%d\n", crfD, flag_L, rA_addr,
+          (Int)extend_s_16to32(uimm16));
+      /* signed compare: the immediate is sign-extended */
+      b = mkSzExtendS16( ty, uimm16 );
+      if (flag_L == 1) {
+         putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
+      } else {
+         a = mkNarrowTo32( ty, a );
+         b = mkNarrowTo32( ty, b );
+         putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32S, a, b)));
+      }
+      putCR0( crfD, getXER_SO() );
+      break;
+
+   case 0x0A: // cmpli (Compare Logical Immediate, PPC32 p370)
+      DIP("cmpli cr%u,%u,r%u,0x%x\n", crfD, flag_L, rA_addr, uimm16);
+      /* unsigned compare: the immediate is used as-is */
+      b = mkSzImm( ty, uimm16 );
+      if (flag_L == 1) {
+         putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
+      } else {
+         a = mkNarrowTo32( ty, a );
+         b = mkNarrowTo32( ty, b );
+         putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
+      }
+      putCR0( crfD, getXER_SO() );
+      break;
+
+   /* X Form */
+   case 0x1F:
+      if (b0 != 0) {
+         vex_printf("dis_int_cmp(ppc)(0x1F,b0)\n");
+         return False;
+      }
+      b = getIReg(rB_addr);
+
+      switch (opc2) {
+         case 0x000: // cmp (Compare, PPC32 p367)
+            DIP("cmp cr%u,%u,r%u,r%u\n", crfD, flag_L, rA_addr, rB_addr);
+            /* Comparing a reg with itself produces a result which
+               doesn't depend on the contents of the reg.  Therefore
+               remove the false dependency, which has been known to cause
+               memcheck to produce false errors. */
+            if (rA_addr == rB_addr)
+               a = b = typeOfIRExpr(irsb->tyenv,a) == Ity_I64
+                       ? mkU64(0)  : mkU32(0);
+            if (flag_L == 1) {
+               putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
+            } else {
+               a = mkNarrowTo32( ty, a );
+               b = mkNarrowTo32( ty, b );
+               putCR321(crfD, unop(Iop_32to8,binop(Iop_CmpORD32S, a, b)));
+            }
+            putCR0( crfD, getXER_SO() );
+            break;
+
+         case 0x020: // cmpl (Compare Logical, PPC32 p369)
+            DIP("cmpl cr%u,%u,r%u,r%u\n", crfD, flag_L, rA_addr, rB_addr);
+            /* Comparing a reg with itself produces a result which
+               doesn't depend on the contents of the reg.  Therefore
+               remove the false dependency, which has been known to cause
+               memcheck to produce false errors. */
+            if (rA_addr == rB_addr)
+               a = b = typeOfIRExpr(irsb->tyenv,a) == Ity_I64
+                       ? mkU64(0)  : mkU32(0);
+            if (flag_L == 1) {
+               putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
+            } else {
+               a = mkNarrowTo32( ty, a );
+               b = mkNarrowTo32( ty, b );
+               putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
+            }
+            putCR0( crfD, getXER_SO() );
+            break;
+
+         default:
+            vex_printf("dis_int_cmp(ppc)(opc2)\n");
+            return False;
+      }
+      break;
+
+   default:
+      vex_printf("dis_int_cmp(ppc)(opc1)\n");
+      return False;
+   }
+
+   return True;
+}
+
+
+/*
+  Integer Logical Instructions
+
+  Translates the D-Form immediates andi., andis., ori, oris, xori,
+  xoris and the X-Form (primary opcode 0x1F) operations and, andc,
+  cntlzw, eqv, extsb, extsh, nand, nor, or, orc, xor, extsw, cntlzd.
+  The result is computed into the temporary rA and written to
+  register rA_addr.  CR0 is then updated from the result when both
+  do_rc and flag_rC are set: andi. and andis. always record (they
+  force flag_rC to 1), the other D-Form immediates never do, and the
+  X-Form operations record when bit 0 of the instruction is set.
+
+  Returns True if the instruction was translated.  Returns False,
+  after printing a diagnostic, for an unhandled primary or secondary
+  opcode, or when a one-source X-Form operation has a non-zero rB
+  field.
+*/
+static Bool dis_int_logic ( UInt theInstr )
+{
+   /* D-Form, X-Form */
+   UChar opc1    = ifieldOPC(theInstr);
+   UChar rS_addr = ifieldRegDS(theInstr);
+   UChar rA_addr = ifieldRegA(theInstr);
+   UInt  uimm16  = ifieldUIMM16(theInstr);
+   UChar rB_addr = ifieldRegB(theInstr);
+   UInt  opc2    = ifieldOPClo10(theInstr);
+   UChar flag_rC = ifieldBIT0(theInstr);
+
+   IRType ty     = mode64 ? Ity_I64 : Ity_I32;
+   IRTemp rS     = newTemp(ty);
+   IRTemp rA     = newTemp(ty);
+   IRTemp rB     = newTemp(ty);
+   IRExpr* irx;
+   Bool do_rc    = False;
+
+   assign( rS, getIReg(rS_addr) );
+   assign( rB, getIReg(rB_addr) );
+
+   switch (opc1) {
+   case 0x1C: // andi. (AND Immediate, PPC32 p358)
+      DIP("andi. r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+                         mkSzImm(ty, uimm16)) );
+      do_rc = True;  // Always record to CR
+      flag_rC = 1;
+      break;
+
+   case 0x1D: // andis. (AND Immediate Shifted, PPC32 p359)
+      /* Print the mnemonic with its record dot, as for andi. above:
+         this form always records to CR. */
+      DIP("andis. r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+                         mkSzImm(ty, uimm16 << 16)) );
+      do_rc = True;  // Always record to CR
+      flag_rC = 1;
+      break;
+
+   case 0x18: // ori (OR Immediate, PPC32 p497)
+      DIP("ori r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+                         mkSzImm(ty, uimm16)) );
+      break;
+
+   case 0x19: // oris (OR Immediate Shifted, PPC32 p498)
+      DIP("oris r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+                         mkSzImm(ty, uimm16 << 16)) );
+      break;
+
+   case 0x1A: // xori (XOR Immediate, PPC32 p550)
+      DIP("xori r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_Xor8), mkexpr(rS),
+                         mkSzImm(ty, uimm16)) );
+      break;
+
+   case 0x1B: // xoris (XOR Immediate Shifted, PPC32 p551)
+      DIP("xoris r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+      assign( rA, binop( mkSzOp(ty, Iop_Xor8), mkexpr(rS),
+                         mkSzImm(ty, uimm16 << 16)) );
+      break;
+
+   /* X Form */
+   case 0x1F:
+      do_rc = True;    // All below record to CR
+
+      switch (opc2) {
+      case 0x01C: // and (AND, PPC32 p356)
+         DIP("and%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign(rA, binop( mkSzOp(ty, Iop_And8),
+                           mkexpr(rS), mkexpr(rB)));
+         break;
+
+      case 0x03C: // andc (AND with Complement, PPC32 p357)
+         DIP("andc%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign(rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+                           unop( mkSzOp(ty, Iop_Not8),
+                                 mkexpr(rB))));
+         break;
+
+      case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
+         IRExpr* lo32;
+         if (rB_addr!=0) {
+            vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
+            return False;
+         }
+         DIP("cntlzw%s r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr);
+
+         // mode64: count in low word only
+         lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+
+         // Iop_Clz32 undefined for arg==0, so deal with that case:
+         irx =  binop(Iop_CmpNE32, lo32, mkU32(0));
+         assign(rA, mkWidenFrom32(ty,
+                         IRExpr_Mux0X( unop(Iop_1Uto8, irx),
+                                       mkU32(32),
+                                       unop(Iop_Clz32, lo32)),
+                         False));
+
+         // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+         break;
+      }
+
+      case 0x11C: // eqv (Equivalent, PPC32 p396)
+         DIP("eqv%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign( rA, unop( mkSzOp(ty, Iop_Not8),
+                           binop( mkSzOp(ty, Iop_Xor8),
+                                  mkexpr(rS), mkexpr(rB))) );
+         break;
+
+      case 0x3BA: // extsb (Extend Sign Byte, PPC32 p397)
+         if (rB_addr!=0) {
+            vex_printf("dis_int_logic(ppc)(extsb,rB_addr)\n");
+            return False;
+         }
+         DIP("extsb%s r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr);
+         if (mode64)
+            assign( rA, unop(Iop_8Sto64, unop(Iop_64to8, mkexpr(rS))) );
+         else
+            assign( rA, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rS))) );
+         break;
+
+      case 0x39A: // extsh (Extend Sign Half Word, PPC32 p398)
+         if (rB_addr!=0) {
+            vex_printf("dis_int_logic(ppc)(extsh,rB_addr)\n");
+            return False;
+         }
+         DIP("extsh%s r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr);
+         if (mode64)
+            assign( rA, unop(Iop_16Sto64,
+                             unop(Iop_64to16, mkexpr(rS))) );
+         else
+            assign( rA, unop(Iop_16Sto32,
+                             unop(Iop_32to16, mkexpr(rS))) );
+         break;
+
+      case 0x1DC: // nand (NAND, PPC32 p492)
+         DIP("nand%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign( rA, unop( mkSzOp(ty, Iop_Not8),
+                           binop( mkSzOp(ty, Iop_And8),
+                                  mkexpr(rS), mkexpr(rB))) );
+         break;
+
+      case 0x07C: // nor (NOR, PPC32 p494)
+         DIP("nor%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign( rA, unop( mkSzOp(ty, Iop_Not8),
+                           binop( mkSzOp(ty, Iop_Or8),
+                                  mkexpr(rS), mkexpr(rB))) );
+         break;
+
+      case 0x1BC: // or (OR, PPC32 p495)
+         /* "or rA,rS,rS" without the record bit is a plain register
+            move, so copy rS directly instead of emitting an Or */
+         if ((!flag_rC) && rS_addr == rB_addr) {
+            DIP("mr r%u,r%u\n", rA_addr, rS_addr);
+            assign( rA, mkexpr(rS) );
+         } else {
+            DIP("or%s r%u,r%u,r%u\n",
+                flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+            assign( rA, binop( mkSzOp(ty, Iop_Or8),
+                               mkexpr(rS), mkexpr(rB)) );
+         }
+         break;
+
+      case 0x19C: // orc (OR with Complement, PPC32 p496)
+         DIP("orc%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+                            unop(mkSzOp(ty, Iop_Not8), mkexpr(rB))));
+         break;
+
+      case 0x13C: // xor (XOR, PPC32 p549)
+         DIP("xor%s r%u,r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+         assign( rA, binop( mkSzOp(ty, Iop_Xor8),
+                            mkexpr(rS), mkexpr(rB)) );
+         break;
+
+
+      /* 64bit Integer Logical Instructions */
+      case 0x3DA: // extsw (Extend Sign Word, PPC64 p430)
+         if (rB_addr!=0) {
+            vex_printf("dis_int_logic(ppc)(extsw,rB_addr)\n");
+            return False;
+         }
+         DIP("extsw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+         assign(rA, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(rS))));
+         break;
+
+      case 0x03A: // cntlzd (Count Leading Zeros DWord, PPC64 p401)
+         if (rB_addr!=0) {
+            vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
+            return False;
+         }
+         DIP("cntlzd%s r%u,r%u\n",
+             flag_rC ? ".":"", rA_addr, rS_addr);
+         // Iop_Clz64 undefined for arg==0, so deal with that case:
+         irx =  binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
+         assign(rA, IRExpr_Mux0X( unop(Iop_1Uto8, irx),
+                                  mkU64(64),
+                                  unop(Iop_Clz64, mkexpr(rS)) ));
+         // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+         break;
+
+      default:
+         vex_printf("dis_int_logic(ppc)(opc2)\n");
+         return False;
+      }
+      break;
+
+   default:
+      vex_printf("dis_int_logic(ppc)(opc1)\n");
+      return False;
+   }
+
+   putIReg( rA_addr, mkexpr(rA) );
+
+   if (do_rc && flag_rC) {
+      set_CR0( mkexpr(rA) );
+   }
+   return True;
+}
+
+
+
+/*
+  Integer Rotate Instructions
+*/
+static Bool dis_int_rot ( UInt theInstr )
+{
+   /* M-Form, MDS-Form */
+   UChar opc1    = ifieldOPC(theInstr);
+   UChar rS_addr = ifieldRegDS(theInstr);
+   UChar rA_addr = ifieldRegA(theInstr);
+   UChar rB_addr = ifieldRegB(theInstr);
+   UChar sh_imm  = rB_addr;
+   UChar MaskBeg = toUChar( IFIELD( theInstr, 6, 5 ) );
+   UChar MaskEnd = toUChar( IFIELD( theInstr, 1, 5 ) );
+   UChar msk_imm = toUChar( IFIELD( theInstr, 5, 6 ) );
+   UChar opc2    = toUChar( IFIELD( theInstr, 2, 3 ) );
+   UChar b1      = ifieldBIT1(theInstr);
+   UChar flag_rC = ifieldBIT0(theInstr);
+
+   IRType ty     = mode64 ? Ity_I64 : Ity_I32;
+   IRTemp rS     = newTemp(ty);
+   IRTemp rA     = newTemp(ty);
+   IRTemp rB     = newTemp(ty);
+   IRTemp rot    = newTemp(ty);
+   IRExpr *r;
+   UInt   mask32;
+   ULong  mask64;
+
+   assign( rS, getIReg(rS_addr) );
+   assign( rB, getIReg(rB_addr) );
+
+   switch (opc1) {
+   case 0x14: {
+      // rlwimi (Rotate Left Word Imm then Mask Insert, PPC32 p500)
+      DIP("rlwimi%s r%u,r%u,%d,%d,%d\n", flag_rC ?
".":"", + rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd); + if (mode64) { + // tmp32 = (ROTL(rS_Lo32, Imm) + // rA = ((tmp32 || tmp32) & mask64) | (rA & ~mask64) + mask64 = MASK64(31-MaskEnd, 31-MaskBeg); + r = ROTL( unop(Iop_64to32, mkexpr(rS) ), mkU8(sh_imm) ); + r = unop(Iop_32Uto64, r); + assign( rot, binop(Iop_Or64, r, + binop(Iop_Shl64, r, mkU8(32))) ); + assign( rA, + binop(Iop_Or64, + binop(Iop_And64, mkexpr(rot), mkU64(mask64)), + binop(Iop_And64, getIReg(rA_addr), mkU64(~mask64))) ); + } + else { + // rA = (ROTL(rS, Imm) & mask) | (rA & ~mask); + mask32 = MASK32(31-MaskEnd, 31-MaskBeg); + r = ROTL(mkexpr(rS), mkU8(sh_imm)); + assign( rA, + binop(Iop_Or32, + binop(Iop_And32, mkU32(mask32), r), + binop(Iop_And32, getIReg(rA_addr), mkU32(~mask32))) ); + } + break; + } + + case 0x15: { + // rlwinm (Rotate Left Word Imm then AND with Mask, PPC32 p501) + vassert(MaskBeg < 32); + vassert(MaskEnd < 32); + vassert(sh_imm < 32); + + if (mode64) { + IRTemp rTmp = newTemp(Ity_I64); + mask64 = MASK64(31-MaskEnd, 31-MaskBeg); + DIP("rlwinm%s r%u,r%u,%d,%d,%d\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd); + // tmp32 = (ROTL(rS_Lo32, Imm) + // rA = ((tmp32 || tmp32) & mask64) + r = ROTL( unop(Iop_64to32, mkexpr(rS) ), mkU8(sh_imm) ); + r = unop(Iop_32Uto64, r); + assign( rTmp, r ); + r = NULL; + assign( rot, binop(Iop_Or64, mkexpr(rTmp), + binop(Iop_Shl64, mkexpr(rTmp), mkU8(32))) ); + assign( rA, binop(Iop_And64, mkexpr(rot), mkU64(mask64)) ); + } + else { + if (MaskBeg == 0 && sh_imm+MaskEnd == 31) { + /* Special-case the ,n,0,31-n form as that is just n-bit + shift left, PPC32 p501 */ + DIP("slwi%s r%u,r%u,%d\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm); + assign( rA, binop(Iop_Shl32, mkexpr(rS), mkU8(sh_imm)) ); + } + else if (MaskEnd == 31 && sh_imm+MaskBeg == 32) { + /* Special-case the ,32-n,n,31 form as that is just n-bit + unsigned shift right, PPC32 p501 */ + DIP("srwi%s r%u,r%u,%d\n", flag_rC ? 
".":"", + rA_addr, rS_addr, MaskBeg); + assign( rA, binop(Iop_Shr32, mkexpr(rS), mkU8(MaskBeg)) ); + } + else { + /* General case. */ + mask32 = MASK32(31-MaskEnd, 31-MaskBeg); + DIP("rlwinm%s r%u,r%u,%d,%d,%d\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd); + // rA = ROTL(rS, Imm) & mask + assign( rA, binop(Iop_And32, + ROTL(mkexpr(rS), mkU8(sh_imm)), + mkU32(mask32)) ); + } + } + break; + } + + case 0x17: { + // rlwnm (Rotate Left Word then AND with Mask, PPC32 p503 + DIP("rlwnm%s r%u,r%u,r%u,%d,%d\n", flag_rC ? ".":"", + rA_addr, rS_addr, rB_addr, MaskBeg, MaskEnd); + if (mode64) { + mask64 = MASK64(31-MaskEnd, 31-MaskBeg); + /* weird insn alert! + tmp32 = (ROTL(rS_Lo32, rB[0-4]) + rA = ((tmp32 || tmp32) & mask64) + */ + // note, ROTL does the masking, so we don't do it here + r = ROTL( unop(Iop_64to32, mkexpr(rS)), + unop(Iop_64to8, mkexpr(rB)) ); + r = unop(Iop_32Uto64, r); + assign(rot, binop(Iop_Or64, r, binop(Iop_Shl64, r, mkU8(32)))); + assign( rA, binop(Iop_And64, mkexpr(rot), mkU64(mask64)) ); + } else { + mask32 = MASK32(31-MaskEnd, 31-MaskBeg); + // rA = ROTL(rS, rB[0-4]) & mask + // note, ROTL does the masking, so we don't do it here + assign( rA, binop(Iop_And32, + ROTL(mkexpr(rS), + unop(Iop_32to8, mkexpr(rB))), + mkU32(mask32)) ); + } + break; + } + + /* 64bit Integer Rotates */ + case 0x1E: { + msk_imm = ((msk_imm & 1) << 5) | (msk_imm >> 1); + sh_imm |= b1 << 5; + + vassert( msk_imm < 64 ); + vassert( sh_imm < 64 ); + + switch (opc2) { + case 0x4: { + /* r = ROTL64( rS, rB_lo6) */ + r = ROTL( mkexpr(rS), unop(Iop_64to8, mkexpr(rB)) ); + + if (b1 == 0) { // rldcl (Rotl DWord, Clear Left, PPC64 p555) + DIP("rldcl%s r%u,r%u,r%u,%u\n", flag_rC ? 
".":"", + rA_addr, rS_addr, rB_addr, msk_imm); + // note, ROTL does the masking, so we don't do it here + mask64 = MASK64(0, 63-msk_imm); + assign( rA, binop(Iop_And64, r, mkU64(mask64)) ); + break; + } else { // rldcr (Rotl DWord, Clear Right, PPC64 p556) + DIP("rldcr%s r%u,r%u,r%u,%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, rB_addr, msk_imm); + mask64 = MASK64(63-msk_imm, 63); + assign( rA, binop(Iop_And64, r, mkU64(mask64)) ); + break; + } + break; + } + case 0x2: // rldic (Rotl DWord Imm, Clear, PPC64 p557) + DIP("rldic%s r%u,r%u,%u,%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm, msk_imm); + r = ROTL(mkexpr(rS), mkU8(sh_imm)); + mask64 = MASK64(sh_imm, 63-msk_imm); + assign( rA, binop(Iop_And64, r, mkU64(mask64)) ); + break; + // later: deal with special case: (msk_imm==0) => SHL(sh_imm) + /* + Hmm... looks like this'll do the job more simply: + r = SHL(rS, sh_imm) + m = ~(1 << (63-msk_imm)) + assign(rA, r & m); + */ + + case 0x0: // rldicl (Rotl DWord Imm, Clear Left, PPC64 p558) + if (mode64 + && sh_imm + msk_imm == 64 && msk_imm >= 1 && msk_imm <= 63) { + /* special-case the ,64-n,n form as that is just + unsigned shift-right by n */ + DIP("srdi%s r%u,r%u,%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, msk_imm); + assign( rA, binop(Iop_Shr64, mkexpr(rS), mkU8(msk_imm)) ); + } else { + DIP("rldicl%s r%u,r%u,%u,%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm, msk_imm); + r = ROTL(mkexpr(rS), mkU8(sh_imm)); + mask64 = MASK64(0, 63-msk_imm); + assign( rA, binop(Iop_And64, r, mkU64(mask64)) ); + } + break; + + case 0x1: // rldicr (Rotl DWord Imm, Clear Right, PPC64 p559) + if (mode64 + && sh_imm + msk_imm == 63 && sh_imm >= 1 && sh_imm <= 63) { + /* special-case the ,n,63-n form as that is just + shift-left by n */ + DIP("sldi%s r%u,r%u,%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, sh_imm); + assign( rA, binop(Iop_Shl64, mkexpr(rS), mkU8(sh_imm)) ); + } else { + DIP("rldicr%s r%u,r%u,%u,%u\n", flag_rC ? 
".":"", + rA_addr, rS_addr, sh_imm, msk_imm); + r = ROTL(mkexpr(rS), mkU8(sh_imm)); + mask64 = MASK64(63-msk_imm, 63); + assign( rA, binop(Iop_And64, r, mkU64(mask64)) ); + } + break; + + case 0x3: { // rldimi (Rotl DWord Imm, Mask Insert, PPC64 p560) + IRTemp rA_orig = newTemp(ty); + DIP("rldimi%s r%u,r%u,%u,%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, sh_imm, msk_imm); + r = ROTL(mkexpr(rS), mkU8(sh_imm)); + mask64 = MASK64(sh_imm, 63-msk_imm); + assign( rA_orig, getIReg(rA_addr) ); + assign( rA, binop(Iop_Or64, + binop(Iop_And64, mkU64(mask64), r), + binop(Iop_And64, mkU64(~mask64), + mkexpr(rA_orig))) ); + break; + } + default: + vex_printf("dis_int_rot(ppc)(opc2)\n"); + return False; + } + break; + } + + default: + vex_printf("dis_int_rot(ppc)(opc1)\n"); + return False; + } + + putIReg( rA_addr, mkexpr(rA) ); + + if (flag_rC) { + set_CR0( mkexpr(rA) ); + } + return True; + } + + + /* + Integer Load Instructions + */ + static Bool dis_int_load ( UInt theInstr ) + { + /* D-Form, X-Form, DS-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar rD_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UInt uimm16 = ifieldUIMM16(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b1 = ifieldBIT1(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + Int simm16 = extend_s_16to32(uimm16); + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRExpr* val; + + switch (opc1) { + case 0x1F: // register offset + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + break; + case 0x3A: // immediate offset: 64bit: ld/ldu/lwa: mask off + // lowest 2 bits of immediate before forming EA + simm16 = simm16 & 0xFFFFFFFC; + default: // immediate offset + assign( EA, ea_rAor0_simm( rA_addr, simm16 ) ); + break; + } + + switch (opc1) { + case 0x22: // lbz (Load B & Zero, PPC32 p433) + DIP("lbz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I8, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom8(ty, val, False) ); + break; + + case 0x23: // lbzu (Load B & Zero, Update, PPC32 p434) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lbzu,rA_addr|rD_addr)\n"); + return False; + } + DIP("lbzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I8, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom8(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x2A: // lha (Load HW Alg, PPC32 p445) + DIP("lha r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, True) ); + break; + + case 0x2B: // lhau (Load HW Alg, Update, PPC32 p446) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lhau,rA_addr|rD_addr)\n"); + return False; + } + DIP("lhau r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, True) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x28: // lhz (Load HW & Zero, PPC32 p450) + DIP("lhz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, False) ); + break; + + case 0x29: // lhzu (Load HW & and Zero, Update, PPC32 p451) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lhzu,rA_addr|rD_addr)\n"); + return False; + } + DIP("lhzu 
r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x20: // lwz (Load W & Zero, PPC32 p460) + DIP("lwz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I32, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom32(ty, val, False) ); + break; + + case 0x21: // lwzu (Load W & Zero, Update, PPC32 p461)) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lwzu,rA_addr|rD_addr)\n"); + return False; + } + DIP("lwzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr); + val = loadBE(Ity_I32, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom32(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + /* X Form */ + case 0x1F: + if (b0 != 0) { + vex_printf("dis_int_load(ppc)(Ox1F,b0)\n"); + return False; + } + + switch (opc2) { + case 0x077: // lbzux (Load B & Zero, Update Indexed, PPC32 p435) + DIP("lbzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lwzux,rA_addr|rD_addr)\n"); + return False; + } + val = loadBE(Ity_I8, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom8(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x057: // lbzx (Load B & Zero, Indexed, PPC32 p436) + DIP("lbzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I8, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom8(ty, val, False) ); + break; + + case 0x177: // lhaux (Load HW Alg, Update Indexed, PPC32 p447) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lhaux,rA_addr|rD_addr)\n"); + return False; + } + DIP("lhaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, True) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x157: // lhax (Load HW Alg, Indexed, PPC32 p448) + DIP("lhax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = 
loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, True) ); + break; + + case 0x137: // lhzux (Load HW & Zero, Update Indexed, PPC32 p452) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lhzux,rA_addr|rD_addr)\n"); + return False; + } + DIP("lhzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x117: // lhzx (Load HW & Zero, Indexed, PPC32 p453) + DIP("lhzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I16, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom16(ty, val, False) ); + break; + + case 0x037: // lwzux (Load W & Zero, Update Indexed, PPC32 p462) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lwzux,rA_addr|rD_addr)\n"); + return False; + } + DIP("lwzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I32, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom32(ty, val, False) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x017: // lwzx (Load W & Zero, Indexed, PPC32 p463) + DIP("lwzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + val = loadBE(Ity_I32, mkexpr(EA)); + putIReg( rD_addr, mkWidenFrom32(ty, val, False) ); + break; + + + /* 64bit Loads */ + case 0x035: // ldux (Load DWord, Update Indexed, PPC64 p475) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(ldux,rA_addr|rD_addr)\n"); + return False; + } + DIP("ldux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x015: // ldx (Load DWord, Indexed, PPC64 p476) + DIP("ldx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) ); + break; + + case 0x175: // lwaux (Load W Alg, Update Indexed, PPC64 p501) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(lwaux,rA_addr|rD_addr)\n"); + 
return False; + } + DIP("lwaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + putIReg( rD_addr, + unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x155: // lwax (Load W Alg, Indexed, PPC64 p502) + DIP("lwax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + putIReg( rD_addr, + unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) ); + break; + + default: + vex_printf("dis_int_load(ppc)(opc2)\n"); + return False; + } + break; + + /* DS Form - 64bit Loads. In each case EA will have been formed + with the lowest 2 bits masked off the immediate offset. */ + case 0x3A: + switch ((b1<<1) | b0) { + case 0x0: // ld (Load DWord, PPC64 p472) + DIP("ld r%u,%d(r%u)\n", rD_addr, simm16, rA_addr); + putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) ); + break; + + case 0x1: // ldu (Load DWord, Update, PPC64 p474) + if (rA_addr == 0 || rA_addr == rD_addr) { + vex_printf("dis_int_load(ppc)(ldu,rA_addr|rD_addr)\n"); + return False; + } + DIP("ldu r%u,%d(r%u)\n", rD_addr, simm16, rA_addr); + putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x2: // lwa (Load Word Alg, PPC64 p499) + DIP("lwa r%u,%d(r%u)\n", rD_addr, simm16, rA_addr); + putIReg( rD_addr, + unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) ); + break; + + default: + vex_printf("dis_int_load(ppc)(0x3A, opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_int_load(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Integer Store Instructions + */ + static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi ) + { + /* D-Form, X-Form, DS-Form */ + UChar opc1 = ifieldOPC(theInstr); + UInt rS_addr = ifieldRegDS(theInstr); + UInt rA_addr = ifieldRegA(theInstr); + UInt uimm16 = ifieldUIMM16(theInstr); + UInt rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b1 = ifieldBIT1(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + Int simm16 = extend_s_16to32(uimm16); + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp rS = newTemp(ty); + IRTemp rB = newTemp(ty); + IRTemp EA = newTemp(ty); + + assign( rB, getIReg(rB_addr) ); + assign( rS, getIReg(rS_addr) ); + + switch (opc1) { + case 0x1F: // register offset + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + break; + case 0x3E: // immediate offset: 64bit: std/stdu: mask off + // lowest 2 bits of immediate before forming EA + simm16 = simm16 & 0xFFFFFFFC; + default: // immediate offset + assign( EA, ea_rAor0_simm( rA_addr, simm16 ) ); + break; + } + + switch (opc1) { + case 0x26: // stb (Store B, PPC32 p509) + DIP("stb r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) ); + break; + + case 0x27: // stbu (Store B, Update, PPC32 p510) + if (rA_addr == 0 ) { + vex_printf("dis_int_store(ppc)(stbu,rA_addr)\n"); + return False; + } + DIP("stbu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) ); + break; + + case 0x2C: // sth (Store HW, PPC32 p522) + DIP("sth r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) ); + break; + + case 0x2D: // sthu (Store HW, Update, PPC32 p524) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(sthu,rA_addr)\n"); + return False; + } + DIP("sthu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) ); + break; + + case 0x24: // stw (Store W, PPC32 p530) + DIP("stw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) ); + break; + + case 0x25: // stwu (Store W, Update, PPC32 p534) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(stwu,rA_addr)\n"); + return False; + } + DIP("stwu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) ); + break; + + /* X Form : all these use EA_indexed */ + case 0x1F: + if (b0 != 0) { + 
vex_printf("dis_int_store(ppc)(0x1F,b0)\n"); + return False; + } + + switch (opc2) { + case 0x0F7: // stbux (Store B, Update Indexed, PPC32 p511) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(stbux,rA_addr)\n"); + return False; + } + DIP("stbux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) ); + break; + + case 0x0D7: // stbx (Store B Indexed, PPC32 p512) + DIP("stbx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) ); + break; + + case 0x1B7: // sthux (Store HW, Update Indexed, PPC32 p525) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(sthux,rA_addr)\n"); + return False; + } + DIP("sthux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) ); + break; + + case 0x197: // sthx (Store HW Indexed, PPC32 p526) + DIP("sthx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) ); + break; + + case 0x0B7: // stwux (Store W, Update Indexed, PPC32 p535) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(stwux,rA_addr)\n"); + return False; + } + DIP("stwux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) ); + break; + + case 0x097: // stwx (Store W Indexed, PPC32 p536) + DIP("stwx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) ); + break; + + + /* 64bit Stores */ + case 0x0B5: // stdux (Store DWord, Update Indexed, PPC64 p584) + if (rA_addr == 0) { + vex_printf("dis_int_store(ppc)(stdux,rA_addr)\n"); + return False; + } + DIP("stdux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkexpr(rS) ); + break; + + case 0x095: // stdx (Store DWord Indexed, PPC64 p585) + DIP("stdx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + storeBE( 
mkexpr(EA), mkexpr(rS) ); + break; + + default: + vex_printf("dis_int_store(ppc)(opc2)\n"); + return False; + } + break; + + /* DS Form - 64bit Stores. In each case EA will have been formed + with the lowest 2 bits masked off the immediate offset. */ + case 0x3E: + switch ((b1<<1) | b0) { + case 0x0: // std (Store DWord, PPC64 p580) + DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + storeBE( mkexpr(EA), mkexpr(rS) ); + break; + + case 0x1: // stdu (Store DWord, Update, PPC64 p583) + DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + putIReg( rA_addr, mkexpr(EA) ); + storeBE( mkexpr(EA), mkexpr(rS) ); + break; + + default: + vex_printf("dis_int_load(ppc)(0x3A, opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_int_store(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Integer Load/Store Multiple Instructions + */ + static Bool dis_int_ldst_mult ( UInt theInstr ) + { + /* D-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar rD_addr = ifieldRegDS(theInstr); + UChar rS_addr = rD_addr; + UChar rA_addr = ifieldRegA(theInstr); + UInt uimm16 = ifieldUIMM16(theInstr); + + Int simm16 = extend_s_16to32(uimm16); + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + UInt r = 0; + UInt ea_off = 0; + IRExpr* irx_addr; + + assign( EA, ea_rAor0_simm( rA_addr, simm16 ) ); + + switch (opc1) { + case 0x2E: // lmw (Load Multiple Word, PPC32 p454) + if (rA_addr >= rD_addr) { + vex_printf("dis_int_ldst_mult(ppc)(lmw,rA_addr)\n"); + return False; + } + DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr); + for (r = rD_addr; r <= 31; r++) { + irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off)); + putIReg( r, mkWidenFrom32(ty, loadBE(Ity_I32, irx_addr ), + False) ); + ea_off += 4; + } + break; + + case 0x2F: // stmw (Store Multiple Word, PPC32 p527) + DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr); + for (r = rS_addr; r <= 31; r++) { + irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off)); + storeBE( irx_addr, mkNarrowTo32(ty, getIReg(r)) ); + ea_off += 4; + } + break; + + default: + vex_printf("dis_int_ldst_mult(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Integer Load/Store String Instructions + */ + static + void generate_lsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 + IRTemp EA, // EA + Int rD, // first dst register + Int maxBytes ) // 32 or 128 + { + Int i, shift = 24; + IRExpr* e_nbytes = mkexpr(tNBytes); + IRExpr* e_EA = mkexpr(EA); + IRType ty = mode64 ? Ity_I64 : Ity_I32; + + vassert(rD >= 0 && rD < 32); + rD--; if (rD < 0) rD = 31; + + for (i = 0; i < maxBytes; i++) { + /* if (nBytes < (i+1)) goto NIA; */ + stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + Ijk_Boring, + mkSzConst( ty, nextInsnAddr()) )); + /* when crossing into a new dest register, set it to zero. 
*/ + if ((i % 4) == 0) { + rD++; if (rD == 32) rD = 0; + putIReg(rD, mkSzImm(ty, 0)); + shift = 24; + } + /* rD |= (8Uto32(*(EA+i))) << shift */ + vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24); + putIReg( + rD, + mkWidenFrom32( + ty, + binop( + Iop_Or32, + mkNarrowTo32(ty, getIReg(rD)), + binop( + Iop_Shl32, + unop( + Iop_8Uto32, + loadBE(Ity_I8, + binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i))) + ), + mkU8(toUChar(shift)) + ) + ), + /*Signed*/False + ) + ); + shift -= 8; + } + } + + static + void generate_stsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 + IRTemp EA, // EA + Int rS, // first src register + Int maxBytes ) // 32 or 128 + { + Int i, shift = 24; + IRExpr* e_nbytes = mkexpr(tNBytes); + IRExpr* e_EA = mkexpr(EA); + IRType ty = mode64 ? Ity_I64 : Ity_I32; + + vassert(rS >= 0 && rS < 32); + rS--; if (rS < 0) rS = 31; + + for (i = 0; i < maxBytes; i++) { + /* if (nBytes < (i+1)) goto NIA; */ + stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + Ijk_Boring, + mkSzConst( ty, nextInsnAddr() ) )); + /* check for crossing into a new src register. */ + if ((i % 4) == 0) { + rS++; if (rS == 32) rS = 0; + shift = 24; + } + /* *(EA+i) = 32to8(rS >> shift) */ + vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24); + storeBE( + binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)), + unop(Iop_32to8, + binop(Iop_Shr32, + mkNarrowTo32(ty, getIReg(rS)), + mkU8(toUChar(shift)))) + ); + shift -= 8; + } + } + + static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar rD_addr = ifieldRegDS(theInstr); + UChar rS_addr = rD_addr; + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UChar NumBytes = rB_addr; + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp t_EA = newTemp(ty); + IRTemp t_nbytes = IRTemp_INVALID; + + *stopHere = False; + + if (opc1 != 0x1F || b0 != 0) { + vex_printf("dis_int_ldst_str(ppc)(opc1)\n"); + return False; + } + + switch (opc2) { + case 0x255: // lswi (Load String Word Immediate, PPC32 p455) + /* NB: does not reject the case where RA is in the range of + registers to be loaded. It should. */ + DIP("lswi r%u,r%u,%d\n", rD_addr, rA_addr, NumBytes); + assign( t_EA, ea_rAor0(rA_addr) ); + if (NumBytes == 8 && !mode64) { + /* Special case hack */ + /* rD = Mem[EA]; (rD+1)%32 = Mem[EA+4] */ + putIReg( rD_addr, + loadBE(Ity_I32, mkexpr(t_EA)) ); + putIReg( (rD_addr+1) % 32, + loadBE(Ity_I32, + binop(Iop_Add32, mkexpr(t_EA), mkU32(4))) ); + } else { + t_nbytes = newTemp(Ity_I32); + assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) ); + generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 32 ); + *stopHere = True; + } + return True; + + case 0x215: // lswx (Load String Word Indexed, PPC32 p456) + /* NB: does not reject the case where RA is in the range of + registers to be loaded. It should. Although considering + that that can only be detected at run time, it's not easy to + do so. 
*/ + if (rD_addr == rA_addr || rD_addr == rB_addr) + return False; + if (rD_addr == 0 && rA_addr == 0) + return False; + DIP("lswx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + t_nbytes = newTemp(Ity_I32); + assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) ); + assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) ); + generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 128 ); + *stopHere = True; + return True; + + case 0x2D5: // stswi (Store String Word Immediate, PPC32 p528) + DIP("stswi r%u,r%u,%d\n", rS_addr, rA_addr, NumBytes); + assign( t_EA, ea_rAor0(rA_addr) ); + if (NumBytes == 8 && !mode64) { + /* Special case hack */ + /* Mem[EA] = rD; Mem[EA+4] = (rD+1)%32 */ + storeBE( mkexpr(t_EA), + getIReg(rD_addr) ); + storeBE( binop(Iop_Add32, mkexpr(t_EA), mkU32(4)), + getIReg((rD_addr+1) % 32) ); + } else { + t_nbytes = newTemp(Ity_I32); + assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) ); + generate_stsw_sequence( t_nbytes, t_EA, rD_addr, 32 ); + *stopHere = True; + } + return True; + + case 0x295: // stswx (Store String Word Indexed, PPC32 p529) + DIP("stswx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + t_nbytes = newTemp(Ity_I32); + assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) ); + assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) ); + generate_stsw_sequence( t_nbytes, t_EA, rS_addr, 128 ); + *stopHere = True; + return True; + + default: + vex_printf("dis_int_ldst_str(ppc)(opc2)\n"); + return False; + } + return True; + } + + + /* ------------------------------------------------------------------ + Integer Branch Instructions + ------------------------------------------------------------------ */ + + /* + Branch helper function + ok = BO[2] | ((CTR[0] != 0) ^ BO[1]) + Returns an I32 which is 0x00000000 if the ctr condition failed + and 0xFFFFFFFF otherwise. + */ + static IRExpr* /* :: Ity_I32 */ branch_ctr_ok( UInt BO ) + { + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp ok = newTemp(Ity_I32); + + if ((BO >> 2) & 1) { // independent of ctr + assign( ok, mkU32(0xFFFFFFFF) ); + } else { + if ((BO >> 1) & 1) { // ctr == 0 ? + assign( ok, unop( Iop_1Sto32, + binop( mkSzOp(ty, Iop_CmpEQ8), + getGST( PPC_GST_CTR ), + mkSzImm(ty,0))) ); + } else { // ctr != 0 ? + assign( ok, unop( Iop_1Sto32, + binop( mkSzOp(ty, Iop_CmpNE8), + getGST( PPC_GST_CTR ), + mkSzImm(ty,0))) ); + } + } + return mkexpr(ok); + } + + + /* + Branch helper function cond_ok = BO[4] | (CR[BI] == BO[3]) + Returns an I32 which is either 0 if the condition failed or + some arbitrary nonzero value otherwise. */ + + static IRExpr* /* :: Ity_I32 */ branch_cond_ok( UInt BO, UInt BI ) + { + Int where; + IRTemp res = newTemp(Ity_I32); + IRTemp cr_bi = newTemp(Ity_I32); + + if ((BO >> 4) & 1) { + assign( res, mkU32(1) ); + } else { + // ok = (CR[BI] == BO[3]) Note, the following relies on + // getCRbit_anywhere returning a value which + // is either zero or has exactly 1 bit set. + assign( cr_bi, getCRbit_anywhere( BI, &where ) ); + + if ((BO >> 3) & 1) { + /* We can use cr_bi as-is. */ + assign( res, mkexpr(cr_bi) ); + } else { + /* We have to invert the sense of the information held in + cr_bi. For that we need to know which bit + getCRbit_anywhere regards as significant. */ + assign( res, binop(Iop_Xor32, mkexpr(cr_bi), + mkU32(1<whatNext = Dis_StopHere; + + switch (opc1) { + case 0x12: // b (Branch, PPC32 p360) + if (flag_AA) { + tgt = mkSzAddr( ty, extend_s_26to64(LI_u26) ); + } else { + tgt = mkSzAddr( ty, guest_CIA_curr_instr + + (Long)extend_s_26to64(LI_u26) ); + } + if (mode64) { + DIP("b%s%s 0x%llx\n", + flag_LK ? "l" : "", flag_AA ? "a" : "", tgt); + } else { + DIP("b%s%s 0x%x\n", + flag_LK ? "l" : "", flag_AA ? "a" : "", (Addr32)tgt); + } + + if (flag_LK) { + putGST( PPC_GST_LR, e_nia ); + if (vbi->guest_ppc_zap_RZ_at_bl + && vbi->guest_ppc_zap_RZ_at_bl( (ULong)tgt) ) { + IRTemp t_tgt = newTemp(ty); + assign(t_tgt, mode64 ? 
mkU64(tgt) : mkU32(tgt) ); + make_redzone_AbiHint( vbi, t_tgt, + "branch-and-link (unconditional call)" ); + } + } + + if (resteerOkFn( callback_opaque, tgt )) { + dres->whatNext = Dis_Resteer; + dres->continueAt = tgt; + } else { + irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring; + irsb->next = mkSzImm(ty, tgt); + } + break; + + case 0x10: // bc (Branch Conditional, PPC32 p361) + DIP("bc%s%s 0x%x, 0x%x, 0x%x\n", + flag_LK ? "l" : "", flag_AA ? "a" : "", BO, BI, BD); + + if (!(BO & 0x4)) { + putGST( PPC_GST_CTR, + binop(mkSzOp(ty, Iop_Sub8), + getGST( PPC_GST_CTR ), mkSzImm(ty, 1)) ); + } + + /* This is a bit subtle. ctr_ok is either all 0s or all 1s. + cond_ok is either zero or nonzero, since that's the cheapest + way to compute it. Anding them together gives a value which + is either zero or non zero and so that's what we must test + for in the IRStmt_Exit. */ + assign( ctr_ok, branch_ctr_ok( BO ) ); + assign( cond_ok, branch_cond_ok( BO, BI ) ); + assign( do_branch, + binop(Iop_And32, mkexpr(cond_ok), mkexpr(ctr_ok)) ); + + if (flag_AA) { + tgt = mkSzAddr(ty, extend_s_16to64(BD_u16)); + } else { + tgt = mkSzAddr(ty, guest_CIA_curr_instr + + (Long)extend_s_16to64(BD_u16)); + } + if (flag_LK) + putGST( PPC_GST_LR, e_nia ); + + stmt( IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)), + flag_LK ? Ijk_Call : Ijk_Boring, + mkSzConst(ty, tgt) ) ); + + irsb->jumpkind = Ijk_Boring; + irsb->next = e_nia; + break; + + case 0x13: + /* For bclr and bcctr, it appears that the lowest two bits of + b11to15 are a branch hint, and so we only need to ensure it's + of the form 000XX. */ + if ((b11to15 & ~3) != 0) { + vex_printf("dis_int_branch(ppc)(0x13,b11to15)(%d)\n", (Int)b11to15); + return False; + } + + switch (opc2) { + case 0x210: // bcctr (Branch Cond. to Count Register, PPC32 p363) + if ((BO & 0x4) == 0) { // "decr and test CTR" option invalid + vex_printf("dis_int_branch(ppc)(bcctr,BO)\n"); + return False; + } + DIP("bcctr%s 0x%x, 0x%x\n", flag_LK ? 
"l" : "", BO, BI); + + assign( cond_ok, branch_cond_ok( BO, BI ) ); + + /* FIXME: this is confusing. lr_old holds the old value + of ctr, not lr :-) */ + assign( lr_old, addr_align( getGST( PPC_GST_CTR ), 4 )); + + if (flag_LK) + putGST( PPC_GST_LR, e_nia ); + + stmt( IRStmt_Exit( + binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)), + Ijk_Boring, + c_nia )); + + if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) { + make_redzone_AbiHint( vbi, lr_old, + "b-ctr-l (indirect call)" ); + } + + irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring; + irsb->next = mkexpr(lr_old); + break; + + case 0x010: { // bclr (Branch Cond. to Link Register, PPC32 p365) + Bool vanilla_return = False; + if ((BO & 0x14 /* 1z1zz */) == 0x14 && flag_LK == 0) { + DIP("blr\n"); + vanilla_return = True; + } else { + DIP("bclr%s 0x%x, 0x%x\n", flag_LK ? "l" : "", BO, BI); + } + + if (!(BO & 0x4)) { + putGST( PPC_GST_CTR, + binop(mkSzOp(ty, Iop_Sub8), + getGST( PPC_GST_CTR ), mkSzImm(ty, 1)) ); + } + + /* See comments above for 'bc' about this */ + assign( ctr_ok, branch_ctr_ok( BO ) ); + assign( cond_ok, branch_cond_ok( BO, BI ) ); + assign( do_branch, + binop(Iop_And32, mkexpr(cond_ok), mkexpr(ctr_ok)) ); + + assign( lr_old, addr_align( getGST( PPC_GST_LR ), 4 )); + + if (flag_LK) + putGST( PPC_GST_LR, e_nia ); + + stmt( IRStmt_Exit( + binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)), + Ijk_Boring, + c_nia )); + + if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) { + make_redzone_AbiHint( vbi, lr_old, + "branch-to-lr (unconditional return)" ); + } + + /* blrl is pretty strange; it's like a return that sets the + return address of its caller to the insn following this + one. Mark it as a return. */ + irsb->jumpkind = Ijk_Ret; /* was flag_LK ? 
Ijk_Call : Ijk_Ret; */ + irsb->next = mkexpr(lr_old); + break; + } + default: + vex_printf("dis_int_branch(ppc)(opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_int_branch(ppc)(opc1)\n"); + return False; + } + + return True; + } + + + + /* + Condition Register Logical Instructions + */ + static Bool dis_cond_logic ( UInt theInstr ) + { + /* XL-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar crbD_addr = ifieldRegDS(theInstr); + UChar crfD_addr = toUChar( IFIELD(theInstr, 23, 3) ); + UChar crbA_addr = ifieldRegA(theInstr); + UChar crfS_addr = toUChar( IFIELD(theInstr, 18, 3) ); + UChar crbB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRTemp crbD = newTemp(Ity_I32); + IRTemp crbA = newTemp(Ity_I32); + IRTemp crbB = newTemp(Ity_I32); + + if (opc1 != 19 || b0 != 0) { + vex_printf("dis_cond_logic(ppc)(opc1)\n"); + return False; + } + + if (opc2 == 0) { // mcrf (Move Cond Reg Field, PPC32 p464) + if (((crbD_addr & 0x3) != 0) || + ((crbA_addr & 0x3) != 0) || (crbB_addr != 0)) { + vex_printf("dis_cond_logic(ppc)(crbD|crbA|crbB != 0)\n"); + return False; + } + DIP("mcrf cr%u,cr%u\n", crfD_addr, crfS_addr); + putCR0( crfD_addr, getCR0( crfS_addr) ); + putCR321( crfD_addr, getCR321(crfS_addr) ); + } else { + assign( crbA, getCRbit(crbA_addr) ); + if (crbA_addr == crbB_addr) + crbB = crbA; + else + assign( crbB, getCRbit(crbB_addr) ); + + switch (opc2) { + case 0x101: // crand (Cond Reg AND, PPC32 p372) + DIP("crand crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, binop(Iop_And32, mkexpr(crbA), mkexpr(crbB)) ); + break; + case 0x081: // crandc (Cond Reg AND w. 
Complement, PPC32 p373) + DIP("crandc crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, binop(Iop_And32, + mkexpr(crbA), + unop(Iop_Not32, mkexpr(crbB))) ); + break; + case 0x121: // creqv (Cond Reg Equivalent, PPC32 p374) + DIP("creqv crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, unop(Iop_Not32, + binop(Iop_Xor32, mkexpr(crbA), mkexpr(crbB))) ); + break; + case 0x0E1: // crnand (Cond Reg NAND, PPC32 p375) + DIP("crnand crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, unop(Iop_Not32, + binop(Iop_And32, mkexpr(crbA), mkexpr(crbB))) ); + break; + case 0x021: // crnor (Cond Reg NOR, PPC32 p376) + DIP("crnor crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, unop(Iop_Not32, + binop(Iop_Or32, mkexpr(crbA), mkexpr(crbB))) ); + break; + case 0x1C1: // cror (Cond Reg OR, PPC32 p377) + DIP("cror crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, binop(Iop_Or32, mkexpr(crbA), mkexpr(crbB)) ); + break; + case 0x1A1: // crorc (Cond Reg OR w. Complement, PPC32 p378) + DIP("crorc crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, binop(Iop_Or32, + mkexpr(crbA), + unop(Iop_Not32, mkexpr(crbB))) ); + break; + case 0x0C1: // crxor (Cond Reg XOR, PPC32 p379) + DIP("crxor crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr); + assign( crbD, binop(Iop_Xor32, mkexpr(crbA), mkexpr(crbB)) ); + break; + default: + vex_printf("dis_cond_logic(ppc)(opc2)\n"); + return False; + } + + putCRbit( crbD_addr, mkexpr(crbD) ); + } + return True; + } + + + /* + Trap instructions + */ + + /* Do the code generation for a trap. Returned Bool is true iff + this is an unconditional trap. If the two arg IRExpr*s are + Ity_I32s then the comparison is 32-bit. If they are Ity_I64s + then they are 64-bit, and we must be disassembling 64-bit + instructions. 
*/ + static Bool do_trap ( UChar TO, + IRExpr* argL0, IRExpr* argR0, Addr64 cia ) + { + IRTemp argL, argR; + IRExpr *argLe, *argRe, *cond, *tmp; + + Bool is32bit = typeOfIRExpr(irsb->tyenv, argL0 ) == Ity_I32; + + IROp opAND = is32bit ? Iop_And32 : Iop_And64; + IROp opOR = is32bit ? Iop_Or32 : Iop_Or64; + IROp opCMPORDS = is32bit ? Iop_CmpORD32S : Iop_CmpORD64S; + IROp opCMPORDU = is32bit ? Iop_CmpORD32U : Iop_CmpORD64U; + IROp opCMPNE = is32bit ? Iop_CmpNE32 : Iop_CmpNE64; + IROp opCMPEQ = is32bit ? Iop_CmpEQ32 : Iop_CmpEQ64; + IRExpr* const0 = is32bit ? mkU32(0) : mkU64(0); + IRExpr* const2 = is32bit ? mkU32(2) : mkU64(2); + IRExpr* const4 = is32bit ? mkU32(4) : mkU64(4); + IRExpr* const8 = is32bit ? mkU32(8) : mkU64(8); + + const UChar b11100 = 0x1C; + const UChar b00111 = 0x07; + + if (is32bit) { + vassert( typeOfIRExpr(irsb->tyenv, argL0) == Ity_I32 ); + vassert( typeOfIRExpr(irsb->tyenv, argR0) == Ity_I32 ); + } else { + vassert( typeOfIRExpr(irsb->tyenv, argL0) == Ity_I64 ); + vassert( typeOfIRExpr(irsb->tyenv, argR0) == Ity_I64 ); + vassert( mode64 ); + } + + if ((TO & b11100) == b11100 || (TO & b00111) == b00111) { + /* Unconditional trap. Just do the exit without + testing the arguments. */ + stmt( IRStmt_Exit( + binop(opCMPEQ, const0, const0), + Ijk_SigTRAP, + mode64 ? 
IRConst_U64(cia) : IRConst_U32((UInt)cia) + )); + return True; /* unconditional trap */ + } + + if (is32bit) { + argL = newTemp(Ity_I32); + argR = newTemp(Ity_I32); + } else { + argL = newTemp(Ity_I64); + argR = newTemp(Ity_I64); + } + + assign( argL, argL0 ); + assign( argR, argR0 ); + + argLe = mkexpr(argL); + argRe = mkexpr(argR); + + cond = const0; + if (TO & 16) { // L s R + tmp = binop(opAND, binop(opCMPORDS, argLe, argRe), const4); + cond = binop(opOR, tmp, cond); + } + if (TO & 4) { // L == R + tmp = binop(opAND, binop(opCMPORDS, argLe, argRe), const2); + cond = binop(opOR, tmp, cond); + } + if (TO & 2) { // L u R + tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4); + cond = binop(opOR, tmp, cond); + } + stmt( IRStmt_Exit( + binop(opCMPNE, cond, const0), + Ijk_SigTRAP, + mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia) + )); + return False; /* not an unconditional trap */ + } + + static Bool dis_trapi ( UInt theInstr, + /*OUT*/DisResult* dres ) + { + /* D-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar TO = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UInt uimm16 = ifieldUIMM16(theInstr); + ULong simm16 = extend_s_16to64(uimm16); + Addr64 cia = guest_CIA_curr_instr; + IRType ty = mode64 ? Ity_I64 : Ity_I32; + Bool uncond = False; + + switch (opc1) { + case 0x03: // twi (Trap Word Immediate, PPC32 p548) + uncond = do_trap( TO, + mode64 ? 
unop(Iop_64to32, getIReg(rA_addr)) + : getIReg(rA_addr), + mkU32( (UInt)simm16 ), + cia ); + if (TO == 4) { + DIP("tweqi r%u,%d\n", (UInt)rA_addr, (Int)simm16); + } else { + DIP("tw%di r%u,%d\n", (Int)TO, (UInt)rA_addr, (Int)simm16); + } + break; + case 0x02: // tdi + if (!mode64) + return False; + uncond = do_trap( TO, getIReg(rA_addr), mkU64( (ULong)simm16 ), cia ); + if (TO == 4) { + DIP("tdeqi r%u,%d\n", (UInt)rA_addr, (Int)simm16); + } else { + DIP("td%di r%u,%d\n", (Int)TO, (UInt)rA_addr, (Int)simm16); + } + break; + default: + return False; + } + + if (uncond) { + /* If the trap shows signs of being unconditional, don't + continue decoding past it. */ + irsb->next = mkSzImm( ty, nextInsnAddr() ); + irsb->jumpkind = Ijk_Boring; + dres->whatNext = Dis_StopHere; + } + + return True; + } + + static Bool dis_trap ( UInt theInstr, + /*OUT*/DisResult* dres ) + { + /* X-Form */ + UInt opc2 = ifieldOPClo10(theInstr); + UChar TO = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + Addr64 cia = guest_CIA_curr_instr; + IRType ty = mode64 ? Ity_I64 : Ity_I32; + Bool uncond = False; + + if (ifieldBIT0(theInstr) != 0) + return False; + + switch (opc2) { + case 0x004: // tw (Trap Word, PPC64 p540) + uncond = do_trap( TO, + mode64 ? unop(Iop_64to32, getIReg(rA_addr)) + : getIReg(rA_addr), + mode64 ? 
unop(Iop_64to32, getIReg(rB_addr)) + : getIReg(rB_addr), + cia ); + if (TO == 4) { + DIP("tweq r%u,r%u\n", (UInt)rA_addr, (UInt)rB_addr); + } else { + DIP("tw%d r%u,r%u\n", (Int)TO, (UInt)rA_addr, (UInt)rB_addr); + } + break; + case 0x044: // td (Trap Doubleword, PPC64 p534) + if (!mode64) + return False; + uncond = do_trap( TO, getIReg(rA_addr), getIReg(rB_addr), cia ); + if (TO == 4) { + DIP("tdeq r%u,r%u\n", (UInt)rA_addr, (UInt)rB_addr); + } else { + DIP("td%d r%u,r%u\n", (Int)TO, (UInt)rA_addr, (UInt)rB_addr); + } + break; + default: + return False; + } + + if (uncond) { + /* If the trap shows signs of being unconditional, don't + continue decoding past it. */ + irsb->next = mkSzImm( ty, nextInsnAddr() ); + irsb->jumpkind = Ijk_Boring; + dres->whatNext = Dis_StopHere; + } + + return True; + } + + + /* + System Linkage Instructions + */ + static Bool dis_syslink ( UInt theInstr, + VexAbiInfo* abiinfo, DisResult* dres ) + { + IRType ty = mode64 ? Ity_I64 : Ity_I32; + + if (theInstr != 0x44000002) { + vex_printf("dis_syslink(ppc)(theInstr)\n"); + return False; + } + + // sc (System Call, PPC32 p504) + DIP("sc\n"); + + /* Copy CIA into the IP_AT_SYSCALL pseudo-register, so that on AIX + Valgrind can back the guest up to this instruction if it needs + to restart the syscall. */ + putGST( PPC_GST_IP_AT_SYSCALL, getGST( PPC_GST_CIA ) ); + + /* It's important that all ArchRegs carry their up-to-date value + at this point. So we declare an end-of-block here, which + forces any TempRegs caching ArchRegs to be flushed. */ + irsb->next = abiinfo->guest_ppc_sc_continues_at_LR + ? getGST( PPC_GST_LR ) + : mkSzImm( ty, nextInsnAddr() ); + irsb->jumpkind = Ijk_Sys_syscall; + + dres->whatNext = Dis_StopHere; + return True; + } + + + /* + Memory Synchronization Instructions + + Note on Reservations: + We rely on the assumption that V will in fact only allow one thread at + once to run. In effect, a thread can make a reservation, but we don't + check any stores it does. 
Instead, the reservation is cancelled when + the scheduler switches to another thread (run_thread_for_a_while()). + */ + static Bool dis_memsync ( UInt theInstr ) + { + /* X-Form, XL-Form */ + UChar opc1 = ifieldOPC(theInstr); + UInt b11to25 = IFIELD(theInstr, 11, 15); + UChar flag_L = ifieldRegDS(theInstr); + UInt b11to20 = IFIELD(theInstr, 11, 10); + UChar rD_addr = ifieldRegDS(theInstr); + UChar rS_addr = rD_addr; + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + + switch (opc1) { + /* XL-Form */ + case 0x13: // isync (Instruction Synchronize, PPC32 p432) + if (opc2 != 0x096) { + vex_printf("dis_memsync(ppc)(0x13,opc2)\n"); + return False; + } + if (b11to25 != 0 || b0 != 0) { + vex_printf("dis_memsync(ppc)(0x13,b11to25|b0)\n"); + return False; + } + DIP("isync\n"); + stmt( IRStmt_MBE(Imbe_Fence) ); + break; + + /* X-Form */ + case 0x1F: + switch (opc2) { + case 0x356: // eieio (Enforce In-Order Exec of I/O, PPC32 p394) + if (b11to25 != 0 || b0 != 0) { + vex_printf("dis_memsync(ppc)(eiei0,b11to25|b0)\n"); + return False; + } + DIP("eieio\n"); + /* Insert a memory fence, just to be on the safe side. */ + stmt( IRStmt_MBE(Imbe_Fence) ); + break; + + case 0x014: // lwarx (Load Word and Reserve Indexed, PPC32 p458) + /* According to the PowerPC ISA version 2.05, b0 (called EH + in the documentation) is merely a hint bit to the + hardware, I think as to whether or not contention is + likely. So we can just ignore it. */ + DIP("lwarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0); + + // trap if misaligned + gen_SIGBUS_if_misaligned( EA, 4 ); + + // and actually do the load + putIReg( rD_addr, mkWidenFrom32(ty, loadlinkedBE(Ity_I32, mkexpr(EA)), + False) ); + break; + + case 0x096: { + // stwcx. 
(Store Word Conditional Indexed, PPC32 p532) + // Note this has to handle stwcx. in both 32- and 64-bit modes, + // so isn't quite as straightforward as it might otherwise be. + IRTemp rS = newTemp(Ity_I32); + IRTemp resSC; + if (b0 != 1) { + vex_printf("dis_memsync(ppc)(stwcx.,b0)\n"); + return False; + } + DIP("stwcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + + // trap if misaligned + gen_SIGBUS_if_misaligned( EA, 4 ); + + // Get the data to be stored, and narrow to 32 bits if necessary + assign( rS, mkNarrowTo32(ty, getIReg(rS_addr)) ); + + // Do the store, and get success/failure bit into resSC + resSC = newTemp(Ity_I1); + stmt( IRStmt_Store(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) ); + + // Set CR0[LT GT EQ S0] = 0b000 || XER[SO] on failure + // Set CR0[LT GT EQ S0] = 0b001 || XER[SO] on success + putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1))); + putCR0(0, getXER_SO()); + + /* Note: + If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and + whether rS is stored is dependent on that value. */ + /* So I guess we can just ignore this case? */ + break; + } + + case 0x256: // sync (Synchronize, PPC32 p543), + // also lwsync (L==1), ptesync (L==2) + /* http://sources.redhat.com/ml/binutils/2000-12/msg00311.html + + The PowerPC architecture used in IBM chips has expanded + the sync instruction into two variants: lightweight sync + and heavyweight sync. The original sync instruction is + the new heavyweight sync and lightweight sync is a strict + subset of the heavyweight sync functionality. This allows + the programmer to specify a less expensive operation on + high-end systems when the full sync functionality is not + necessary. + + The basic "sync" mnemonic now utilizes an operand. "sync" + without an operand now becomes a extended mnemonic for + heavyweight sync. Processors without the lwsync + instruction will not decode the L field and will perform a + heavyweight sync. Everything is backward compatible. 
+ + sync = sync 0 + lwsync = sync 1 + ptesync = sync 2 *** TODO - not implemented *** + */ + if (b11to20 != 0 || b0 != 0) { + vex_printf("dis_memsync(ppc)(sync/lwsync,b11to20|b0)\n"); + return False; + } + if (flag_L != 0/*sync*/ && flag_L != 1/*lwsync*/) { + vex_printf("dis_memsync(ppc)(sync/lwsync,flag_L)\n"); + return False; + } + DIP("%ssync\n", flag_L == 1 ? "lw" : ""); + /* Insert a memory fence. It's sometimes important that these + are carried through to the generated code. */ + stmt( IRStmt_MBE(Imbe_Fence) ); + break; + + /* 64bit Memsync */ + case 0x054: // ldarx (Load DWord and Reserve Indexed, PPC64 p473) + /* According to the PowerPC ISA version 2.05, b0 (called EH + in the documentation) is merely a hint bit to the + hardware, I think as to whether or not contention is + likely. So we can just ignore it. */ + if (!mode64) + return False; + DIP("ldarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0); + + // trap if misaligned + gen_SIGBUS_if_misaligned( EA, 8 ); + + // and actually do the load + putIReg( rD_addr, loadlinkedBE(Ity_I64, mkexpr(EA)) ); + break; + + case 0x0D6: { // stdcx. (Store DWord Condition Indexd, PPC64 p581) + // A marginally simplified version of the stwcx. case + IRTemp rS = newTemp(Ity_I64); + IRTemp resSC; + if (b0 != 1) { + vex_printf("dis_memsync(ppc)(stdcx.,b0)\n"); + return False; + } + if (!mode64) + return False; + DIP("stdcx. 
r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + + // trap if misaligned + gen_SIGBUS_if_misaligned( EA, 8 ); + + // Get the data to be stored + assign( rS, getIReg(rS_addr) ); + + // Do the store, and get success/failure bit into resSC + resSC = newTemp(Ity_I1); + stmt( IRStmt_Store(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) ); + + // Set CR0[LT GT EQ S0] = 0b000 || XER[SO] on failure + // Set CR0[LT GT EQ S0] = 0b001 || XER[SO] on success + putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1))); + putCR0(0, getXER_SO()); + + /* Note: + If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and + whether rS is stored is dependent on that value. */ + /* So I guess we can just ignore this case? */ + break; + } + + default: + vex_printf("dis_memsync(ppc)(opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_memsync(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Integer Shift Instructions + */ + static Bool dis_int_shift ( UInt theInstr ) + { + /* X-Form, XS-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar rS_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UChar sh_imm = rB_addr; + UInt opc2 = ifieldOPClo10(theInstr); + UChar b1 = ifieldBIT1(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp rA = newTemp(ty); + IRTemp rS = newTemp(ty); + IRTemp rB = newTemp(ty); + IRTemp outofrange = newTemp(Ity_I8); + IRTemp rS_lo32 = newTemp(Ity_I32); + IRTemp rB_lo32 = newTemp(Ity_I32); + IRExpr* e_tmp; + + assign( rS, getIReg(rS_addr) ); + assign( rB, getIReg(rB_addr) ); + assign( rS_lo32, mkNarrowTo32(ty, mkexpr(rS)) ); + assign( rB_lo32, mkNarrowTo32(ty, mkexpr(rB)) ); + + if (opc1 == 0x1F) { + switch (opc2) { + case 0x018: { // slw (Shift Left Word, PPC32 p505) + DIP("slw%s r%u,r%u,r%u\n", flag_rC ? 
".":"", + rA_addr, rS_addr, rB_addr); + /* rA = rS << rB */ + /* ppc32 semantics are: + slw(x,y) = (x << (y & 31)) -- primary result + & ~((y << 26) >>s 31) -- make result 0 + for y in 32 .. 63 + */ + e_tmp = + binop( Iop_And32, + binop( Iop_Shl32, + mkexpr(rS_lo32), + unop( Iop_32to8, + binop(Iop_And32, + mkexpr(rB_lo32), mkU32(31)))), + unop( Iop_Not32, + binop( Iop_Sar32, + binop(Iop_Shl32, mkexpr(rB_lo32), mkU8(26)), + mkU8(31))) ); + assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) ); + break; + } + + case 0x318: { // sraw (Shift Right Alg Word, PPC32 p506) + IRTemp sh_amt = newTemp(Ity_I32); + DIP("sraw%s r%u,r%u,r%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, rB_addr); + /* JRS: my reading of the (poorly worded) PPC32 doc p506 is: + amt = rB & 63 + rA = Sar32( rS, amt > 31 ? 31 : amt ) + XER.CA = amt > 31 ? sign-of-rS : (computation as per srawi) + */ + assign( sh_amt, binop(Iop_And32, mkU32(0x3F), + mkexpr(rB_lo32)) ); + assign( outofrange, + unop( Iop_1Uto8, + binop(Iop_CmpLT32U, mkU32(31), + mkexpr(sh_amt)) )); + e_tmp = binop( Iop_Sar32, + mkexpr(rS_lo32), + unop( Iop_32to8, + IRExpr_Mux0X( mkexpr(outofrange), + mkexpr(sh_amt), + mkU32(31)) ) ); + assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */True) ); + + set_XER_CA( ty, PPCG_FLAG_OP_SRAW, + mkexpr(rA), + mkWidenFrom32(ty, mkexpr(rS_lo32), True), + mkWidenFrom32(ty, mkexpr(sh_amt), True ), + mkWidenFrom32(ty, getXER_CA32(), True) ); + break; + } + + case 0x338: // srawi (Shift Right Alg Word Immediate, PPC32 p507) + DIP("srawi%s r%u,r%u,%d\n", flag_rC ? 
".":"", + rA_addr, rS_addr, sh_imm); + vassert(sh_imm < 32); + if (mode64) { + assign( rA, binop(Iop_Sar64, + binop(Iop_Shl64, getIReg(rS_addr), + mkU8(32)), + mkU8(32 + sh_imm)) ); + } else { + assign( rA, binop(Iop_Sar32, mkexpr(rS_lo32), + mkU8(sh_imm)) ); + } + + set_XER_CA( ty, PPCG_FLAG_OP_SRAWI, + mkexpr(rA), + mkWidenFrom32(ty, mkexpr(rS_lo32), /* Syned */True), + mkSzImm(ty, sh_imm), + mkWidenFrom32(ty, getXER_CA32(), /* Syned */False) ); + break; + + case 0x218: // srw (Shift Right Word, PPC32 p508) + DIP("srw%s r%u,r%u,r%u\n", flag_rC ? ".":"", + rA_addr, rS_addr, rB_addr); + /* rA = rS >>u rB */ + /* ppc32 semantics are: + srw(x,y) = (x >>u (y & 31)) -- primary result + & ~((y << 26) >>s 31) -- make result 0 + for y in 32 .. 63 + */ + e_tmp = + binop( + Iop_And32, + binop( Iop_Shr32, + mkexpr(rS_lo32), + unop( Iop_32to8, + binop(Iop_And32, mkexpr(rB_lo32), + mkU32(31)))), + unop( Iop_Not32, + binop( Iop_Sar32, + binop(Iop_Shl32, mkexpr(rB_lo32), + mkU8(26)), + mkU8(31)))); + assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) ); + break; + + + /* 64bit Shifts */ + case 0x01B: // sld (Shift Left DWord, PPC64 p568) + DIP("sld%s r%u,r%u,r%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, rB_addr); + /* rA = rS << rB */ + /* ppc64 semantics are: + slw(x,y) = (x << (y & 63)) -- primary result + & ~((y << 57) >>s 63) -- make result 0 + for y in 64 .. + */ + assign( rA, + binop( + Iop_And64, + binop( Iop_Shl64, + mkexpr(rS), + unop( Iop_64to8, + binop(Iop_And64, mkexpr(rB), mkU64(63)))), + unop( Iop_Not64, + binop( Iop_Sar64, + binop(Iop_Shl64, mkexpr(rB), mkU8(57)), + mkU8(63)))) ); + break; + + case 0x31A: { // srad (Shift Right Alg DWord, PPC64 p570) + IRTemp sh_amt = newTemp(Ity_I64); + DIP("srad%s r%u,r%u,r%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, rB_addr); + /* amt = rB & 127 + rA = Sar64( rS, amt > 63 ? 63 : amt ) + XER.CA = amt > 63 ? 
sign-of-rS : (computation as per srawi) + */ + assign( sh_amt, binop(Iop_And64, mkU64(0x7F), mkexpr(rB)) ); + assign( outofrange, + unop( Iop_1Uto8, + binop(Iop_CmpLT64U, mkU64(63), + mkexpr(sh_amt)) )); + assign( rA, + binop( Iop_Sar64, + mkexpr(rS), + unop( Iop_64to8, + IRExpr_Mux0X( mkexpr(outofrange), + mkexpr(sh_amt), + mkU64(63)) )) + ); + set_XER_CA( ty, PPCG_FLAG_OP_SRAD, + mkexpr(rA), mkexpr(rS), mkexpr(sh_amt), + mkWidenFrom32(ty, getXER_CA32(), /* Syned */False) ); + break; + } + + case 0x33A: case 0x33B: // sradi (Shr Alg DWord Imm, PPC64 p571) + sh_imm |= b1<<5; + vassert(sh_imm < 64); + DIP("sradi%s r%u,r%u,%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, sh_imm); + assign( rA, binop(Iop_Sar64, getIReg(rS_addr), mkU8(sh_imm)) ); + + set_XER_CA( ty, PPCG_FLAG_OP_SRADI, + mkexpr(rA), + getIReg(rS_addr), + mkU64(sh_imm), + mkWidenFrom32(ty, getXER_CA32(), /* Syned */False) ); + break; + + case 0x21B: // srd (Shift Right DWord, PPC64 p574) + DIP("srd%s r%u,r%u,r%u\n", + flag_rC ? ".":"", rA_addr, rS_addr, rB_addr); + /* rA = rS >>u rB */ + /* ppc semantics are: + srw(x,y) = (x >>u (y & 63)) -- primary result + & ~((y << 57) >>s 63) -- make result 0 + for y in 64 .. 127 + */ + assign( rA, + binop( + Iop_And64, + binop( Iop_Shr64, + mkexpr(rS), + unop( Iop_64to8, + binop(Iop_And64, mkexpr(rB), mkU64(63)))), + unop( Iop_Not64, + binop( Iop_Sar64, + binop(Iop_Shl64, mkexpr(rB), mkU8(57)), + mkU8(63)))) ); + break; + + default: + vex_printf("dis_int_shift(ppc)(opc2)\n"); + return False; + } + } else { + vex_printf("dis_int_shift(ppc)(opc1)\n"); + return False; + } + + putIReg( rA_addr, mkexpr(rA) ); + + if (flag_rC) { + set_CR0( mkexpr(rA) ); + } + return True; + } + + + + /* + Integer Load/Store Reverse Instructions + */ + /* Generates code to swap the byte order in an Ity_I32. 
*/
+ static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
+ {
+    /* One term per source byte, each moved to its mirrored position.
+       The result is the OR of the four terms. */
+    IRExpr *lo_to_top, *b1_to_b2, *b2_to_b1, *top_to_lo;
+
+    vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
+
+    /* bits 7..0 -> bits 31..24 */
+    lo_to_top = binop(Iop_Shl32, mkexpr(t), mkU8(24));
+    /* bits 15..8 -> bits 23..16 */
+    b1_to_b2  = binop(Iop_And32,
+                      binop(Iop_Shl32, mkexpr(t), mkU8(8)),
+                      mkU32(0x00FF0000));
+    /* bits 23..16 -> bits 15..8 */
+    b2_to_b1  = binop(Iop_And32,
+                      binop(Iop_Shr32, mkexpr(t), mkU8(8)),
+                      mkU32(0x0000FF00));
+    /* bits 31..24 -> bits 7..0 */
+    top_to_lo = binop(Iop_And32,
+                      binop(Iop_Shr32, mkexpr(t), mkU8(24)),
+                      mkU32(0x000000FF) );
+
+    return binop(Iop_Or32,
+                 lo_to_top,
+                 binop(Iop_Or32,
+                       b1_to_b2,
+                       binop(Iop_Or32, b2_to_b1, top_to_lo)));
+ }
+
+ /* Builds an Ity_I32 expression holding the low 16 bits of t with their
+    two bytes exchanged; bits 31..16 of the result are zero. */
+ static IRExpr* /* :: Ity_I32 */ gen_byterev16 ( IRTemp t )
+ {
+    IRExpr *lo_to_hi, *hi_to_lo;
+
+    vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
+
+    /* bits 7..0 -> bits 15..8 */
+    lo_to_hi = binop(Iop_And32,
+                     binop(Iop_Shl32, mkexpr(t), mkU8(8)),
+                     mkU32(0x0000FF00));
+    /* bits 15..8 -> bits 7..0 */
+    hi_to_lo = binop(Iop_And32,
+                     binop(Iop_Shr32, mkexpr(t), mkU8(8)),
+                     mkU32(0x000000FF));
+
+    return binop(Iop_Or32, lo_to_hi, hi_to_lo);
+ }
+
+ static Bool dis_int_ldst_rev ( UInt theInstr )
+ {
+    /* X-Form */
+    UChar opc1    = ifieldOPC(theInstr);
+    UChar rD_addr = ifieldRegDS(theInstr);
+    UChar rS_addr = rD_addr;
+    UChar rA_addr = ifieldRegA(theInstr);
+    UChar rB_addr = ifieldRegB(theInstr);
+    UInt  opc2    = ifieldOPClo10(theInstr);
+    UChar b0      = ifieldBIT0(theInstr);
+
+    IRType ty = mode64 ?
Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRTemp w1 = newTemp(Ity_I32); + IRTemp w2 = newTemp(Ity_I32); + + if (opc1 != 0x1F || b0 != 0) { + vex_printf("dis_int_ldst_rev(ppc)(opc1|b0)\n"); + return False; + } + + assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) ); + + switch (opc2) { + + case 0x316: // lhbrx (Load Halfword Byte-Reverse Indexed, PPC32 p449) + DIP("lhbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + assign( w1, unop(Iop_16Uto32, loadBE(Ity_I16, mkexpr(EA))) ); + assign( w2, gen_byterev16(w1) ); + putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2), + /* Signed */False) ); + break; + + case 0x216: // lwbrx (Load Word Byte-Reverse Indexed, PPC32 p459) + DIP("lwbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr); + assign( w1, loadBE(Ity_I32, mkexpr(EA)) ); + assign( w2, gen_byterev32(w1) ); + putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2), + /* Signed */False) ); + break; + + case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523) + DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) ); + storeBE( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) ); + break; + + case 0x296: // stwbrx (Store Word Byte-Reverse Indxd, PPC32 p531) + DIP("stwbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr); + assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) ); + storeBE( mkexpr(EA), gen_byterev32(w1) ); + break; + + default: + vex_printf("dis_int_ldst_rev(ppc)(opc2)\n"); + return False; + } + return True; + } + + + + /* + Processor Control Instructions + */ + static Bool dis_proc_ctl ( VexAbiInfo* vbi, UInt theInstr ) + { + UChar opc1 = ifieldOPC(theInstr); + + /* X-Form */ + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) ); + UChar rD_addr = ifieldRegDS(theInstr); + UInt b11to20 = IFIELD( theInstr, 11, 10 ); + + /* XFX-Form */ + UChar rS_addr = rD_addr; + UInt SPR = b11to20; + UInt TBR = b11to20; + UChar b20 = toUChar( IFIELD( theInstr, 20, 1 ) ); + UInt CRM = 
IFIELD( theInstr, 12, 8 ); + UChar b11 = toUChar( IFIELD( theInstr, 11, 1 ) ); + + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp rS = newTemp(ty); + assign( rS, getIReg(rS_addr) ); + + /* Reorder SPR field as per PPC32 p470 */ + SPR = ((SPR & 0x1F) << 5) | ((SPR >> 5) & 0x1F); + /* Reorder TBR field as per PPC32 p475 */ + TBR = ((TBR & 31) << 5) | ((TBR >> 5) & 31); + + if (opc1 != 0x1F || b0 != 0) { + vex_printf("dis_proc_ctl(ppc)(opc1|b0)\n"); + return False; + } + + switch (opc2) { + /* X-Form */ + case 0x200: { // mcrxr (Move to Cond Register from XER, PPC32 p466) + if (b21to22 != 0 || b11to20 != 0) { + vex_printf("dis_proc_ctl(ppc)(mcrxr,b21to22|b11to20)\n"); + return False; + } + DIP("mcrxr crf%d\n", crfD); + /* Move XER[0-3] (the top 4 bits of XER) to CR[crfD] */ + putGST_field( PPC_GST_CR, + getGST_field( PPC_GST_XER, 7 ), + crfD ); + + // Clear XER[0-3] + putXER_SO( mkU8(0) ); + putXER_OV( mkU8(0) ); + putXER_CA( mkU8(0) ); + break; + } + + case 0x013: + // b11to20==0: mfcr (Move from Cond Register, PPC32 p467) + // b20==1 & b11==0: mfocrf (Move from One CR Field) + // However it seems that the 'mfcr' behaviour is an acceptable + // implementation of mfocr (from the 2.02 arch spec) + if (b11to20 == 0) { + DIP("mfcr r%u\n", rD_addr); + putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ), + /* Signed */False) ); + break; + } + if (b20 == 1 && b11 == 0) { + DIP("mfocrf r%u,%u\n", rD_addr, CRM); + putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ), + /* Signed */False) ); + break; + } + /* not decodable */ + return False; + + /* XFX-Form */ + case 0x153: // mfspr (Move from Special-Purpose Register, PPC32 p470) + + switch (SPR) { // Choose a register... 
+ case 0x1: + DIP("mfxer r%u\n", rD_addr); + putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_XER ), + /* Signed */False) ); + break; + case 0x8: + DIP("mflr r%u\n", rD_addr); + putIReg( rD_addr, getGST( PPC_GST_LR ) ); + break; + case 0x9: + DIP("mfctr r%u\n", rD_addr); + putIReg( rD_addr, getGST( PPC_GST_CTR ) ); + break; + case 0x100: + DIP("mfvrsave r%u\n", rD_addr); + putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_VRSAVE ), + /* Signed */False) ); + break; + + case 0x103: + DIP("mfspr r%u, SPRG3(readonly)\n", rD_addr); + putIReg( rD_addr, getGST( PPC_GST_SPRG3_RO ) ); + break; + + /* Even a lowly PPC7400 can run the associated helper, so no + obvious need for feature testing at this point. */ + case 268 /* 0x10C */: + case 269 /* 0x10D */: { + UInt arg = SPR==268 ? 0 : 1; + IRTemp val = newTemp(Ity_I32); + IRExpr** args = mkIRExprVec_1( mkU32(arg) ); + IRDirty* d = unsafeIRDirty_1_N( + val, + 0/*regparms*/, + "ppc32g_dirtyhelper_MFSPR_268_269", + fnptr_to_fnentry + (vbi, &ppc32g_dirtyhelper_MFSPR_268_269), + args + ); + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + putIReg( rD_addr, + mkWidenFrom32(ty, mkexpr(val), False/*unsigned*/) ); + DIP("mfspr r%u,%u", rD_addr, (UInt)SPR); + break; + } + + /* Again, runs natively on PPC7400 (7447, really). Not + bothering with a feature test. */ + case 287: /* 0x11F */ { + IRTemp val = newTemp(Ity_I32); + IRExpr** args = mkIRExprVec_0(); + IRDirty* d = unsafeIRDirty_1_N( + val, + 0/*regparms*/, + "ppc32g_dirtyhelper_MFSPR_287", + fnptr_to_fnentry + (vbi, &ppc32g_dirtyhelper_MFSPR_287), + args + ); + /* execute the dirty call, dumping the result in val. 
*/ + stmt( IRStmt_Dirty(d) ); + putIReg( rD_addr, + mkWidenFrom32(ty, mkexpr(val), False/*unsigned*/) ); + DIP("mfspr r%u,%u", rD_addr, (UInt)SPR); + break; + } + + default: + vex_printf("dis_proc_ctl(ppc)(mfspr,SPR)(0x%x)\n", SPR); + return False; + } + break; + + case 0x173: { // mftb (Move from Time Base, PPC32 p475) + IRTemp val = newTemp(Ity_I64); + IRExpr** args = mkIRExprVec_0(); + IRDirty* d = unsafeIRDirty_1_N( + val, + 0/*regparms*/, + "ppcg_dirtyhelper_MFTB", + fnptr_to_fnentry(vbi, &ppcg_dirtyhelper_MFTB), + args ); + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + + switch (TBR) { + case 269: + DIP("mftbu r%u", rD_addr); + putIReg( rD_addr, + mkWidenFrom32(ty, unop(Iop_64HIto32, mkexpr(val)), + /* Signed */False) ); + break; + case 268: + DIP("mftb r%u", rD_addr); + putIReg( rD_addr, (mode64) ? mkexpr(val) : + unop(Iop_64to32, mkexpr(val)) ); + break; + default: + return False; /* illegal instruction */ + } + break; + } + + case 0x090: { + // b20==0: mtcrf (Move to Cond Register Fields, PPC32 p477) + // b20==1: mtocrf (Move to One Cond Reg Field) + Int cr; + UChar shft; + if (b11 != 0) + return False; + if (b20 == 1) { + /* ppc64 v2.02 spec says mtocrf gives undefined outcome if > + 1 field is written. It seems more robust to decline to + decode the insn if so. */ + switch (CRM) { + case 0x01: case 0x02: case 0x04: case 0x08: + case 0x10: case 0x20: case 0x40: case 0x80: + break; + default: + return False; + } + } + DIP("%s 0x%x,r%u\n", b20==1 ? "mtocrf" : "mtcrf", + CRM, rS_addr); + /* Write to each field specified by CRM */ + for (cr = 0; cr < 8; cr++) { + if ((CRM & (1 << (7-cr))) == 0) + continue; + shft = 4*(7-cr); + putGST_field( PPC_GST_CR, + binop(Iop_Shr32, + mkNarrowTo32(ty, mkexpr(rS)), + mkU8(shft)), cr ); + } + break; + } + + case 0x1D3: // mtspr (Move to Special-Purpose Register, PPC32 p483) + + switch (SPR) { // Choose a register... 
+ case 0x1: + DIP("mtxer r%u\n", rS_addr); + putGST( PPC_GST_XER, mkNarrowTo32(ty, mkexpr(rS)) ); + break; + case 0x8: + DIP("mtlr r%u\n", rS_addr); + putGST( PPC_GST_LR, mkexpr(rS) ); + break; + case 0x9: + DIP("mtctr r%u\n", rS_addr); + putGST( PPC_GST_CTR, mkexpr(rS) ); + break; + case 0x100: + DIP("mtvrsave r%u\n", rS_addr); + putGST( PPC_GST_VRSAVE, mkNarrowTo32(ty, mkexpr(rS)) ); + break; + + default: + vex_printf("dis_proc_ctl(ppc)(mtspr,SPR)(%u)\n", SPR); + return False; + } + break; + + default: + vex_printf("dis_proc_ctl(ppc)(opc2)\n"); + return False; + } + return True; + } + + + /* + Cache Management Instructions + */ + static Bool dis_cache_manage ( UInt theInstr, + DisResult* dres, + VexArchInfo* guest_archinfo ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar b21to25 = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + UInt lineszB = guest_archinfo->ppc_cache_line_szB; + + IRType ty = mode64 ? Ity_I64 : Ity_I32; + + /* For dcbt, the lowest two bits of b21to25 encode an + access-direction hint (TH field) which we ignore. Well, that's + what the PowerPC documentation says. In fact xlc -O4 on POWER5 + seems to generate values of 8 and 10 for b21to25. */ + if (opc1 == 0x1F && opc2 == 0x116) { + /* b21to25 &= ~3; */ /* if the docs were true */ + b21to25 = 0; /* blunt instrument */ + } + + if (opc1 != 0x1F || b21to25 != 0 || b0 != 0) { + if (0) vex_printf("dis_cache_manage %d %d %d\n", + (Int)opc1, (Int)b21to25, (Int)b0); + vex_printf("dis_cache_manage(ppc)(opc1|b21to25|b0)\n"); + return False; + } + + /* stay sane .. 
*/ + vassert(lineszB == 32 || lineszB == 64 || lineszB == 128); + + switch (opc2) { + //zz case 0x2F6: // dcba (Data Cache Block Allocate, PPC32 p380) + //zz vassert(0); /* AWAITING TEST CASE */ + //zz DIP("dcba r%u,r%u\n", rA_addr, rB_addr); + //zz if (0) vex_printf("vex ppc->IR: kludged dcba\n"); + //zz break; + + case 0x056: // dcbf (Data Cache Block Flush, PPC32 p382) + DIP("dcbf r%u,r%u\n", rA_addr, rB_addr); + /* nop as far as vex is concerned */ + break; + + case 0x036: // dcbst (Data Cache Block Store, PPC32 p384) + DIP("dcbst r%u,r%u\n", rA_addr, rB_addr); + /* nop as far as vex is concerned */ + break; + + case 0x116: // dcbt (Data Cache Block Touch, PPC32 p385) + DIP("dcbt r%u,r%u\n", rA_addr, rB_addr); + /* nop as far as vex is concerned */ + break; + + case 0x0F6: // dcbtst (Data Cache Block Touch for Store, PPC32 p386) + DIP("dcbtst r%u,r%u\n", rA_addr, rB_addr); + /* nop as far as vex is concerned */ + break; + + case 0x3F6: { // dcbz (Data Cache Block Clear to Zero, PPC32 p387) + /* Clear all bytes in cache block at (rA|0) + rB. */ + IRTemp EA = newTemp(ty); + IRTemp addr = newTemp(ty); + IRExpr* irx_addr; + UInt i; + DIP("dcbz r%u,r%u\n", rA_addr, rB_addr); + + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + + if (mode64) { + /* Round EA down to the start of the containing block. */ + assign( addr, binop( Iop_And64, + mkexpr(EA), + mkU64( ~((ULong)lineszB-1) )) ); + + for (i = 0; i < lineszB / 8; i++) { + irx_addr = binop( Iop_Add64, mkexpr(addr), mkU64(i*8) ); + storeBE( irx_addr, mkU64(0) ); + } + } else { + /* Round EA down to the start of the containing block. 
*/ + assign( addr, binop( Iop_And32, + mkexpr(EA), + mkU32( ~(lineszB-1) )) ); + + for (i = 0; i < lineszB / 4; i++) { + irx_addr = binop( Iop_Add32, mkexpr(addr), mkU32(i*4) ); + storeBE( irx_addr, mkU32(0) ); + } + } + break; + } + + case 0x3D6: { + // icbi (Instruction Cache Block Invalidate, PPC32 p431) + /* Invalidate all translations containing code from the cache + block at (rA|0) + rB. */ + IRTemp EA = newTemp(ty); + IRTemp addr = newTemp(ty); + DIP("icbi r%u,r%u\n", rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + + /* Round EA down to the start of the containing block. */ + assign( addr, binop( mkSzOp(ty, Iop_And8), + mkexpr(EA), + mkSzImm(ty, ~(((ULong)lineszB)-1) )) ); + putGST( PPC_GST_TISTART, mkexpr(addr) ); + putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) ); + + /* be paranoid ... */ + stmt( IRStmt_MBE(Imbe_Fence) ); + + irsb->jumpkind = Ijk_TInval; + irsb->next = mkSzImm(ty, nextInsnAddr()); + dres->whatNext = Dis_StopHere; + break; + } + + default: + vex_printf("dis_cache_manage(ppc)(opc2)\n"); + return False; + } + return True; + } + + + /*------------------------------------------------------------*/ + /*--- Floating Point Helpers ---*/ + /*------------------------------------------------------------*/ + + /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ + /* Produces a value in 0 .. 3, which is encoded as per the type + IRRoundingMode. PPCRoundingMode encoding is different to + IRRoundingMode, so need to map it. 
+ */ + static IRExpr* /* :: Ity_I32 */ get_IR_roundingmode ( void ) + { + /* + rounding mode | PPC | IR + ------------------------ + to nearest | 00 | 00 + to zero | 01 | 11 + to +infinity | 10 | 10 + to -infinity | 11 | 01 + */ + IRTemp rm_PPC32 = newTemp(Ity_I32); + assign( rm_PPC32, getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN ) ); + + // rm_IR = XOR( rm_PPC32, (rm_PPC32 << 1) & 2) + return binop( Iop_Xor32, + mkexpr(rm_PPC32), + binop( Iop_And32, + binop(Iop_Shl32, mkexpr(rm_PPC32), mkU8(1)), + mkU32(2) )); + } + + + /*------------------------------------------------------------*/ + /*--- Floating Point Instruction Translation ---*/ + /*------------------------------------------------------------*/ + + /* + Floating Point Load Instructions + */ + static Bool dis_fp_load ( UInt theInstr ) + { + /* X-Form, D-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frD_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + UInt uimm16 = ifieldUIMM16(theInstr); + + Int simm16 = extend_s_16to32(uimm16); + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRTemp rA = newTemp(ty); + IRTemp rB = newTemp(ty); + + assign( rA, getIReg(rA_addr) ); + assign( rB, getIReg(rB_addr) ); + + /* These are completely straightforward from a rounding and status + bits perspective: no rounding involved and no funny status or CR + bits affected. 
*/ + + switch (opc1) { + case 0x30: // lfs (Load Float Single, PPC32 p441) + DIP("lfs fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr); + assign( EA, ea_rAor0_simm(rA_addr, simm16) ); + putFReg( frD_addr, + unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) ); + break; + + case 0x31: // lfsu (Load Float Single, Update, PPC32 p442) + if (rA_addr == 0) + return False; + DIP("lfsu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr); + assign( EA, ea_rA_simm(rA_addr, simm16) ); + putFReg( frD_addr, + unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x32: // lfd (Load Float Double, PPC32 p437) + DIP("lfd fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr); + assign( EA, ea_rAor0_simm(rA_addr, simm16) ); + putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) ); + break; + + case 0x33: // lfdu (Load Float Double, Update, PPC32 p438) + if (rA_addr == 0) + return False; + DIP("lfdu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr); + assign( EA, ea_rA_simm(rA_addr, simm16) ); + putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x1F: + if (b0 != 0) { + vex_printf("dis_fp_load(ppc)(instr,b0)\n"); + return False; + } + + switch(opc2) { + case 0x217: // lfsx (Load Float Single Indexed, PPC32 p444) + DIP("lfsx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + putFReg( frD_addr, unop( Iop_F32toF64, + loadBE(Ity_F32, mkexpr(EA))) ); + break; + + case 0x237: // lfsux (Load Float Single, Update Indxd, PPC32 p443) + if (rA_addr == 0) + return False; + DIP("lfsux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr); + assign( EA, ea_rA_idxd(rA_addr, rB_addr) ); + putFReg( frD_addr, + unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x257: // lfdx (Load Float Double Indexed, PPC32 p440) + DIP("lfdx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + putFReg( frD_addr, 
loadBE(Ity_F64, mkexpr(EA)) ); + break; + + case 0x277: // lfdux (Load Float Double, Update Indxd, PPC32 p439) + if (rA_addr == 0) + return False; + DIP("lfdux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr); + assign( EA, ea_rA_idxd(rA_addr, rB_addr) ); + putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + default: + vex_printf("dis_fp_load(ppc)(opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_fp_load(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Floating Point Store Instructions + */ + static Bool dis_fp_store ( UInt theInstr ) + { + /* X-Form, D-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frS_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + Int uimm16 = ifieldUIMM16(theInstr); + + Int simm16 = extend_s_16to32(uimm16); + IRTemp frS = newTemp(Ity_F64); + IRType ty = mode64 ? Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRTemp rA = newTemp(ty); + IRTemp rB = newTemp(ty); + + assign( frS, getFReg(frS_addr) ); + assign( rA, getIReg(rA_addr) ); + assign( rB, getIReg(rB_addr) ); + + /* These are straightforward from a status bits perspective: no + funny status or CR bits affected. For single precision stores, + the values are truncated and denormalised (not rounded) to turn + them into single precision values. */ + + switch (opc1) { + + case 0x34: // stfs (Store Float Single, PPC32 p518) + DIP("stfs fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr); + assign( EA, ea_rAor0_simm(rA_addr, simm16) ); + /* Use Iop_TruncF64asF32 to truncate and possible denormalise + the value to be stored in the correct way, without any + rounding. 
*/ + storeBE( mkexpr(EA), + unop(Iop_TruncF64asF32, mkexpr(frS)) ); + break; + + case 0x35: // stfsu (Store Float Single, Update, PPC32 p519) + if (rA_addr == 0) + return False; + DIP("stfsu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr); + assign( EA, ea_rA_simm(rA_addr, simm16) ); + /* See comment for stfs */ + storeBE( mkexpr(EA), + unop(Iop_TruncF64asF32, mkexpr(frS)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x36: // stfd (Store Float Double, PPC32 p513) + DIP("stfd fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr); + assign( EA, ea_rAor0_simm(rA_addr, simm16) ); + storeBE( mkexpr(EA), mkexpr(frS) ); + break; + + case 0x37: // stfdu (Store Float Double, Update, PPC32 p514) + if (rA_addr == 0) + return False; + DIP("stfdu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr); + assign( EA, ea_rA_simm(rA_addr, simm16) ); + storeBE( mkexpr(EA), mkexpr(frS) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x1F: + if (b0 != 0) { + vex_printf("dis_fp_store(ppc)(instr,b0)\n"); + return False; + } + switch(opc2) { + case 0x297: // stfsx (Store Float Single Indexed, PPC32 p521) + DIP("stfsx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + /* See note for stfs */ + storeBE( mkexpr(EA), + unop(Iop_TruncF64asF32, mkexpr(frS)) ); + break; + + case 0x2B7: // stfsux (Store Float Sgl, Update Indxd, PPC32 p520) + if (rA_addr == 0) + return False; + DIP("stfsux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); + assign( EA, ea_rA_idxd(rA_addr, rB_addr) ); + /* See note for stfs */ + storeBE( mkexpr(EA), + unop(Iop_TruncF64asF32, mkexpr(frS)) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x2D7: // stfdx (Store Float Double Indexed, PPC32 p516) + DIP("stfdx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + storeBE( mkexpr(EA), mkexpr(frS) ); + break; + + case 0x2F7: // stfdux (Store Float Dbl, Update Indxd, PPC32 p515) + if (rA_addr == 0) + return False; + DIP("stfdux 
fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); + assign( EA, ea_rA_idxd(rA_addr, rB_addr) ); + storeBE( mkexpr(EA), mkexpr(frS) ); + putIReg( rA_addr, mkexpr(EA) ); + break; + + case 0x3D7: // stfiwx (Store Float as Int, Indexed, PPC32 p517) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + storeBE( mkexpr(EA), + unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))) ); + break; + + default: + vex_printf("dis_fp_store(ppc)(opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_fp_store(ppc)(opc1)\n"); + return False; + } + return True; + } + + + + /* + Floating Point Arith Instructions + */ + static Bool dis_fp_arith ( UInt theInstr ) + { + /* A-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frD_addr = ifieldRegDS(theInstr); + UChar frA_addr = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UChar frC_addr = ifieldRegC(theInstr); + UChar opc2 = ifieldOPClo5(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + IRTemp frD = newTemp(Ity_F64); + IRTemp frA = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + IRTemp frC = newTemp(Ity_F64); + IRExpr* rm = get_IR_roundingmode(); + + /* By default, we will examine the results of the operation and set + fpscr[FPRF] accordingly. */ + Bool set_FPRF = True; + + /* By default, if flag_RC is set, we will clear cr1 after the + operation. In reality we should set cr1 to indicate the + exception status of the operation, but since we're not + simulating exceptions, the exception status will appear to be + zero. Hence cr1 should be cleared if this is a . form insn. 
*/ + Bool clear_CR1 = True; + + assign( frA, getFReg(frA_addr)); + assign( frB, getFReg(frB_addr)); + assign( frC, getFReg(frC_addr)); + + switch (opc1) { + case 0x3B: + switch (opc2) { + case 0x12: // fdivs (Floating Divide Single, PPC32 p407) + if (frC_addr != 0) + return False; + DIP("fdivs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop( Iop_DivF64r32, + rm, mkexpr(frA), mkexpr(frB) )); + break; + + case 0x14: // fsubs (Floating Subtract Single, PPC32 p430) + if (frC_addr != 0) + return False; + DIP("fsubs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop( Iop_SubF64r32, + rm, mkexpr(frA), mkexpr(frB) )); + break; + + case 0x15: // fadds (Floating Add Single, PPC32 p401) + if (frC_addr != 0) + return False; + DIP("fadds%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop( Iop_AddF64r32, + rm, mkexpr(frA), mkexpr(frB) )); + break; + + case 0x16: // fsqrts (Floating SqRt (Single-Precision), PPC32 p428) + // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX) + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("fsqrts%s fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frB_addr); + // however illogically, on ppc970 this insn behaves identically + // to fsqrt (double-precision). So use SqrtF64, not SqrtF64r32. + assign( frD, binop( Iop_SqrtF64, rm, mkexpr(frB) )); + break; + + case 0x18: // fres (Floating Reciprocal Estimate Single, PPC32 p421) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("fres%s fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frB_addr); + { IRExpr* ieee_one + = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)); + assign( frD, triop( Iop_DivF64r32, + rm, + ieee_one, mkexpr(frB) )); + } + break; + + case 0x19: // fmuls (Floating Multiply Single, PPC32 p414) + if (frB_addr != 0) + return False; + DIP("fmuls%s fr%u,fr%u,fr%u\n", flag_rC ? 
".":"", + frD_addr, frA_addr, frC_addr); + assign( frD, triop( Iop_MulF64r32, + rm, mkexpr(frA), mkexpr(frC) )); + break; + + case 0x1A: // frsqrtes (Floating Recip SqRt Est Single) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + // Undocumented instruction? + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("frsqrtes%s fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frB_addr); + assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) ); + break; + + default: + vex_printf("dis_fp_arith(ppc)(3B: opc2)\n"); + return False; + } + break; + + case 0x3F: + switch (opc2) { + case 0x12: // fdiv (Floating Div (Double-Precision), PPC32 p406) + if (frC_addr != 0) + return False; + DIP("fdiv%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop(Iop_DivF64, rm, mkexpr(frA), mkexpr(frB)) ); + break; + + case 0x14: // fsub (Floating Sub (Double-Precision), PPC32 p429) + if (frC_addr != 0) + return False; + DIP("fsub%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop(Iop_SubF64, rm, mkexpr(frA), mkexpr(frB)) ); + break; + + case 0x15: // fadd (Floating Add (Double-Precision), PPC32 p400) + if (frC_addr != 0) + return False; + DIP("fadd%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frB_addr); + assign( frD, triop(Iop_AddF64, rm, mkexpr(frA), mkexpr(frB)) ); + break; + + case 0x16: // fsqrt (Floating SqRt (Double-Precision), PPC32 p427) + // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX) + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("fsqrt%s fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frB_addr); + assign( frD, binop(Iop_SqrtF64, rm, mkexpr(frB)) ); + break; + + case 0x17: { // fsel (Floating Select, PPC32 p426) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + IRTemp cc = newTemp(Ity_I32); + IRTemp cc_b0 = newTemp(Ity_I32); + + DIP("fsel%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? 
".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + + // cc: UN == 0x41, LT == 0x01, GT == 0x00, EQ == 0x40 + // => GT|EQ == (cc & 0x1 == 0) + assign( cc, binop(Iop_CmpF64, mkexpr(frA), + IRExpr_Const(IRConst_F64(0))) ); + assign( cc_b0, binop(Iop_And32, mkexpr(cc), mkU32(1)) ); + + // frD = (frA >= 0.0) ? frC : frB + // = (cc_b0 == 0) ? frC : frB + assign( frD, + IRExpr_Mux0X( + unop(Iop_1Uto8, + binop(Iop_CmpEQ32, mkexpr(cc_b0), mkU32(0))), + mkexpr(frB), + mkexpr(frC) )); + + /* One of the rare ones which don't mess with FPRF */ + set_FPRF = False; + break; + } + + case 0x18: // fre (Floating Reciprocal Estimate) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + // Note: unclear whether this insn really exists or not + // ppc970 doesn't have it, but POWER5 does + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("fre%s fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frB_addr); + { IRExpr* ieee_one + = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)); + assign( frD, triop( Iop_DivF64, + rm, + ieee_one, mkexpr(frB) )); + } + break; + + case 0x19: // fmul (Floating Mult (Double Precision), PPC32 p413) + if (frB_addr != 0) + vex_printf("dis_fp_arith(ppc)(instr,fmul)\n"); + DIP("fmul%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr); + assign( frD, triop(Iop_MulF64, rm, mkexpr(frA), mkexpr(frC)) ); + break; + + case 0x1A: // frsqrte (Floating Recip SqRt Est., PPC32 p424) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) + if (frA_addr != 0 || frC_addr != 0) + return False; + DIP("frsqrte%s fr%u,fr%u\n", flag_rC ? 
".":"", + frD_addr, frB_addr); + assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) ); + break; + + default: + vex_printf("dis_fp_arith(ppc)(3F: opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_fp_arith(ppc)(opc1)\n"); + return False; + } + + putFReg( frD_addr, mkexpr(frD) ); + + if (set_FPRF) { + // XXX XXX XXX FIXME + // set FPRF from frD + } + + if (flag_rC && clear_CR1) { + putCR321( 1, mkU8(0) ); + putCR0( 1, mkU8(0) ); + } + + return True; + } + + + + /* + Floating Point Mult-Add Instructions + */ + static Bool dis_fp_multadd ( UInt theInstr ) + { + /* A-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frD_addr = ifieldRegDS(theInstr); + UChar frA_addr = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UChar frC_addr = ifieldRegC(theInstr); + UChar opc2 = ifieldOPClo5(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + IRTemp frD = newTemp(Ity_F64); + IRTemp frA = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + IRTemp frC = newTemp(Ity_F64); + IRTemp rmt = newTemp(Ity_I32); + IRExpr* rm; + + /* By default, we will examine the results of the operation and set + fpscr[FPRF] accordingly. */ + Bool set_FPRF = True; + + /* By default, if flag_RC is set, we will clear cr1 after the + operation. In reality we should set cr1 to indicate the + exception status of the operation, but since we're not + simulating exceptions, the exception status will appear to be + zero. Hence cr1 should be cleared if this is a . form insn. */ + Bool clear_CR1 = True; + + /* Bind the rounding mode expression to a temp; there's no + point in creating gratuitous CSEs, as we know we'll need + to use it twice. */ + assign( rmt, get_IR_roundingmode() ); + rm = mkexpr(rmt); + + assign( frA, getFReg(frA_addr)); + assign( frB, getFReg(frB_addr)); + assign( frC, getFReg(frC_addr)); + + /* The rounding in this is all a bit dodgy. The idea is to only do + one rounding. 
That clearly isn't achieveable without dedicated + four-input IR primops, although in the single precision case we + can sort-of simulate it by doing the inner multiply in double + precision. + + In the negated cases, the negation happens after rounding. */ + + switch (opc1) { + case 0x3B: + switch (opc2) { + case 0x1C: // fmsubs (Floating Mult-Subtr Single, PPC32 p412) + DIP("fmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, qop( Iop_MSubF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); + break; + + case 0x1D: // fmadds (Floating Mult-Add Single, PPC32 p409) + DIP("fmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, qop( Iop_MAddF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); + break; + + case 0x1E: // fnmsubs (Float Neg Mult-Subtr Single, PPC32 p420) + DIP("fnmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, unop( Iop_NegF64, + qop( Iop_MSubF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); + break; + + case 0x1F: // fnmadds (Floating Negative Multiply-Add Single, PPC32 p418) + DIP("fnmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, unop( Iop_NegF64, + qop( Iop_MAddF64r32, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); + break; + + default: + vex_printf("dis_fp_multadd(ppc)(3B: opc2)\n"); + return False; + } + break; + + case 0x3F: + switch (opc2) { + case 0x1C: // fmsub (Float Mult-Sub (Dbl Precision), PPC32 p411) + DIP("fmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, qop( Iop_MSubF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); + break; + + case 0x1D: // fmadd (Float Mult-Add (Dbl Precision), PPC32 p408) + DIP("fmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? 
".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, qop( Iop_MAddF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) )); + break; + + case 0x1E: // fnmsub (Float Neg Mult-Subtr (Dbl Precision), PPC32 p419) + DIP("fnmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, unop( Iop_NegF64, + qop( Iop_MSubF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); + break; + + case 0x1F: // fnmadd (Float Neg Mult-Add (Dbl Precision), PPC32 p417) + DIP("fnmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"", + frD_addr, frA_addr, frC_addr, frB_addr); + assign( frD, unop( Iop_NegF64, + qop( Iop_MAddF64, rm, + mkexpr(frA), mkexpr(frC), mkexpr(frB) ))); + break; + + default: + vex_printf("dis_fp_multadd(ppc)(3F: opc2)\n"); + return False; + } + break; + + default: + vex_printf("dis_fp_multadd(ppc)(opc1)\n"); + return False; + } + + putFReg( frD_addr, mkexpr(frD) ); + + if (set_FPRF) { + // XXX XXX XXX FIXME + // set FPRF from frD + } + + if (flag_rC && clear_CR1) { + putCR321( 1, mkU8(0) ); + putCR0( 1, mkU8(0) ); + } + + return True; + } + + + + /* + Floating Point Compare Instructions + */ + static Bool dis_fp_cmp ( UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) ); + UChar frA_addr = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRTemp ccIR = newTemp(Ity_I32); + IRTemp ccPPC32 = newTemp(Ity_I32); + + IRTemp frA = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + + if (opc1 != 0x3F || b21to22 != 0 || b0 != 0) { + vex_printf("dis_fp_cmp(ppc)(instr)\n"); + return False; + } + + assign( frA, getFReg(frA_addr)); + assign( frB, getFReg(frB_addr)); + + assign( ccIR, binop(Iop_CmpF64, mkexpr(frA), mkexpr(frB)) ); + + /* Map compare result from IR to PPC32 */ + /* + FP cmp result | PPC | IR + 
-------------------------- + UN | 0x1 | 0x45 + EQ | 0x2 | 0x40 + GT | 0x4 | 0x00 + LT | 0x8 | 0x01 + */ + + // ccPPC32 = Shl(1, (~(ccIR>>5) & 2) + // | ((ccIR ^ (ccIR>>6)) & 1) + assign( + ccPPC32, + binop( + Iop_Shl32, + mkU32(1), + unop( + Iop_32to8, + binop( + Iop_Or32, + binop( + Iop_And32, + unop( + Iop_Not32, + binop(Iop_Shr32, mkexpr(ccIR), mkU8(5)) + ), + mkU32(2) + ), + binop( + Iop_And32, + binop( + Iop_Xor32, + mkexpr(ccIR), + binop(Iop_Shr32, mkexpr(ccIR), mkU8(6)) + ), + mkU32(1) + ) + ) + ) + ) + ); + + putGST_field( PPC_GST_CR, mkexpr(ccPPC32), crfD ); + + /* CAB: TODO?: Support writing cc to FPSCR->FPCC ? + putGST_field( PPC_GST_FPSCR, mkexpr(ccPPC32), 4 ); + */ + // XXX XXX XXX FIXME + // Also write the result into FPRF (it's not entirely clear how) + + /* Note: Differences between fcmpu and fcmpo are only in exception + flag settings, which aren't supported anyway. */ + switch (opc2) { + case 0x000: // fcmpu (Floating Compare Unordered, PPC32 p403) + DIP("fcmpu crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr); + break; + case 0x020: // fcmpo (Floating Compare Ordered, PPC32 p402) + DIP("fcmpo crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr); + break; + default: + vex_printf("dis_fp_cmp(ppc)(opc2)\n"); + return False; + } + return True; + } + + + + /* + Floating Point Rounding/Conversion Instructions + */ + static Bool dis_fp_round ( UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frD_addr = ifieldRegDS(theInstr); + UChar b16to20 = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + IRTemp frD = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + IRTemp r_tmp32 = newTemp(Ity_I32); + IRTemp r_tmp64 = newTemp(Ity_I64); + IRExpr* rm = get_IR_roundingmode(); + + /* By default, we will examine the results of the operation and set + fpscr[FPRF] accordingly. 
*/ + Bool set_FPRF = True; + + /* By default, if flag_RC is set, we will clear cr1 after the + operation. In reality we should set cr1 to indicate the + exception status of the operation, but since we're not + simulating exceptions, the exception status will appear to be + zero. Hence cr1 should be cleared if this is a . form insn. */ + Bool clear_CR1 = True; + + if (opc1 != 0x3F || b16to20 != 0) { + vex_printf("dis_fp_round(ppc)(instr)\n"); + return False; + } + + assign( frB, getFReg(frB_addr)); + + switch (opc2) { + case 0x00C: // frsp (Float Round to Single, PPC32 p423) + DIP("frsp%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( frD, binop( Iop_RoundF64toF32, rm, mkexpr(frB) )); + break; + + case 0x00E: // fctiw (Float Conv to Int, PPC32 p404) + DIP("fctiw%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp32, + binop(Iop_F64toI32, rm, mkexpr(frB)) ); + assign( frD, unop( Iop_ReinterpI64asF64, + unop( Iop_32Uto64, mkexpr(r_tmp32)))); + /* FPRF is undefined after fctiw. Leave unchanged. */ + set_FPRF = False; + break; + + case 0x00F: // fctiwz (Float Conv to Int, Round to Zero, PPC32 p405) + DIP("fctiwz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp32, + binop(Iop_F64toI32, mkU32(Irrm_ZERO), mkexpr(frB) )); + assign( frD, unop( Iop_ReinterpI64asF64, + unop( Iop_32Uto64, mkexpr(r_tmp32)))); + /* FPRF is undefined after fctiwz. Leave unchanged. */ + set_FPRF = False; + break; + + case 0x32E: // fctid (Float Conv to Int DWord, PPC64 p437) + DIP("fctid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp64, + binop(Iop_F64toI64, rm, mkexpr(frB)) ); + assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) ); + /* FPRF is undefined after fctid. Leave unchanged. */ + set_FPRF = False; + break; + + case 0x32F: // fctidz (Float Conv to Int DWord, Round to Zero, PPC64 p437) + DIP("fctidz%s fr%u,fr%u\n", flag_rC ? 
".":"", frD_addr, frB_addr); + assign( r_tmp64, + binop(Iop_F64toI64, mkU32(Irrm_ZERO), mkexpr(frB)) ); + assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) ); + /* FPRF is undefined after fctidz. Leave unchanged. */ + set_FPRF = False; + break; + + case 0x34E: // fcfid (Float Conv from Int DWord, PPC64 p434) + DIP("fcfid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) ); + assign( frD, + binop(Iop_I64toF64, rm, mkexpr(r_tmp64)) ); + break; + + default: + vex_printf("dis_fp_round(ppc)(opc2)\n"); + return False; + } + + putFReg( frD_addr, mkexpr(frD) ); + + if (set_FPRF) { + // XXX XXX XXX FIXME + // set FPRF from frD + } + + if (flag_rC && clear_CR1) { + putCR321( 1, mkU8(0) ); + putCR0( 1, mkU8(0) ); + } + + return True; + } + + + + /* + Floating Point Move Instructions + */ + static Bool dis_fp_move ( UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar frD_addr = ifieldRegDS(theInstr); + UChar b16to20 = ifieldRegA(theInstr); + UChar frB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + IRTemp frD = newTemp(Ity_F64); + IRTemp frB = newTemp(Ity_F64); + + if (opc1 != 0x3F || b16to20 != 0) { + vex_printf("dis_fp_move(ppc)(instr)\n"); + return False; + } + + assign( frB, getFReg(frB_addr)); + + switch (opc2) { + case 0x028: // fneg (Floating Negate, PPC32 p416) + DIP("fneg%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( frD, unop( Iop_NegF64, mkexpr(frB) )); + break; + + case 0x048: // fmr (Floating Move Register, PPC32 p410) + DIP("fmr%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( frD, mkexpr(frB) ); + break; + + case 0x088: // fnabs (Floating Negative Absolute Value, PPC32 p415) + DIP("fnabs%s fr%u,fr%u\n", flag_rC ? 
".":"", frD_addr, frB_addr); + assign( frD, unop( Iop_NegF64, unop( Iop_AbsF64, mkexpr(frB) ))); + break; + + case 0x108: // fabs (Floating Absolute Value, PPC32 p399) + DIP("fabs%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr); + assign( frD, unop( Iop_AbsF64, mkexpr(frB) )); + break; + + default: + vex_printf("dis_fp_move(ppc)(opc2)\n"); + return False; + } + + putFReg( frD_addr, mkexpr(frD) ); + + /* None of these change FPRF. cr1 is set in the usual way though, + if flag_rC is set. */ + + if (flag_rC) { + putCR321( 1, mkU8(0) ); + putCR0( 1, mkU8(0) ); + } + + return True; + } + + + + /* + Floating Point Status/Control Register Instructions + */ + static Bool dis_fp_scr ( UInt theInstr ) + { + /* Many forms - see each switch case */ + UChar opc1 = ifieldOPC(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar flag_rC = ifieldBIT0(theInstr); + + if (opc1 != 0x3F) { + vex_printf("dis_fp_scr(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x026: { // mtfsb1 (Move to FPSCR Bit 1, PPC32 p479) + // Bit crbD of the FPSCR is set. + UChar crbD = ifieldRegDS(theInstr); + UInt b11to20 = IFIELD(theInstr, 11, 10); + + if (b11to20 != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mtfsb1)\n"); + return False; + } + DIP("mtfsb1%s crb%d \n", flag_rC ? 
".":"", crbD); + putGST_masked( PPC_GST_FPSCR, mkU32(1<<(31-crbD)), 1<<(31-crbD) ); + break; + } + + case 0x040: { // mcrfs (Move to Condition Register from FPSCR, PPC32 p465) + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) ); + UChar crfS = toUChar( IFIELD( theInstr, 18, 3 ) ); + UChar b11to17 = toUChar( IFIELD( theInstr, 11, 7 ) ); + IRTemp tmp = newTemp(Ity_I32); + IRExpr* fpscr_all; + if (b21to22 != 0 || b11to17 != 0 || flag_rC != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mcrfs)\n"); + return False; + } + DIP("mcrfs crf%d,crf%d\n", crfD, crfS); + vassert(crfD < 8); + vassert(crfS < 8); + fpscr_all = getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN ); + assign( tmp, binop(Iop_And32, + binop(Iop_Shr32,fpscr_all,mkU8(4 * (7-crfS))), + mkU32(0xF)) ); + putGST_field( PPC_GST_CR, mkexpr(tmp), crfD ); + break; + } + + case 0x046: { // mtfsb0 (Move to FPSCR Bit 0, PPC32 p478) + // Bit crbD of the FPSCR is cleared. + UChar crbD = ifieldRegDS(theInstr); + UInt b11to20 = IFIELD(theInstr, 11, 10); + + if (b11to20 != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mtfsb0)\n"); + return False; + } + DIP("mtfsb0%s crb%d\n", flag_rC ? ".":"", crbD); + putGST_masked( PPC_GST_FPSCR, mkU32(0), 1<<(31-crbD) ); + break; + } + + case 0x086: { // mtfsfi (Move to FPSCR Field Immediate, PPC32 p481) + UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); + UChar b16to22 = toUChar( IFIELD( theInstr, 16, 7 ) ); + UChar IMM = toUChar( IFIELD( theInstr, 12, 4 ) ); + UChar b11 = toUChar( IFIELD( theInstr, 11, 1 ) ); + + if (b16to22 != 0 || b11 != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mtfsfi)\n"); + return False; + } + DIP("mtfsfi%s crf%d,%d\n", flag_rC ? 
".":"", crfD, IMM); + putGST_field( PPC_GST_FPSCR, mkU32(IMM), crfD ); + break; + } + + case 0x247: { // mffs (Move from FPSCR, PPC32 p468) + UChar frD_addr = ifieldRegDS(theInstr); + UInt b11to20 = IFIELD(theInstr, 11, 10); + IRExpr* fpscr_all = getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN ); + + if (b11to20 != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mffs)\n"); + return False; + } + DIP("mffs%s fr%u\n", flag_rC ? ".":"", frD_addr); + putFReg( frD_addr, + unop( Iop_ReinterpI64asF64, + unop( Iop_32Uto64, fpscr_all ))); + break; + } + + case 0x2C7: { // mtfsf (Move to FPSCR Fields, PPC32 p480) + UChar b25 = toUChar( IFIELD(theInstr, 25, 1) ); + UChar FM = toUChar( IFIELD(theInstr, 17, 8) ); + UChar b16 = toUChar( IFIELD(theInstr, 16, 1) ); + UChar frB_addr = ifieldRegB(theInstr); + IRTemp frB = newTemp(Ity_F64); + IRTemp rB_32 = newTemp(Ity_I32); + Int i, mask; + + if (b25 != 0 || b16 != 0) { + vex_printf("dis_fp_scr(ppc)(instr,mtfsf)\n"); + return False; + } + DIP("mtfsf%s %d,fr%u\n", flag_rC ? 
".":"", FM, frB_addr); + assign( frB, getFReg(frB_addr)); + assign( rB_32, unop( Iop_64to32, + unop( Iop_ReinterpF64asI64, mkexpr(frB) ))); + // Build 32bit mask from FM: + mask = 0; + for (i=0; i<8; i++) { + if ((FM & (1<<(7-i))) == 1) { + mask |= 0xF << (7-i); + } + } + putGST_masked( PPC_GST_FPSCR, mkexpr(rB_32), mask ); + break; + } + + default: + vex_printf("dis_fp_scr(ppc)(opc2)\n"); + return False; + } + return True; + } + + + + /*------------------------------------------------------------*/ + /*--- AltiVec Instruction Translation ---*/ + /*------------------------------------------------------------*/ + + /* + Altivec Cache Control Instructions (Data Streams) + */ + static Bool dis_av_datastream ( UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar flag_T = toUChar( IFIELD( theInstr, 25, 1 ) ); + UChar flag_A = flag_T; + UChar b23to24 = toUChar( IFIELD( theInstr, 23, 2 ) ); + UChar STRM = toUChar( IFIELD( theInstr, 21, 2 ) ); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + if (opc1 != 0x1F || b23to24 != 0 || b0 != 0) { + vex_printf("dis_av_datastream(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x156: // dst (Data Stream Touch, AV p115) + DIP("dst%s r%u,r%u,%d\n", flag_T ? "t" : "", + rA_addr, rB_addr, STRM); + break; + + case 0x176: // dstst (Data Stream Touch for Store, AV p117) + DIP("dstst%s r%u,r%u,%d\n", flag_T ? 
"t" : "", + rA_addr, rB_addr, STRM); + break; + + case 0x336: // dss (Data Stream Stop, AV p114) + if (rA_addr != 0 || rB_addr != 0) { + vex_printf("dis_av_datastream(ppc)(opc2,dst)\n"); + return False; + } + if (flag_A == 0) { + DIP("dss %d\n", STRM); + } else { + DIP("dssall\n"); + } + break; + + default: + vex_printf("dis_av_datastream(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Processor Control Instructions + */ + static Bool dis_av_procctl ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + if (opc1 != 0x4) { + vex_printf("dis_av_procctl(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x604: // mfvscr (Move from VSCR, AV p129) + if (vA_addr != 0 || vB_addr != 0) { + vex_printf("dis_av_procctl(ppc)(opc2,dst)\n"); + return False; + } + DIP("mfvscr v%d\n", vD_addr); + putVReg( vD_addr, unop(Iop_32UtoV128, getGST( PPC_GST_VSCR )) ); + break; + + case 0x644: { // mtvscr (Move to VSCR, AV p130) + IRTemp vB = newTemp(Ity_V128); + if (vD_addr != 0 || vA_addr != 0) { + vex_printf("dis_av_procctl(ppc)(opc2,dst)\n"); + return False; + } + DIP("mtvscr v%d\n", vB_addr); + assign( vB, getVReg(vB_addr)); + putGST( PPC_GST_VSCR, unop(Iop_V128to32, mkexpr(vB)) ); + break; + } + default: + vex_printf("dis_av_procctl(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Load Instructions + */ + static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRTemp EA_align16 = newTemp(ty); + + if (opc1 != 0x1F || b0 != 0) { + vex_printf("dis_av_load(ppc)(instr)\n"); + return False; + } + + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + assign( EA_align16, addr_align( mkexpr(EA), 16 ) ); + + switch (opc2) { + + case 0x006: { // lvsl (Load Vector for Shift Left, AV p123) + IRDirty* d; + UInt vD_off = vectorGuestRegOffset(vD_addr); + IRExpr** args = mkIRExprVec_3( + mkU32(vD_off), + binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)), + mkU32(0xF)), + mkU32(0)/*left*/ ); + if (!mode64) { + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "ppc32g_dirtyhelper_LVS", + fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS), + args ); + } else { + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "ppc64g_dirtyhelper_LVS", + fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS), + args ); + } + DIP("lvsl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* declare guest state effects */ + d->needsBBP = True; + d->nFxState = 1; + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = vD_off; + d->fxState[0].size = sizeof(U128); + + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + break; + } + case 0x026: { // lvsr (Load Vector for Shift Right, AV p125) + IRDirty* d; + UInt vD_off = vectorGuestRegOffset(vD_addr); + IRExpr** args = mkIRExprVec_3( + mkU32(vD_off), + binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)), + mkU32(0xF)), + mkU32(1)/*right*/ ); + if (!mode64) { + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "ppc32g_dirtyhelper_LVS", + fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS), + args ); + } else { + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "ppc64g_dirtyhelper_LVS", + fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS), + args ); + } + DIP("lvsr v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* declare guest state effects */ + d->needsBBP = True; + d->nFxState = 1; + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = vD_off; + d->fxState[0].size = sizeof(U128); + + /* execute 
the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + break; + } + case 0x007: // lvebx (Load Vector Element Byte Indexed, AV p119) + DIP("lvebx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* loads addressed byte into vector[EA[0:3]]; + since all other destination bytes are undefined, + can simply load entire vector from 16-aligned EA */ + putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) ); + break; + + case 0x027: // lvehx (Load Vector Element Half Word Indexed, AV p121) + DIP("lvehx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* see note for lvebx */ + putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) ); + break; + + case 0x047: // lvewx (Load Vector Element Word Indexed, AV p122) + DIP("lvewx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* see note for lvebx */ + putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) ); + break; + + case 0x067: // lvx (Load Vector Indexed, AV p127) + DIP("lvx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* whole-vector load from EA_align16, not from EA itself */ + putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) ); + break; + + case 0x167: // lvxl (Load Vector Indexed LRU, AV p128) + DIP("lvxl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr); + /* translated exactly like lvx: the generated IR carries + nothing for the LRU part of the instruction */ + putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) ); + break; + + default: + vex_printf("dis_av_load(ppc)(opc2)\n"); + return False; + } + return True; + } + + + /* + AltiVec Store Instructions + + Decodes the X-form vector stores selected by opc2. + Returns False, after printing a diagnostic, for an encoding + that is not recognised; True otherwise. + */ + static Bool dis_av_store ( UInt theInstr ) + { + /* X-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vS_addr = ifieldRegDS(theInstr); + UChar rA_addr = ifieldRegA(theInstr); + UChar rB_addr = ifieldRegB(theInstr); + UInt opc2 = ifieldOPClo10(theInstr); + UChar b0 = ifieldBIT0(theInstr); + + IRType ty = mode64 ? 
Ity_I64 : Ity_I32; + IRTemp EA = newTemp(ty); + IRTemp addr_aligned = newTemp(ty); + IRTemp vS = newTemp(Ity_V128); + IRTemp eb = newTemp(Ity_I8); + IRTemp idx = newTemp(Ity_I8); + + if (opc1 != 0x1F || b0 != 0) { + vex_printf("dis_av_store(ppc)(instr)\n"); + return False; + } + + assign( vS, getVReg(vS_addr)); + assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); + + switch (opc2) { + case 0x087: { // stvebx (Store Vector Byte Indexed, AV p131) + DIP("stvebx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr); + assign( eb, binop(Iop_And8, mkU8(0xF), + unop(Iop_32to8, + mkNarrowTo32(ty, mkexpr(EA)) )) ); + assign( idx, binop(Iop_Shl8, + binop(Iop_Sub8, mkU8(15), mkexpr(eb)), + mkU8(3)) ); + storeBE( mkexpr(EA), + unop(Iop_32to8, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) ); + break; + } + case 0x0A7: { // stvehx (Store Vector Half Word Indexed, AV p132) + DIP("stvehx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr); + assign( addr_aligned, addr_align(mkexpr(EA), 2) ); + assign( eb, binop(Iop_And8, mkU8(0xF), + mkNarrowTo8(ty, mkexpr(addr_aligned) )) ); + assign( idx, binop(Iop_Shl8, + binop(Iop_Sub8, mkU8(14), mkexpr(eb)), + mkU8(3)) ); + storeBE( mkexpr(addr_aligned), + unop(Iop_32to16, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) ); + break; + } + case 0x0C7: { // stvewx (Store Vector Word Indexed, AV p133) + DIP("stvewx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr); + assign( addr_aligned, addr_align(mkexpr(EA), 4) ); + assign( eb, binop(Iop_And8, mkU8(0xF), + mkNarrowTo8(ty, mkexpr(addr_aligned) )) ); + assign( idx, binop(Iop_Shl8, + binop(Iop_Sub8, mkU8(12), mkexpr(eb)), + mkU8(3)) ); + storeBE( mkexpr(addr_aligned), + unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))) ); + break; + } + + case 0x0E7: // stvx (Store Vector Indexed, AV p134) + DIP("stvx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr); + storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) ); + break; + + case 0x1E7: // stvxl (Store Vector Indexed LRU, AV p135) + 
DIP("stvxl v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr); + storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) ); + break; + + default: + vex_printf("dis_av_store(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Arithmetic Instructions + */ + static Bool dis_av_arith ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp z3 = newTemp(Ity_I64); + IRTemp z2 = newTemp(Ity_I64); + IRTemp z1 = newTemp(Ity_I64); + IRTemp z0 = newTemp(Ity_I64); + IRTemp aEvn, aOdd; + IRTemp a15, a14, a13, a12, a11, a10, a9, a8; + IRTemp a7, a6, a5, a4, a3, a2, a1, a0; + IRTemp b3, b2, b1, b0; + + aEvn = aOdd = IRTemp_INVALID; + a15 = a14 = a13 = a12 = a11 = a10 = a9 = a8 = IRTemp_INVALID; + a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID; + b3 = b2 = b1 = b0 = IRTemp_INVALID; + + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_arith(ppc)(opc1 != 0x4)\n"); + return False; + } + + switch (opc2) { + /* Add */ + case 0x180: { // vaddcuw (Add Carryout Unsigned Word, AV p136) + DIP("vaddcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* unsigned_ov(x+y) = (y >u not(x)) */ + putVReg( vD_addr, binop(Iop_ShrN32x4, + binop(Iop_CmpGT32Ux4, mkexpr(vB), + unop(Iop_NotV128, mkexpr(vA))), + mkU8(31)) ); + break; + } + case 0x000: // vaddubm (Add Unsigned Byte Modulo, AV p141) + DIP("vaddubm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Add8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x040: // vadduhm (Add Unsigned Half Word Modulo, AV p143) + DIP("vadduhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Add16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x080: // vadduwm (Add Unsigned Word Modulo, AV 
p145) + DIP("vadduwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142) + DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT], perhaps via new primop: Iop_SatOfQAdd8Ux16 + break; + + case 0x240: // vadduhs (Add Unsigned Half Word Saturate, AV p144) + DIP("vadduhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd16Ux8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x280: // vadduws (Add Unsigned Word Saturate, AV p146) + DIP("vadduws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x300: // vaddsbs (Add Signed Byte Saturate, AV p138) + DIP("vaddsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd8Sx16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x340: // vaddshs (Add Signed Half Word Saturate, AV p139) + DIP("vaddshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd16Sx8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x380: // vaddsws (Add Signed Word Saturate, AV p140) + DIP("vaddsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QAdd32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + + /* Subtract */ + case 0x580: { // vsubcuw (Subtract Carryout Unsigned Word, AV p260) + DIP("vsubcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* unsigned_ov(x-y) = (y >u x), i.e. a borrow.  The value stored is + the complement of that per-word test, shifted down to bit 0 of + each 32-bit lane: 1 when vB is not above vA, else 0. */ + putVReg( vD_addr, binop(Iop_ShrN32x4, + unop(Iop_NotV128, + binop(Iop_CmpGT32Ux4, mkexpr(vB), + mkexpr(vA))), + mkU8(31)) ); + break; + } + case 0x400: // vsububm (Subtract Unsigned Byte Modulo, AV p265) + DIP("vsububm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + 
putVReg( vD_addr, binop(Iop_Sub8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x440: // vsubuhm (Subtract Unsigned Half Word Modulo, AV p267) + DIP("vsubuhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sub16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x480: // vsubuwm (Subtract Unsigned Word Modulo, AV p269) + DIP("vsubuwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sub32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x600: // vsububs (Subtract Unsigned Byte Saturate, AV p266) + DIP("vsububs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub8Ux16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x640: // vsubuhs (Subtract Unsigned HWord Saturate, AV p268) + DIP("vsubuhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub16Ux8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x680: // vsubuws (Subtract Unsigned Word Saturate, AV p270) + DIP("vsubuws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x700: // vsubsbs (Subtract Signed Byte Saturate, AV p262) + DIP("vsubsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub8Sx16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x740: // vsubshs (Subtract Signed Half Word Saturate, AV p263) + DIP("vsubshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub16Sx8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + case 0x780: // vsubsws (Subtract Signed Word Saturate, AV p264) + DIP("vsubsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_QSub32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + + /* Maximum */ + case 0x002: // vmaxub (Maximum Unsigned Byte, AV p182) + DIP("vmaxub v%d,v%d,v%d\n", vD_addr, vA_addr, 
vB_addr); + putVReg( vD_addr, binop(Iop_Max8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x042: // vmaxuh (Maximum Unsigned Half Word, AV p183) + DIP("vmaxuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x082: // vmaxuw (Maximum Unsigned Word, AV p184) + DIP("vmaxuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x102: // vmaxsb (Maximum Signed Byte, AV p179) + DIP("vmaxsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x142: // vmaxsh (Maximum Signed Half Word, AV p180) + DIP("vmaxsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x182: // vmaxsw (Maximum Signed Word, AV p181) + DIP("vmaxsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; + + + /* Minimum */ + case 0x202: // vminub (Minimum Unsigned Byte, AV p191) + DIP("vminub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x242: // vminuh (Minimum Unsigned Half Word, AV p192) + DIP("vminuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x282: // vminuw (Minimum Unsigned Word, AV p193) + DIP("vminuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x302: // vminsb (Minimum Signed Byte, AV p188) + DIP("vminsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x342: // vminsh (Minimum Signed Half Word, AV p189) + DIP("vminsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, 
binop(Iop_Min16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x382: // vminsw (Minimum Signed Word, AV p190) + DIP("vminsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; + + + /* Average */ + case 0x402: // vavgub (Average Unsigned Byte, AV p152) + DIP("vavgub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x442: // vavguh (Average Unsigned Half Word, AV p153) + DIP("vavguh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x482: // vavguw (Average Unsigned Word, AV p154) + DIP("vavguw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x502: // vavgsb (Average Signed Byte, AV p149) + DIP("vavgsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x542: // vavgsh (Average Signed Half Word, AV p150) + DIP("vavgsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x582: // vavgsw (Average Signed Word, AV p151) + DIP("vavgsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Avg32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; + + + /* Multiply */ + case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213) + DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB))); + break; + + case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214) + DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB))); + break; + + case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211) + DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( 
vD_addr, + binop(Iop_MullEven8Sx16, mkexpr(vA), mkexpr(vB))); + break; + + case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212) + DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB))); + break; + + /* NOTE(review): the "even" multiplies below go through MK_Iop_MullOdd* + while the "odd" ones above use Iop_MullEven* -- presumably the + instruction set and the IR number vector lanes from opposite + ends; confirm against the IR op definitions. */ + case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209) + DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) )); + break; + + case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210) + DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) )); + break; + + case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207) + DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd8Sx16( mkexpr(vA), mkexpr(vB) )); + break; + + case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208) + DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); + break; + + + /* Sum Across Partial */ + case 0x608: { // vsum4ubs (Sum Partial (1/4) UB Saturate, AV p275) + IRTemp aEE, aEO, aOE, aOO; + aEE = aEO = aOE = aOO = IRTemp_INVALID; + DIP("vsum4ubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + + /* vA: V128_8Ux16 -> 4 x V128_32Ux4, zero-extended */ + expand8Ux16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...) + expand16Ux8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...) + expand16Ux8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...) 
+ + /* break V128 to 4xI32's, zero-extending to I64's */ + breakV128to4x64U( mkexpr(aEE), &a15, &a11, &a7, &a3 ); + breakV128to4x64U( mkexpr(aOE), &a14, &a10, &a6, &a2 ); + breakV128to4x64U( mkexpr(aEO), &a13, &a9, &a5, &a1 ); + breakV128to4x64U( mkexpr(aOO), &a12, &a8, &a4, &a0 ); + breakV128to4x64U( mkexpr(vB), &b3, &b2, &b1, &b0 ); + + /* add lanes */ + assign( z3, binop(Iop_Add64, mkexpr(b3), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a15), mkexpr(a14)), + binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) ); + assign( z2, binop(Iop_Add64, mkexpr(b2), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a11), mkexpr(a10)), + binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) ); + assign( z1, binop(Iop_Add64, mkexpr(b1), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a7), mkexpr(a6)), + binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) ); + assign( z0, binop(Iop_Add64, mkexpr(b0), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a3), mkexpr(a2)), + binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) ); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2), + mkexpr(z1), mkexpr(z0)) ); + break; + } + case 0x708: { // vsum4sbs (Sum Partial (1/4) SB Saturate, AV p273) + IRTemp aEE, aEO, aOE, aOO; + aEE = aEO = aOE = aOO = IRTemp_INVALID; + DIP("vsum4sbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + + /* vA: V128_8Sx16 -> 4 x V128_32Sx4, sign-extended */ + expand8Sx16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...) + expand16Sx8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...) + expand16Sx8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...) 
+ + /* break V128 to 4xI32's, sign-extending to I64's */ + breakV128to4x64S( mkexpr(aEE), &a15, &a11, &a7, &a3 ); + breakV128to4x64S( mkexpr(aOE), &a14, &a10, &a6, &a2 ); + breakV128to4x64S( mkexpr(aEO), &a13, &a9, &a5, &a1 ); + breakV128to4x64S( mkexpr(aOO), &a12, &a8, &a4, &a0 ); + breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 ); + + /* add lanes */ + assign( z3, binop(Iop_Add64, mkexpr(b3), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a15), mkexpr(a14)), + binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) ); + assign( z2, binop(Iop_Add64, mkexpr(b2), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a11), mkexpr(a10)), + binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) ); + assign( z1, binop(Iop_Add64, mkexpr(b1), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a7), mkexpr(a6)), + binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) ); + assign( z0, binop(Iop_Add64, mkexpr(b0), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a3), mkexpr(a2)), + binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) ); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2), + mkexpr(z1), mkexpr(z0)) ); + break; + } + case 0x648: { // vsum4shs (Sum Partial (1/4) SHW Saturate, AV p274) + DIP("vsum4shs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + + /* vA: V128_16Sx8 -> 2 x V128_32Sx4, sign-extended */ + expand16Sx8( mkexpr(vA), &aEvn, &aOdd ); // (7,5...),(6,4...) 
+ + /* break V128 to 4xI32's, sign-extending to I64's */ + breakV128to4x64S( mkexpr(aEvn), &a7, &a5, &a3, &a1 ); + breakV128to4x64S( mkexpr(aOdd), &a6, &a4, &a2, &a0 ); + breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 ); + + /* add lanes */ + assign( z3, binop(Iop_Add64, mkexpr(b3), + binop(Iop_Add64, mkexpr(a7), mkexpr(a6)))); + assign( z2, binop(Iop_Add64, mkexpr(b2), + binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))); + assign( z1, binop(Iop_Add64, mkexpr(b1), + binop(Iop_Add64, mkexpr(a3), mkexpr(a2)))); + assign( z0, binop(Iop_Add64, mkexpr(b0), + binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2), + mkexpr(z1), mkexpr(z0)) ); + break; + } + case 0x688: { // vsum2sws (Sum Partial (1/2) SW Saturate, AV p272) + DIP("vsum2sws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + + /* break V128 to 4xI32's, sign-extending to I64's */ + breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 ); + breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 ); + + /* add lanes */ + assign( z2, binop(Iop_Add64, mkexpr(b2), + binop(Iop_Add64, mkexpr(a3), mkexpr(a2))) ); + assign( z0, binop(Iop_Add64, mkexpr(b0), + binop(Iop_Add64, mkexpr(a1), mkexpr(a0))) ); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkexpr(z2), + mkU64(0), mkexpr(z0)) ); + break; + } + case 0x788: { // vsumsws (Sum SW Saturate, AV p271) + DIP("vsumsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + + /* break V128 to 4xI32's, sign-extending to I64's */ + breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 ); + breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 ); + + /* add lanes */ + assign( z0, binop(Iop_Add64, mkexpr(b0), + binop(Iop_Add64, + binop(Iop_Add64, mkexpr(a3), mkexpr(a2)), + binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) ); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkU64(0), + mkU64(0), mkexpr(z0)) ); 
+ break; + } + default: + vex_printf("dis_av_arith(ppc)(opc2=0x%x)\n", opc2); + return False; + } + return True; + } + + /* + AltiVec Logic Instructions + + Translates the VX-form bitwise operations vand, vandc, vor, vxor + and vnor: the result of combining vA and vB is written to vD. + Returns False, after printing a diagnostic, if opc1 is not 0x4 or + opc2 matches none of them; True otherwise. + */ + static Bool dis_av_logic ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_logic(ppc)(opc1 != 0x4)\n"); + return False; + } + + switch (opc2) { + case 0x404: // vand (And, AV p147) + DIP("vand v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_AndV128, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x444: // vandc (And with Complement, AV p148) + DIP("vandc v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* vD = vA & ~vB */ + putVReg( vD_addr, binop(Iop_AndV128, mkexpr(vA), + unop(Iop_NotV128, mkexpr(vB))) ); + break; + + case 0x484: // vor (Or, AV p217) + DIP("vor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x4C4: // vxor (Xor, AV p282) + DIP("vxor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_XorV128, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x504: // vnor (Nor, AV p216) + DIP("vnor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* vD = ~(vA | vB) */ + putVReg( vD_addr, + unop(Iop_NotV128, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB))) ); + break; + + default: + vex_printf("dis_av_logic(ppc)(opc2=0x%x)\n", opc2); + return False; + } + return True; + } + + /* + AltiVec Compare Instructions + + Each recognised compare writes its result vector to vD; when the + rC bit (instruction bit 10) is set the result is also handed to + set_AV_CR6. + */ + static Bool dis_av_cmp ( UInt theInstr ) + { + /* VXR-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UChar flag_rC = ifieldBIT10(theInstr); + UInt opc2 = IFIELD( theInstr, 
0, 10 ); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp vD = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_cmp(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x006: // vcmpequb (Compare Equal-to Unsigned B, AV p160) + DIP("vcmpequb%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpEQ8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x046: // vcmpequh (Compare Equal-to Unsigned HW, AV p161) + DIP("vcmpequh%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpEQ16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x086: // vcmpequw (Compare Equal-to Unsigned W, AV p162) + DIP("vcmpequw%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpEQ32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x206: // vcmpgtub (Compare Greater-than Unsigned B, AV p168) + DIP("vcmpgtub%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x246: // vcmpgtuh (Compare Greater-than Unsigned HW, AV p169) + DIP("vcmpgtuh%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x286: // vcmpgtuw (Compare Greater-than Unsigned W, AV p170) + DIP("vcmpgtuw%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x306: // vcmpgtsb (Compare Greater-than Signed B, AV p165) + DIP("vcmpgtsb%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x346: // vcmpgtsh (Compare Greater-than Signed HW, AV p166) + DIP("vcmpgtsh%s v%d,v%d,v%d\n", (flag_rC ? 
".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x386: // vcmpgtsw (Compare Greater-than Signed W, AV p167) + DIP("vcmpgtsw%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; + + default: + vex_printf("dis_av_cmp(ppc)(opc2)\n"); + return False; + } + + putVReg( vD_addr, mkexpr(vD) ); + + if (flag_rC) { + set_AV_CR6( mkexpr(vD), True ); + } + return True; + } + + /* + AltiVec Multiply-Sum Instructions + */ + static Bool dis_av_multarith ( UInt theInstr ) + { + /* VA-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UChar vC_addr = ifieldRegC(theInstr); + UChar opc2 = toUChar( IFIELD( theInstr, 0, 6 ) ); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp vC = newTemp(Ity_V128); + IRTemp zeros = newTemp(Ity_V128); + IRTemp aLo = newTemp(Ity_V128); + IRTemp bLo = newTemp(Ity_V128); + IRTemp cLo = newTemp(Ity_V128); + IRTemp zLo = newTemp(Ity_V128); + IRTemp aHi = newTemp(Ity_V128); + IRTemp bHi = newTemp(Ity_V128); + IRTemp cHi = newTemp(Ity_V128); + IRTemp zHi = newTemp(Ity_V128); + IRTemp abEvn = newTemp(Ity_V128); + IRTemp abOdd = newTemp(Ity_V128); + IRTemp z3 = newTemp(Ity_I64); + IRTemp z2 = newTemp(Ity_I64); + IRTemp z1 = newTemp(Ity_I64); + IRTemp z0 = newTemp(Ity_I64); + IRTemp ab7, ab6, ab5, ab4, ab3, ab2, ab1, ab0; + IRTemp c3, c2, c1, c0; + + ab7 = ab6 = ab5 = ab4 = ab3 = ab2 = ab1 = ab0 = IRTemp_INVALID; + c3 = c2 = c1 = c0 = IRTemp_INVALID; + + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + assign( vC, getVReg(vC_addr)); + assign( zeros, unop(Iop_Dup32x4, mkU32(0)) ); + + if (opc1 != 0x4) { + vex_printf("dis_av_multarith(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + /* Multiply-Add */ + case 0x20: { // vmhaddshs (Mult Hi, Add 
Signed HW Saturate, AV p185) + IRTemp cSigns = newTemp(Ity_V128); + DIP("vmhaddshs v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + assign(cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC))); + assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA))); + assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB))); + assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(cSigns),mkexpr(vC))); + assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA))); + assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB))); + assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns),mkexpr(vC))); + + assign( zLo, binop(Iop_Add32x4, mkexpr(cLo), + binop(Iop_SarN32x4, + binop(Iop_MullEven16Sx8, + mkexpr(aLo), mkexpr(bLo)), + mkU8(15))) ); + + assign( zHi, binop(Iop_Add32x4, mkexpr(cHi), + binop(Iop_SarN32x4, + binop(Iop_MullEven16Sx8, + mkexpr(aHi), mkexpr(bHi)), + mkU8(15))) ); + + putVReg( vD_addr, + binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); + break; + } + case 0x21: { // vmhraddshs (Mult High Round, Add Signed HW Saturate, AV p186) + IRTemp zKonst = newTemp(Ity_V128); + IRTemp cSigns = newTemp(Ity_V128); + DIP("vmhraddshs v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + assign(cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC)) ); + assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA))); + assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB))); + assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(cSigns),mkexpr(vC))); + assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA))); + assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB))); + assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns),mkexpr(vC))); + + /* shifting our const avoids store/load version of Dup */ + assign( zKonst, binop(Iop_ShlN32x4, unop(Iop_Dup32x4, mkU32(0x1)), + mkU8(14)) ); + + assign( zLo, binop(Iop_Add32x4, mkexpr(cLo), + binop(Iop_SarN32x4, + binop(Iop_Add32x4, mkexpr(zKonst), + 
binop(Iop_MullEven16Sx8, + mkexpr(aLo), mkexpr(bLo))), + mkU8(15))) ); + + assign( zHi, binop(Iop_Add32x4, mkexpr(cHi), + binop(Iop_SarN32x4, + binop(Iop_Add32x4, mkexpr(zKonst), + binop(Iop_MullEven16Sx8, + mkexpr(aHi), mkexpr(bHi))), + mkU8(15))) ); + + putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); + break; + } + case 0x22: { // vmladduhm (Mult Low, Add Unsigned HW Modulo, AV p194) + DIP("vmladduhm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA))); + assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB))); + assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vC))); + assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA))); + assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB))); + assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vC))); + assign(zLo, binop(Iop_Add32x4, + binop(Iop_MullEven16Ux8, mkexpr(aLo), mkexpr(bLo)), + mkexpr(cLo)) ); + assign(zHi, binop(Iop_Add32x4, + binop(Iop_MullEven16Ux8, mkexpr(aHi), mkexpr(bHi)), + mkexpr(cHi))); + putVReg(vD_addr, binop(Iop_Narrow32x4, mkexpr(zHi), mkexpr(zLo))); + break; + } + + + /* Multiply-Sum */ + case 0x24: { // vmsumubm (Multiply Sum Unsigned B Modulo, AV p204) + IRTemp abEE, abEO, abOE, abOO; + abEE = abEO = abOE = abOO = IRTemp_INVALID; + DIP("vmsumubm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + + /* multiply vA,vB (unsigned, widening) */ + assign( abEvn, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) ); + + /* evn,odd: V128_16Ux8 -> 2 x V128_32Ux4, zero-extended */ + expand16Ux8( mkexpr(abEvn), &abEE, &abEO ); + expand16Ux8( mkexpr(abOdd), &abOE, &abOO ); + + putVReg( vD_addr, + binop(Iop_Add32x4, mkexpr(vC), + binop(Iop_Add32x4, + binop(Iop_Add32x4, mkexpr(abEE), mkexpr(abEO)), + binop(Iop_Add32x4, mkexpr(abOE), mkexpr(abOO)))) ); + break; + } + case 0x25: { // 
vmsummbm (Multiply Sum Mixed-Sign B Modulo, AV p201) + IRTemp aEvn, aOdd, bEvn, bOdd; + IRTemp abEE = newTemp(Ity_V128); + IRTemp abEO = newTemp(Ity_V128); + IRTemp abOE = newTemp(Ity_V128); + IRTemp abOO = newTemp(Ity_V128); + aEvn = aOdd = bEvn = bOdd = IRTemp_INVALID; + DIP("vmsummbm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + + /* sign-extend vA, zero-extend vB, for mixed-sign multiply + (separating out adjacent lanes to different vectors) */ + expand8Sx16( mkexpr(vA), &aEvn, &aOdd ); + expand8Ux16( mkexpr(vB), &bEvn, &bOdd ); + + /* multiply vA, vB, again separating adjacent lanes */ + assign( abEE, MK_Iop_MullOdd16Sx8( mkexpr(aEvn), mkexpr(bEvn) )); + assign( abEO, binop(Iop_MullEven16Sx8, mkexpr(aEvn), mkexpr(bEvn)) ); + assign( abOE, MK_Iop_MullOdd16Sx8( mkexpr(aOdd), mkexpr(bOdd) )); + assign( abOO, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd)) ); + + /* add results together, + vC. vmsummbm is the Modulo form, so the sums must wrap: use Iop_Add32x4 (as vmsumubm does), not the saturating Iop_QAdd32Sx4. The four byte products are far too small to overflow 32 bits, so only the final add of vC is affected. */ + putVReg( vD_addr, + binop(Iop_Add32x4, mkexpr(vC), + binop(Iop_Add32x4, + binop(Iop_Add32x4, mkexpr(abEE), mkexpr(abEO)), + binop(Iop_Add32x4, mkexpr(abOE), mkexpr(abOO)))) ); + break; + } + case 0x26: { // vmsumuhm (Multiply Sum Unsigned HW Modulo, AV p205) + DIP("vmsumuhm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + assign( abEvn, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_Add32x4, mkexpr(vC), + binop(Iop_Add32x4, mkexpr(abEvn), mkexpr(abOdd))) ); + break; + } + case 0x27: { // vmsumuhs (Multiply Sum Unsigned HW Saturate, AV p206) + DIP("vmsumuhs v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + /* widening multiply, separating lanes */ + assign( abEvn, MK_Iop_MullOdd16Ux8(mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) ); + + /* break V128 to 4xI32's, zero-extending to I64's */ + breakV128to4x64U( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 ); + breakV128to4x64U( 
mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 ); + breakV128to4x64U( mkexpr(vC), &c3, &c2, &c1, &c0 ); + + /* add lanes */ + assign( z3, binop(Iop_Add64, mkexpr(c3), + binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6)))); + assign( z2, binop(Iop_Add64, mkexpr(c2), + binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4)))); + assign( z1, binop(Iop_Add64, mkexpr(c1), + binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2)))); + assign( z0, binop(Iop_Add64, mkexpr(c0), + binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0)))); + + /* saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2), + mkexpr(z1), mkexpr(z0)) ); + + break; + } + case 0x28: { // vmsumshm (Multiply Sum Signed HW Modulo, AV p202) + DIP("vmsumshm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_Add32x4, mkexpr(vC), + binop(Iop_Add32x4, mkexpr(abOdd), mkexpr(abEvn))) ); + break; + } + case 0x29: { // vmsumshs (Multiply Sum Signed HW Saturate, AV p203) + DIP("vmsumshs v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + /* widening multiply, separating lanes */ + assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); + assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) ); + + /* break V128 to 4xI32's, sign-extending to I64's */ + breakV128to4x64S( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 ); + breakV128to4x64S( mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 ); + breakV128to4x64S( mkexpr(vC), &c3, &c2, &c1, &c0 ); + + /* add lanes */ + assign( z3, binop(Iop_Add64, mkexpr(c3), + binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6)))); + assign( z2, binop(Iop_Add64, mkexpr(c2), + binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4)))); + assign( z1, binop(Iop_Add64, mkexpr(c1), + binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2)))); + assign( z0, binop(Iop_Add64, mkexpr(c0), + binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0)))); + + /* 
saturate-narrow to 32bit, and combine to V128 */ + putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2), + mkexpr(z1), mkexpr(z0)) ); + break; + } + default: + vex_printf("dis_av_multarith(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Shift/Rotate Instructions + */ + static Bool dis_av_shift ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4){ + vex_printf("dis_av_shift(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + /* Rotate */ + case 0x004: // vrlb (Rotate Left Integer B, AV p234) + DIP("vrlb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Rol8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x044: // vrlh (Rotate Left Integer HW, AV p235) + DIP("vrlh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Rol16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x084: // vrlw (Rotate Left Integer W, AV p236) + DIP("vrlw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Rol32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + + /* Shift Left */ + case 0x104: // vslb (Shift Left Integer B, AV p240) + DIP("vslb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shl8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x144: // vslh (Shift Left Integer HW, AV p242) + DIP("vslh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shl16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x184: // vslw (Shift Left Integer W, AV p244) + DIP("vslw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shl32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x1C4: { // vsl (Shift Left, 
AV p239) + IRTemp sh = newTemp(Ity_I8); + DIP("vsl v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( sh, binop(Iop_And8, mkU8(0x7), + unop(Iop_32to8, + unop(Iop_V128to32, mkexpr(vB)))) ); + putVReg( vD_addr, + binop(Iop_ShlV128, mkexpr(vA), mkexpr(sh)) ); + break; + } + case 0x40C: { // vslo (Shift Left by Octet, AV p243) + IRTemp sh = newTemp(Ity_I8); + DIP("vslo v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( sh, binop(Iop_And8, mkU8(0x78), + unop(Iop_32to8, + unop(Iop_V128to32, mkexpr(vB)))) ); + putVReg( vD_addr, + binop(Iop_ShlV128, mkexpr(vA), mkexpr(sh)) ); + break; + } + + + /* Shift Right */ + case 0x204: // vsrb (Shift Right B, AV p256) + DIP("vsrb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shr8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x244: // vsrh (Shift Right HW, AV p257) + DIP("vsrh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shr16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x284: // vsrw (Shift Right W, AV p259) + DIP("vsrw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shr32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x2C4: { // vsr (Shift Right, AV p251) + IRTemp sh = newTemp(Ity_I8); + DIP("vsr v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( sh, binop(Iop_And8, mkU8(0x7), + unop(Iop_32to8, + unop(Iop_V128to32, mkexpr(vB)))) ); + putVReg( vD_addr, + binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) ); + break; + } + case 0x304: // vsrab (Shift Right Alg B, AV p253) + DIP("vsrab v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sar8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x344: // vsrah (Shift Right Alg HW, AV p254) + DIP("vsrah v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sar16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x384: // vsraw (Shift Right Alg W, AV p255) + DIP("vsraw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sar32x4, 
mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x44C: { // vsro (Shift Right by Octet, AV p258) + IRTemp sh = newTemp(Ity_I8); + DIP("vsro v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( sh, binop(Iop_And8, mkU8(0x78), + unop(Iop_32to8, + unop(Iop_V128to32, mkexpr(vB)))) ); + putVReg( vD_addr, + binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) ); + break; + } + + default: + vex_printf("dis_av_shift(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Permute Instructions + */ + static Bool dis_av_permute ( UInt theInstr ) + { + /* VA-Form, VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar UIMM_5 = vA_addr; + UChar vB_addr = ifieldRegB(theInstr); + UChar vC_addr = ifieldRegC(theInstr); + UChar b10 = ifieldBIT10(theInstr); + UChar SHB_uimm4 = toUChar( IFIELD( theInstr, 6, 4 ) ); + UInt opc2 = toUChar( IFIELD( theInstr, 0, 6 ) ); + + UChar SIMM_8 = extend_s_5to8(UIMM_5); + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp vC = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + assign( vC, getVReg(vC_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_permute(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x2A: // vsel (Conditional Select, AV p238) + DIP("vsel v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); + /* vD = (vA & ~vC) | (vB & vC) */ + putVReg( vD_addr, binop(Iop_OrV128, + binop(Iop_AndV128, mkexpr(vA), unop(Iop_NotV128, mkexpr(vC))), + binop(Iop_AndV128, mkexpr(vB), mkexpr(vC))) ); + return True; + + case 0x2B: { // vperm (Permute, AV p218) + /* limited to two args for IR, so have to play games... */ + IRTemp a_perm = newTemp(Ity_V128); + IRTemp b_perm = newTemp(Ity_V128); + IRTemp mask = newTemp(Ity_V128); + IRTemp vC_andF = newTemp(Ity_V128); + DIP("vperm v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vB_addr, vC_addr); + /* Limit the Perm8x16 steering values to 0 .. 
15 as that is what + IR specifies, and also to hide irrelevant bits from + memcheck */ + assign( vC_andF, + binop(Iop_AndV128, mkexpr(vC), + unop(Iop_Dup8x16, mkU8(0xF))) ); + assign( a_perm, + binop(Iop_Perm8x16, mkexpr(vA), mkexpr(vC_andF)) ); + assign( b_perm, + binop(Iop_Perm8x16, mkexpr(vB), mkexpr(vC_andF)) ); + // mask[i8] = (vC[i8]_4 == 1) ? 0xFF : 0x0 + assign( mask, binop(Iop_SarN8x16, + binop(Iop_ShlN8x16, mkexpr(vC), mkU8(3)), + mkU8(7)) ); + // dst = (a & ~mask) | (b & mask) + putVReg( vD_addr, binop(Iop_OrV128, + binop(Iop_AndV128, mkexpr(a_perm), + unop(Iop_NotV128, mkexpr(mask))), + binop(Iop_AndV128, mkexpr(b_perm), + mkexpr(mask))) ); + return True; + } + case 0x2C: // vsldoi (Shift Left Double by Octet Imm, AV p241) + if (b10 != 0) { + vex_printf("dis_av_permute(ppc)(vsldoi)\n"); + return False; + } + DIP("vsldoi v%d,v%d,v%d,%d\n", + vD_addr, vA_addr, vB_addr, SHB_uimm4); + if (SHB_uimm4 == 0) + putVReg( vD_addr, mkexpr(vA) ); + else + putVReg( vD_addr, + binop(Iop_OrV128, + binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)), + binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm4)*8))) ); + return True; + + default: + break; // Fall through... 
+ } + + opc2 = IFIELD( theInstr, 0, 11 ); + switch (opc2) { + + /* Merge */ + case 0x00C: // vmrghb (Merge High B, AV p195) + DIP("vmrghb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveHI8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x04C: // vmrghh (Merge High HW, AV p196) + DIP("vmrghh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveHI16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x08C: // vmrghw (Merge High W, AV p197) + DIP("vmrghw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveHI32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x10C: // vmrglb (Merge Low B, AV p198) + DIP("vmrglb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveLO8x16, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x14C: // vmrglh (Merge Low HW, AV p199) + DIP("vmrglh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveLO16x8, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x18C: // vmrglw (Merge Low W, AV p200) + DIP("vmrglw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_InterleaveLO32x4, mkexpr(vA), mkexpr(vB)) ); + break; + + + /* Splat */ + case 0x20C: { // vspltb (Splat Byte, AV p245) + /* vD = Dup8x16( vB[UIMM_5] ) */ + UChar sh_uimm = (15 - (UIMM_5 & 15)) * 8; + DIP("vspltb v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); + putVReg( vD_addr, unop(Iop_Dup8x16, + unop(Iop_32to8, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) ); + break; + } + case 0x24C: { // vsplth (Splat Half Word, AV p246) + UChar sh_uimm = (7 - (UIMM_5 & 7)) * 16; + DIP("vsplth v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); + putVReg( vD_addr, unop(Iop_Dup16x8, + unop(Iop_32to16, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) ); + break; + } + case 0x28C: { // vspltw (Splat Word, AV p250) + /* vD = Dup32x4( vB[UIMM_5] ) */ + UChar sh_uimm = (3 - (UIMM_5 & 3)) * 
32; + DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); + putVReg( vD_addr, unop(Iop_Dup32x4, + unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) ); + break; + } + case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247) + DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8); + putVReg( vD_addr, unop(Iop_Dup8x16, mkU8(SIMM_8)) ); + break; + + case 0x34C: // vspltish (Splat Immediate Signed HW, AV p248) + DIP("vspltish v%d,%d\n", vD_addr, (Char)SIMM_8); + putVReg( vD_addr, + unop(Iop_Dup16x8, mkU16(extend_s_8to32(SIMM_8))) ); + break; + + case 0x38C: // vspltisw (Splat Immediate Signed W, AV p249) + DIP("vspltisw v%d,%d\n", vD_addr, (Char)SIMM_8); + putVReg( vD_addr, + unop(Iop_Dup32x4, mkU32(extend_s_8to32(SIMM_8))) ); + break; + + default: + vex_printf("dis_av_permute(ppc)(opc2)\n"); + return False; + } + return True; + } + + /* + AltiVec Pack/Unpack Instructions + */ + static Bool dis_av_pack ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + IRTemp signs = IRTemp_INVALID; + IRTemp zeros = IRTemp_INVALID; + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_pack(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + /* Packing */ + case 0x00E: // vpkuhum (Pack Unsigned HW Unsigned Modulo, AV p224) + DIP("vpkuhum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Narrow16x8, mkexpr(vA), mkexpr(vB)) ); + return True; + + case 0x04E: // vpkuwum (Pack Unsigned W Unsigned Modulo, AV p226) + DIP("vpkuwum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Narrow32x4, mkexpr(vA), mkexpr(vB)) ); + return True; + + case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225) + DIP("vpkuhus 
v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrow16Ux8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; + + case 0x0CE: // vpkuwus (Pack Unsigned W Unsigned Saturate, AV p227) + DIP("vpkuwus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrow32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; + + case 0x10E: { // vpkshus (Pack Signed HW Unsigned Saturate, AV p221) + // This insn does a signed->unsigned saturating conversion. + // Conversion done here, then uses unsigned->unsigned vpk insn: + // => UnsignedSaturatingNarrow( x & ~ (x >>s 15) ) + IRTemp vA_tmp = newTemp(Ity_V128); + IRTemp vB_tmp = newTemp(Ity_V128); + DIP("vpkshus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( vA_tmp, binop(Iop_AndV128, mkexpr(vA), + unop(Iop_NotV128, + binop(Iop_SarN16x8, + mkexpr(vA), mkU8(15)))) ); + assign( vB_tmp, binop(Iop_AndV128, mkexpr(vB), + unop(Iop_NotV128, + binop(Iop_SarN16x8, + mkexpr(vB), mkU8(15)))) ); + putVReg( vD_addr, binop(Iop_QNarrow16Ux8, + mkexpr(vA_tmp), mkexpr(vB_tmp)) ); + // TODO: set VSCR[SAT] + return True; + } + case 0x14E: { // vpkswus (Pack Signed W Unsigned Saturate, AV p223) + // This insn does a signed->unsigned saturating conversion. 
+ // Conversion done here, then uses unsigned->unsigned vpk insn: + // => UnsignedSaturatingNarrow( x & ~ (x >>s 31) ) + IRTemp vA_tmp = newTemp(Ity_V128); + IRTemp vB_tmp = newTemp(Ity_V128); + DIP("vpkswus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( vA_tmp, binop(Iop_AndV128, mkexpr(vA), + unop(Iop_NotV128, + binop(Iop_SarN32x4, + mkexpr(vA), mkU8(31)))) ); + assign( vB_tmp, binop(Iop_AndV128, mkexpr(vB), + unop(Iop_NotV128, + binop(Iop_SarN32x4, + mkexpr(vB), mkU8(31)))) ); + putVReg( vD_addr, binop(Iop_QNarrow32Ux4, + mkexpr(vA_tmp), mkexpr(vB_tmp)) ); + // TODO: set VSCR[SAT] + return True; + } + case 0x18E: // vpkshss (Pack Signed HW Signed Saturate, AV p220) + DIP("vpkshss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrow16Sx8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; + + case 0x1CE: // vpkswss (Pack Signed W Signed Saturate, AV p222) + DIP("vpkswss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrow32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; + + case 0x30E: { // vpkpx (Pack Pixel, AV p219) + /* CAB: Worth a new primop? 
*/ + /* Using shifts to compact pixel elements, then packing them */ + IRTemp a1 = newTemp(Ity_V128); + IRTemp a2 = newTemp(Ity_V128); + IRTemp a3 = newTemp(Ity_V128); + IRTemp a_tmp = newTemp(Ity_V128); + IRTemp b1 = newTemp(Ity_V128); + IRTemp b2 = newTemp(Ity_V128); + IRTemp b3 = newTemp(Ity_V128); + IRTemp b_tmp = newTemp(Ity_V128); + DIP("vpkpx v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( a1, binop(Iop_ShlN16x8, + binop(Iop_ShrN32x4, mkexpr(vA), mkU8(19)), + mkU8(10)) ); + assign( a2, binop(Iop_ShlN16x8, + binop(Iop_ShrN16x8, mkexpr(vA), mkU8(11)), + mkU8(5)) ); + assign( a3, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vA), mkU8(8)), + mkU8(11)) ); + assign( a_tmp, binop(Iop_OrV128, mkexpr(a1), + binop(Iop_OrV128, mkexpr(a2), mkexpr(a3))) ); + + assign( b1, binop(Iop_ShlN16x8, + binop(Iop_ShrN32x4, mkexpr(vB), mkU8(19)), + mkU8(10)) ); + assign( b2, binop(Iop_ShlN16x8, + binop(Iop_ShrN16x8, mkexpr(vB), mkU8(11)), + mkU8(5)) ); + assign( b3, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vB), mkU8(8)), + mkU8(11)) ); + assign( b_tmp, binop(Iop_OrV128, mkexpr(b1), + binop(Iop_OrV128, mkexpr(b2), mkexpr(b3))) ); + + putVReg( vD_addr, binop(Iop_Narrow32x4, + mkexpr(a_tmp), mkexpr(b_tmp)) ); + return True; + } + + default: + break; // Fall through... 
+ } + + + if (vA_addr != 0) { + vex_printf("dis_av_pack(ppc)(vA_addr)\n"); + return False; + } + + signs = newTemp(Ity_V128); + zeros = newTemp(Ity_V128); + assign( zeros, unop(Iop_Dup32x4, mkU32(0)) ); + + switch (opc2) { + /* Unpacking */ + case 0x20E: { // vupkhsb (Unpack High Signed B, AV p277) + DIP("vupkhsb v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT8Sx16, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveHI8x16, mkexpr(signs), mkexpr(vB)) ); + break; + } + case 0x24E: { // vupkhsh (Unpack High Signed HW, AV p278) + DIP("vupkhsh v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveHI16x8, mkexpr(signs), mkexpr(vB)) ); + break; + } + case 0x28E: { // vupklsb (Unpack Low Signed B, AV p280) + DIP("vupklsb v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT8Sx16, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveLO8x16, mkexpr(signs), mkexpr(vB)) ); + break; + } + case 0x2CE: { // vupklsh (Unpack Low Signed HW, AV p281) + DIP("vupklsh v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveLO16x8, mkexpr(signs), mkexpr(vB)) ); + break; + } + case 0x34E: { // vupkhpx (Unpack High Pixel16, AV p276) + /* CAB: Worth a new primop? 
*/ + /* Using shifts to isolate pixel elements, then expanding them */ + IRTemp z0 = newTemp(Ity_V128); + IRTemp z1 = newTemp(Ity_V128); + IRTemp z01 = newTemp(Ity_V128); + IRTemp z2 = newTemp(Ity_V128); + IRTemp z3 = newTemp(Ity_V128); + IRTemp z23 = newTemp(Ity_V128); + DIP("vupkhpx v%d,v%d\n", vD_addr, vB_addr); + assign( z0, binop(Iop_ShlN16x8, + binop(Iop_SarN16x8, mkexpr(vB), mkU8(15)), + mkU8(8)) ); + assign( z1, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vB), mkU8(1)), + mkU8(11)) ); + assign( z01, binop(Iop_InterleaveHI16x8, mkexpr(zeros), + binop(Iop_OrV128, mkexpr(z0), mkexpr(z1))) ); + assign( z2, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + binop(Iop_ShrN16x8, mkexpr(vB), mkU8(5)), + mkU8(11)), + mkU8(3)) ); + assign( z3, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vB), mkU8(11)), + mkU8(11)) ); + assign( z23, binop(Iop_InterleaveHI16x8, mkexpr(zeros), + binop(Iop_OrV128, mkexpr(z2), mkexpr(z3))) ); + putVReg( vD_addr, + binop(Iop_OrV128, + binop(Iop_ShlN32x4, mkexpr(z01), mkU8(16)), + mkexpr(z23)) ); + break; + } + case 0x3CE: { // vupklpx (Unpack Low Pixel16, AV p279) + /* identical to vupkhpx, except interleaving LO */ + IRTemp z0 = newTemp(Ity_V128); + IRTemp z1 = newTemp(Ity_V128); + IRTemp z01 = newTemp(Ity_V128); + IRTemp z2 = newTemp(Ity_V128); + IRTemp z3 = newTemp(Ity_V128); + IRTemp z23 = newTemp(Ity_V128); + DIP("vupklpx v%d,v%d\n", vD_addr, vB_addr); + assign( z0, binop(Iop_ShlN16x8, + binop(Iop_SarN16x8, mkexpr(vB), mkU8(15)), + mkU8(8)) ); + assign( z1, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vB), mkU8(1)), + mkU8(11)) ); + assign( z01, binop(Iop_InterleaveLO16x8, mkexpr(zeros), + binop(Iop_OrV128, mkexpr(z0), mkexpr(z1))) ); + assign( z2, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + binop(Iop_ShrN16x8, mkexpr(vB), mkU8(5)), + mkU8(11)), + mkU8(3)) ); + assign( z3, binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(vB), mkU8(11)), + mkU8(11)) ); + assign( z23, binop(Iop_InterleaveLO16x8, mkexpr(zeros), + binop(Iop_OrV128, 
mkexpr(z2), mkexpr(z3))) ); + putVReg( vD_addr, + binop(Iop_OrV128, + binop(Iop_ShlN32x4, mkexpr(z01), mkU8(16)), + mkexpr(z23)) ); + break; + } + default: + vex_printf("dis_av_pack(ppc)(opc2)\n"); + return False; + } + return True; + } + + + /* + AltiVec Floating Point Arithmetic Instructions + */ + static Bool dis_av_fp_arith ( UInt theInstr ) + { + /* VA-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UChar vC_addr = ifieldRegC(theInstr); + UInt opc2=0; + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp vC = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + assign( vC, getVReg(vC_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_fp_arith(ppc)(instr)\n"); + return False; + } + + opc2 = IFIELD( theInstr, 0, 6 ); + switch (opc2) { + case 0x2E: // vmaddfp (Multiply Add FP, AV p177) + DIP("vmaddfp v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vC_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_Add32Fx4, mkexpr(vB), + binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) ); + return True; + + case 0x2F: { // vnmsubfp (Negative Multiply-Subtract FP, AV p215) + DIP("vnmsubfp v%d,v%d,v%d,v%d\n", + vD_addr, vA_addr, vC_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_Sub32Fx4, + mkexpr(vB), + binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) ); + return True; + } + + default: + break; // Fall through... 
+ } + + opc2 = IFIELD( theInstr, 0, 11 ); + switch (opc2) { + case 0x00A: // vaddfp (Add FP, AV p137) + DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) ); + return True; + + case 0x04A: // vsubfp (Subtract FP, AV p261) + DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) ); + return True; + + case 0x40A: // vmaxfp (Maximum FP, AV p178) + DIP("vmaxfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max32Fx4, mkexpr(vA), mkexpr(vB)) ); + return True; + + case 0x44A: // vminfp (Minimum FP, AV p187) + DIP("vminfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min32Fx4, mkexpr(vA), mkexpr(vB)) ); + return True; + + default: + break; // Fall through... + } + + + if (vA_addr != 0) { /* the insns decoded below use only vB, so a non-zero vA field is rejected */ + vex_printf("dis_av_fp_arith(ppc)(vA_addr)\n"); + return False; + } + + switch (opc2) { + case 0x10A: // vrefp (Reciprocal Estimate FP, AV p228) + DIP("vrefp v%d,v%d\n", vD_addr, vB_addr); + putVReg( vD_addr, unop(Iop_Recip32Fx4, mkexpr(vB)) ); + return True; + + case 0x14A: // vrsqrtefp (Reciprocal Sqrt Estimate FP, AV p237) + DIP("vrsqrtefp v%d,v%d\n", vD_addr, vB_addr); + putVReg( vD_addr, unop(Iop_RSqrt32Fx4, mkexpr(vB)) ); + return True; + + case 0x18A: // vexptefp (2 Raised to the Exp Est FP, AV p173) + DIP("vexptefp v%d,v%d\n", vD_addr, vB_addr); + DIP(" => not implemented\n"); + return False; + + case 0x1CA: // vlogefp (Log2 Estimate FP, AV p175) + DIP("vlogefp v%d,v%d\n", vD_addr, vB_addr); + DIP(" => not implemented\n"); + return False; + + default: + vex_printf("dis_av_fp_arith(ppc)(opc2=0x%x)\n",opc2); + return False; + } + return True; /* not reached: every case of the switch above returns */ + } + + /* + AltiVec Floating Point Compare Instructions + */ + static Bool dis_av_fp_cmp ( UInt theInstr ) + { + /* VXR-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar 
vB_addr = ifieldRegB(theInstr); + UChar flag_rC = ifieldBIT10(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 10 ); + + Bool cmp_bounds = False; + + IRTemp vA = newTemp(Ity_V128); + IRTemp vB = newTemp(Ity_V128); + IRTemp vD = newTemp(Ity_V128); + assign( vA, getVReg(vA_addr)); + assign( vB, getVReg(vB_addr)); + + if (opc1 != 0x4) { + vex_printf("dis_av_fp_cmp(ppc)(instr)\n"); + return False; + } + + switch (opc2) { + case 0x0C6: // vcmpeqfp (Compare Equal-to FP, AV p159) + DIP("vcmpeqfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x1C6: // vcmpgefp (Compare Greater-than-or-Equal-to, AV p163) + DIP("vcmpgefp%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x2C6: // vcmpgtfp (Compare Greater-than FP, AV p164) + DIP("vcmpgtfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) ); + break; + + case 0x3C6: { // vcmpbfp (Compare Bounds FP, AV p157) + IRTemp gt = newTemp(Ity_V128); + IRTemp lt = newTemp(Ity_V128); + IRTemp zeros = newTemp(Ity_V128); + DIP("vcmpbfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + cmp_bounds = True; + assign( zeros, unop(Iop_Dup32x4, mkU32(0)) ); + + /* Note: making use of fact that the ppc backend for compare insns + return zero'd lanes if either of the corresponding arg lanes is + a nan. + + Perhaps better to have an irop Iop_isNan32Fx4, but then we'd + need this for the other compares too (vcmpeqfp etc)... + Better still, tighten down the spec for compare irops. 
+ */ + assign( gt, unop(Iop_NotV128, + binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) ); + assign( lt, unop(Iop_NotV128, + binop(Iop_CmpGE32Fx4, mkexpr(vA), + binop(Iop_Sub32Fx4, mkexpr(zeros), + mkexpr(vB)))) ); + + // finally, just shift gt,lt to correct position + assign( vD, binop(Iop_ShlN32x4, + binop(Iop_OrV128, + binop(Iop_AndV128, mkexpr(gt), + unop(Iop_Dup32x4, mkU32(0x2))), + binop(Iop_AndV128, mkexpr(lt), + unop(Iop_Dup32x4, mkU32(0x1)))), + mkU8(30)) ); + break; + } + + default: + vex_printf("dis_av_fp_cmp(ppc)(opc2)\n"); + return False; + } + + putVReg( vD_addr, mkexpr(vD) ); + + if (flag_rC) { + set_AV_CR6( mkexpr(vD), !cmp_bounds ); + } + return True; + } + + /* + AltiVec Floating Point Convert/Round Instructions + */ + static Bool dis_av_fp_convert ( UInt theInstr ) + { + /* VX-Form */ + UChar opc1 = ifieldOPC(theInstr); + UChar vD_addr = ifieldRegDS(theInstr); + UChar UIMM_5 = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + UInt opc2 = IFIELD( theInstr, 0, 11 ); + + IRTemp vB = newTemp(Ity_V128); + IRTemp vScale = newTemp(Ity_V128); + IRTemp vInvScale = newTemp(Ity_V128); + + float scale, inv_scale; + + assign( vB, getVReg(vB_addr)); + + /* scale = 2^UIMM, cast to float, reinterpreted as uint */ + scale = (float)( (unsigned int) 1<hwcaps; + Long delta; + + /* What insn variants are we supporting today? */ + if (mode64) { + allow_F = True; + allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC64_V)); + allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX)); + allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX)); + } else { + allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F)); + allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V)); + allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX)); + allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX)); + } + + /* The running delta */ + delta = (Long)mkSzAddr(ty, (ULong)delta64); + + /* Set result defaults. 
*/ + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + + /* At least this is simple on PPC32: insns are all 4 bytes long, and + 4-aligned. So just fish the whole thing out of memory right now + and have done. */ + theInstr = getUIntBigendianly( (UChar*)(&guest_code[delta]) ); + + if (0) vex_printf("insn: 0x%x\n", theInstr); + + DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr); + + /* We may be asked to update the guest CIA before going further. */ + if (put_IP) + putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) ); + + /* Spot "Special" instructions (see comment at top of file). */ + { + UChar* code = (UChar*)(guest_code + delta); + /* Spot the 16-byte preamble: + 32-bit mode: + 54001800 rlwinm 0,0,3,0,0 + 54006800 rlwinm 0,0,13,0,0 + 5400E800 rlwinm 0,0,29,0,0 + 54009800 rlwinm 0,0,19,0,0 + 64-bit mode: + 78001800 rotldi 0,0,3 + 78006800 rotldi 0,0,13 + 7800E802 rotldi 0,0,61 + 78009802 rotldi 0,0,51 + */ + UInt word1 = mode64 ? 0x78001800 : 0x54001800; + UInt word2 = mode64 ? 0x78006800 : 0x54006800; + UInt word3 = mode64 ? 0x7800E802 : 0x5400E800; + UInt word4 = mode64 ? 0x78009802 : 0x54009800; + if (getUIntBigendianly(code+ 0) == word1 && + getUIntBigendianly(code+ 4) == word2 && + getUIntBigendianly(code+ 8) == word3 && + getUIntBigendianly(code+12) == word4) { + /* Got a "Special" instruction preamble. Which one is it? 
*/ + if (getUIntBigendianly(code+16) == 0x7C210B78 /* or 1,1,1 */) { + /* %R3 = client_request ( %R4 ) */ + DIP("r3 = client_request ( %%r4 )\n"); + delta += 20; + irsb->next = mkSzImm( ty, guest_CIA_bbstart + delta ); + irsb->jumpkind = Ijk_ClientReq; + dres.whatNext = Dis_StopHere; + goto decode_success; + } + else + if (getUIntBigendianly(code+16) == 0x7C421378 /* or 2,2,2 */) { + /* %R3 = guest_NRADDR */ + DIP("r3 = guest_NRADDR\n"); + delta += 20; + dres.len = 20; + putIReg(3, IRExpr_Get( OFFB_NRADDR, ty )); + goto decode_success; + } + else + if (getUIntBigendianly(code+16) == 0x7C631B78 /* or 3,3,3 */) { + /* branch-and-link-to-noredir %R11 */ + DIP("branch-and-link-to-noredir r11\n"); + delta += 20; + putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) ); + irsb->next = getIReg(11); + irsb->jumpkind = Ijk_NoRedir; + dres.whatNext = Dis_StopHere; + goto decode_success; + } + else + if (getUIntBigendianly(code+16) == 0x7C842378 /* or 4,4,4 */) { + /* %R3 = guest_NRADDR_GPR2 */ + DIP("r3 = guest_NRADDR_GPR2\n"); + delta += 20; + dres.len = 20; + putIReg(3, IRExpr_Get( OFFB_NRADDR_GPR2, ty )); + goto decode_success; + } + /* We don't know what it is. Set opc1/opc2 so decode_failure + can print the insn following the Special-insn preamble. 
*/ + theInstr = getUIntBigendianly(code+16); + opc1 = ifieldOPC(theInstr); + opc2 = ifieldOPClo10(theInstr); + goto decode_failure; + /*NOTREACHED*/ + } + } + + opc1 = ifieldOPC(theInstr); + opc2 = ifieldOPClo10(theInstr); + + // Note: all 'reserved' bits must be cleared, else invalid + switch (opc1) { + + /* Integer Arithmetic Instructions */ + case 0x0C: case 0x0D: case 0x0E: // addic, addic., addi + case 0x0F: case 0x07: case 0x08: // addis, mulli, subfic + if (dis_int_arith( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Compare Instructions */ + case 0x0B: case 0x0A: // cmpi, cmpli + if (dis_int_cmp( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Logical Instructions */ + case 0x1C: case 0x1D: case 0x18: // andi., andis., ori + case 0x19: case 0x1A: case 0x1B: // oris, xori, xoris + if (dis_int_logic( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Rotate Instructions */ + case 0x14: case 0x15: case 0x17: // rlwimi, rlwinm, rlwnm + if (dis_int_rot( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Rotate Instructions */ + case 0x1E: // rldcl, rldcr, rldic, rldicl, rldicr, rldimi + if (dis_int_rot( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Load Instructions */ + case 0x22: case 0x23: case 0x2A: // lbz, lbzu, lha + case 0x2B: case 0x28: case 0x29: // lhau, lhz, lhzu + case 0x20: case 0x21: // lwz, lwzu + if (dis_int_load( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Store Instructions */ + case 0x26: case 0x27: case 0x2C: // stb, stbu, sth + case 0x2D: case 0x24: case 0x25: // sthu, stw, stwu + if (dis_int_store( theInstr, abiinfo )) goto decode_success; + goto decode_failure; + + /* Integer Load and Store Multiple Instructions */ + case 0x2E: case 0x2F: // lmw, stmw + if (dis_int_ldst_mult( theInstr )) goto decode_success; + goto decode_failure; + + /* Branch Instructions */ + case 0x12: case 0x10: // b, bc + 
if (dis_branch(theInstr, abiinfo, &dres, + resteerOkFn, callback_opaque)) + goto decode_success; + goto decode_failure; + + /* System Linkage Instructions */ + case 0x11: // sc + if (dis_syslink(theInstr, abiinfo, &dres)) goto decode_success; + goto decode_failure; + + /* Trap Instructions */ + case 0x02: case 0x03: // tdi, twi + if (dis_trapi(theInstr, &dres)) goto decode_success; + goto decode_failure; + + /* Floating Point Load Instructions */ + case 0x30: case 0x31: case 0x32: // lfs, lfsu, lfd + case 0x33: // lfdu + if (!allow_F) goto decode_noF; + if (dis_fp_load( theInstr )) goto decode_success; + goto decode_failure; + + /* Floating Point Store Instructions */ + case 0x34: case 0x35: case 0x36: // stfs, stfsu, stfd + case 0x37: // stfdu + if (!allow_F) goto decode_noF; + if (dis_fp_store( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Loads */ + case 0x3A: // ld, ldu, lwa + if (!mode64) goto decode_failure; + if (dis_int_load( theInstr )) goto decode_success; + goto decode_failure; + + case 0x3B: + if (!allow_F) goto decode_noF; + opc2 = IFIELD(theInstr, 1, 5); + switch (opc2) { + /* Floating Point Arith Instructions */ + case 0x12: case 0x14: case 0x15: // fdivs, fsubs, fadds + case 0x19: // fmuls + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x16: // fsqrts + if (!allow_FX) goto decode_noFX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x18: // fres + if (!allow_GX) goto decode_noGX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + + /* Floating Point Mult-Add Instructions */ + case 0x1C: case 0x1D: case 0x1E: // fmsubs, fmadds, fnmsubs + case 0x1F: // fnmadds + if (dis_fp_multadd(theInstr)) goto decode_success; + goto decode_failure; + + case 0x1A: // frsqrtes + if (!allow_GX) goto decode_noGX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + + default: + goto decode_failure; + } + break; + + /*
64bit Integer Stores */ + case 0x3E: // std, stdu + if (!mode64) goto decode_failure; + if (dis_int_store( theInstr, abiinfo )) goto decode_success; + goto decode_failure; + + case 0x3F: + if (!allow_F) goto decode_noF; + /* Instrs using opc[1:5] never overlap instrs using opc[1:10], + so we can simply fall through the first switch statement */ + + opc2 = IFIELD(theInstr, 1, 5); + switch (opc2) { + /* Floating Point Arith Instructions */ + case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd + case 0x19: // fmul + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x16: // fsqrt + if (!allow_FX) goto decode_noFX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x17: case 0x1A: // fsel, frsqrte + if (!allow_GX) goto decode_noGX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + + /* Floating Point Mult-Add Instructions */ + case 0x1C: case 0x1D: case 0x1E: // fmsub, fmadd, fnmsub + case 0x1F: // fnmadd + if (dis_fp_multadd(theInstr)) goto decode_success; + goto decode_failure; + + case 0x18: // fre + if (!allow_GX) goto decode_noGX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + + default: + break; // Fall through + } + + opc2 = IFIELD(theInstr, 1, 10); + switch (opc2) { + /* Floating Point Compare Instructions */ + case 0x000: // fcmpu + case 0x020: // fcmpo + if (dis_fp_cmp(theInstr)) goto decode_success; + goto decode_failure; + + /* Floating Point Rounding/Conversion Instructions */ + case 0x00C: // frsp + case 0x00E: // fctiw + case 0x00F: // fctiwz + case 0x32E: // fctid + case 0x32F: // fctidz + case 0x34E: // fcfid + if (dis_fp_round(theInstr)) goto decode_success; + goto decode_failure; + + /* Power6 rounding stuff */ + case 0x1E8: // frim + case 0x1C8: // frip + case 0x188: // frin + case 0x1A8: // friz + if (dis_P6(theInstr, allow_F, allow_V, allow_FX, allow_GX)) + goto decode_success; + goto decode_failure; + + /* Floating Point Move 
Instructions */ + case 0x028: // fneg + case 0x048: // fmr + case 0x088: // fnabs + case 0x108: // fabs + if (dis_fp_move( theInstr )) goto decode_success; + goto decode_failure; + + /* Floating Point Status/Control Register Instructions */ + case 0x026: // mtfsb1 + case 0x040: // mcrfs + case 0x046: // mtfsb0 + case 0x086: // mtfsfi + case 0x247: // mffs + case 0x2C7: // mtfsf + if (dis_fp_scr( theInstr )) goto decode_success; + goto decode_failure; + + default: + goto decode_failure; + } + break; + + case 0x13: + switch (opc2) { + + /* Condition Register Logical Instructions */ + case 0x101: case 0x081: case 0x121: // crand, crandc, creqv + case 0x0E1: case 0x021: case 0x1C1: // crnand, crnor, cror + case 0x1A1: case 0x0C1: case 0x000: // crorc, crxor, mcrf + if (dis_cond_logic( theInstr )) goto decode_success; + goto decode_failure; + + /* Branch Instructions */ + case 0x210: case 0x010: // bcctr, bclr + if (dis_branch(theInstr, abiinfo, &dres, + resteerOkFn, callback_opaque)) + goto decode_success; + goto decode_failure; + + /* Memory Synchronization Instructions */ + case 0x096: // isync + if (dis_memsync( theInstr )) goto decode_success; + goto decode_failure; + + default: + goto decode_failure; + } + break; + + + case 0x1F: + + /* For arith instns, bit10 is the OE flag (overflow enable) */ + + opc2 = IFIELD(theInstr, 1, 9); + switch (opc2) { + /* Integer Arithmetic Instructions */ + case 0x10A: case 0x00A: case 0x08A: // add, addc, adde + case 0x0EA: case 0x0CA: case 0x1EB: // addme, addze, divw + case 0x1CB: case 0x04B: case 0x00B: // divwu, mulhw, mulhwu + case 0x0EB: case 0x068: case 0x028: // mullw, neg, subf + case 0x008: case 0x088: case 0x0E8: // subfc, subfe, subfme + case 0x0C8: // subfze + if (dis_int_arith( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Arithmetic */ + case 0x009: case 0x049: case 0x0E9: // mulhdu, mulhd, mulld + case 0x1C9: case 0x1E9: // divdu, divd + if (!mode64) goto decode_failure; + if 
(dis_int_arith( theInstr )) goto decode_success; + goto decode_failure; + + default: + break; // Fall through... + } + + /* All remaining opcodes use full 10 bits. */ + + opc2 = IFIELD(theInstr, 1, 10); + switch (opc2) { + /* Integer Compare Instructions */ + case 0x000: case 0x020: // cmp, cmpl + if (dis_int_cmp( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Logical Instructions */ + case 0x01C: case 0x03C: case 0x01A: // and, andc, cntlzw + case 0x11C: case 0x3BA: case 0x39A: // eqv, extsb, extsh + case 0x1DC: case 0x07C: case 0x1BC: // nand, nor, or + case 0x19C: case 0x13C: // orc, xor + if (dis_int_logic( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Logical Instructions */ + case 0x3DA: case 0x03A: // extsw, cntlzd + if (!mode64) goto decode_failure; + if (dis_int_logic( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Shift Instructions */ + case 0x018: case 0x318: case 0x338: // slw, sraw, srawi + case 0x218: // srw + if (dis_int_shift( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Shift Instructions */ + case 0x01B: case 0x31A: // sld, srad + case 0x33A: case 0x33B: // sradi + case 0x21B: // srd + if (!mode64) goto decode_failure; + if (dis_int_shift( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Load Instructions */ + case 0x057: case 0x077: case 0x157: // lbzx, lbzux, lhax + case 0x177: case 0x117: case 0x137: // lhaux, lhzx, lhzux + case 0x017: case 0x037: // lwzx, lwzux + if (dis_int_load( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Load Instructions */ + case 0x035: case 0x015: // ldux, ldx + case 0x175: case 0x155: // lwaux, lwax + if (!mode64) goto decode_failure; + if (dis_int_load( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Store Instructions */ + case 0x0F7: case 0x0D7: case 0x1B7: // stbux, stbx, sthux + case 0x197: case 0x0B7: case 0x097: // sthx, 
stwux, stwx + if (dis_int_store( theInstr, abiinfo )) goto decode_success; + goto decode_failure; + + /* 64bit Integer Store Instructions */ + case 0x0B5: case 0x095: // stdux, stdx + if (!mode64) goto decode_failure; + if (dis_int_store( theInstr, abiinfo )) goto decode_success; + goto decode_failure; + + /* Integer Load and Store with Byte Reverse Instructions */ + case 0x316: case 0x216: case 0x396: // lhbrx, lwbrx, sthbrx + case 0x296: // stwbrx + if (dis_int_ldst_rev( theInstr )) goto decode_success; + goto decode_failure; + + /* Integer Load and Store String Instructions */ + case 0x255: case 0x215: case 0x2D5: // lswi, lswx, stswi + case 0x295: { // stswx + Bool stopHere = False; + Bool ok = dis_int_ldst_str( theInstr, &stopHere ); + if (!ok) goto decode_failure; + if (stopHere) { + irsb->next = mkSzImm(ty, nextInsnAddr()); + irsb->jumpkind = Ijk_Boring; + dres.whatNext = Dis_StopHere; + } + goto decode_success; + } + + /* Memory Synchronization Instructions */ + case 0x356: case 0x014: case 0x096: // eieio, lwarx, stwcx. + case 0x256: // sync + if (dis_memsync( theInstr )) goto decode_success; + goto decode_failure; + + /* 64bit Memory Synchronization Instructions */ + case 0x054: case 0x0D6: // ldarx, stdcx. 
+ if (!mode64) goto decode_failure; + if (dis_memsync( theInstr )) goto decode_success; + goto decode_failure; + + /* Processor Control Instructions */ + case 0x200: case 0x013: case 0x153: // mcrxr, mfcr, mfspr + case 0x173: case 0x090: case 0x1D3: // mftb, mtcrf, mtspr + if (dis_proc_ctl( abiinfo, theInstr )) goto decode_success; + goto decode_failure; + + /* Cache Management Instructions */ + case 0x2F6: case 0x056: case 0x036: // dcba, dcbf, dcbst + case 0x116: case 0x0F6: case 0x3F6: // dcbt, dcbtst, dcbz + case 0x3D6: // icbi + if (dis_cache_manage( theInstr, &dres, archinfo )) + goto decode_success; + goto decode_failure; + + //zz /* External Control Instructions */ + //zz case 0x136: case 0x1B6: // eciwx, ecowx + //zz DIP("external control op => not implemented\n"); + //zz goto decode_failure; + + /* Trap Instructions */ + case 0x004: case 0x044: // tw, td + if (dis_trap(theInstr, &dres)) goto decode_success; + goto decode_failure; + + /* Floating Point Load Instructions */ + case 0x217: case 0x237: case 0x257: // lfsx, lfsux, lfdx + case 0x277: // lfdux + if (!allow_F) goto decode_noF; + if (dis_fp_load( theInstr )) goto decode_success; + goto decode_failure; + + /* Floating Point Store Instructions */ + case 0x297: case 0x2B7: case 0x2D7: // stfsx, stfsux, stfdx + case 0x2F7: // stfdux + if (!allow_F) goto decode_noF; + if (dis_fp_store( theInstr )) goto decode_success; + goto decode_failure; + case 0x3D7: // stfiwx + if (!allow_F) goto decode_noF; + if (!allow_GX) goto decode_noGX; + if (dis_fp_store( theInstr )) goto decode_success; + goto decode_failure; + + /* AltiVec instructions */ + + /* AV Cache Control - Data streams */ + case 0x156: case 0x176: case 0x336: // dst, dstst, dss + if (!allow_V) goto decode_noV; + if (dis_av_datastream( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Load */ + case 0x006: case 0x026: // lvsl, lvsr + case 0x007: case 0x027: case 0x047: // lvebx, lvehx, lvewx + case 0x067: case 0x167: // lvx, lvxl
+ if (!allow_V) goto decode_noV; + if (dis_av_load( abiinfo, theInstr )) goto decode_success; + goto decode_failure; + + /* AV Store */ + case 0x087: case 0x0A7: case 0x0C7: // stvebx, stvehx, stvewx + case 0x0E7: case 0x1E7: // stvx, stvxl + if (!allow_V) goto decode_noV; + if (dis_av_store( theInstr )) goto decode_success; + goto decode_failure; + + default: + /* Deal with some other cases that we would otherwise have + punted on. */ + /* --- ISEL (PowerISA_V2.05.pdf, p74) --- */ + /* only decode this insn when reserved bit 0 (31 in IBM's + notation) is zero */ + if (IFIELD(theInstr, 0, 6) == (15<<1)) { + UInt rT = ifieldRegDS( theInstr ); + UInt rA = ifieldRegA( theInstr ); + UInt rB = ifieldRegB( theInstr ); + UInt bi = ifieldRegC( theInstr ); + putIReg( + rT, + IRExpr_Mux0X( unop(Iop_32to8,getCRbit( bi )), + getIReg(rB), + rA == 0 ? (mode64 ? mkU64(0) : mkU32(0)) + : getIReg(rA) ) + ); + DIP("isel r%u,r%u,r%u,crb%u\n", rT,rA,rB,bi); + goto decode_success; + } + goto decode_failure; + } + break; + + + case 0x04: + /* AltiVec instructions */ + + opc2 = IFIELD(theInstr, 0, 6); + switch (opc2) { + /* AV Mult-Add, Mult-Sum */ + case 0x20: case 0x21: case 0x22: // vmhaddshs, vmhraddshs, vmladduhm + case 0x24: case 0x25: case 0x26: // vmsumubm, vmsummbm, vmsumuhm + case 0x27: case 0x28: case 0x29: // vmsumuhs, vmsumshm, vmsumshs + if (!allow_V) goto decode_noV; + if (dis_av_multarith( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Permutations */ + case 0x2A: // vsel + case 0x2B: // vperm + case 0x2C: // vsldoi + if (!allow_V) goto decode_noV; + if (dis_av_permute( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Floating Point Mult-Add/Sub */ + case 0x2E: case 0x2F: // vmaddfp, vnmsubfp + if (!allow_V) goto decode_noV; + if (dis_av_fp_arith( theInstr )) goto decode_success; + goto decode_failure; + + default: + break; // Fall through... 
+ } + + opc2 = IFIELD(theInstr, 0, 11); + switch (opc2) { + /* AV Arithmetic */ + case 0x180: // vaddcuw + case 0x000: case 0x040: case 0x080: // vaddubm, vadduhm, vadduwm + case 0x200: case 0x240: case 0x280: // vaddubs, vadduhs, vadduws + case 0x300: case 0x340: case 0x380: // vaddsbs, vaddshs, vaddsws + case 0x580: // vsubcuw + case 0x400: case 0x440: case 0x480: // vsububm, vsubuhm, vsubuwm + case 0x600: case 0x640: case 0x680: // vsububs, vsubuhs, vsubuws + case 0x700: case 0x740: case 0x780: // vsubsbs, vsubshs, vsubsws + case 0x402: case 0x442: case 0x482: // vavgub, vavguh, vavguw + case 0x502: case 0x542: case 0x582: // vavgsb, vavgsh, vavgsw + case 0x002: case 0x042: case 0x082: // vmaxub, vmaxuh, vmaxuw + case 0x102: case 0x142: case 0x182: // vmaxsb, vmaxsh, vmaxsw + case 0x202: case 0x242: case 0x282: // vminub, vminuh, vminuw + case 0x302: case 0x342: case 0x382: // vminsb, vminsh, vminsw + case 0x008: case 0x048: // vmuloub, vmulouh + case 0x108: case 0x148: // vmulosb, vmulosh + case 0x208: case 0x248: // vmuleub, vmuleuh + case 0x308: case 0x348: // vmulesb, vmulesh + case 0x608: case 0x708: case 0x648: // vsum4ubs, vsum4sbs, vsum4shs + case 0x688: case 0x788: // vsum2sws, vsumsws + if (!allow_V) goto decode_noV; + if (dis_av_arith( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Rotate, Shift */ + case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw + case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw + case 0x204: case 0x244: case 0x284: // vsrb, vsrh, vsrw + case 0x304: case 0x344: case 0x384: // vsrab, vsrah, vsraw + case 0x1C4: case 0x2C4: // vsl, vsr + case 0x40C: case 0x44C: // vslo, vsro + if (!allow_V) goto decode_noV; + if (dis_av_shift( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Logic */ + case 0x404: case 0x444: case 0x484: // vand, vandc, vor + case 0x4C4: case 0x504: // vxor, vnor + if (!allow_V) goto decode_noV; + if (dis_av_logic( theInstr )) goto decode_success; + goto 
decode_failure; + + /* AV Processor Control */ + case 0x604: case 0x644: // mfvscr, mtvscr + if (!allow_V) goto decode_noV; + if (dis_av_procctl( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Floating Point Arithmetic */ + case 0x00A: case 0x04A: // vaddfp, vsubfp + case 0x10A: case 0x14A: case 0x18A: // vrefp, vrsqrtefp, vexptefp + case 0x1CA: // vlogefp + case 0x40A: case 0x44A: // vmaxfp, vminfp + if (!allow_V) goto decode_noV; + if (dis_av_fp_arith( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Floating Point Round/Convert */ + case 0x20A: case 0x24A: case 0x28A: // vrfin, vrfiz, vrfip + case 0x2CA: // vrfim + case 0x30A: case 0x34A: case 0x38A: // vcfux, vcfsx, vctuxs + case 0x3CA: // vctsxs + if (!allow_V) goto decode_noV; + if (dis_av_fp_convert( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Merge, Splat */ + case 0x00C: case 0x04C: case 0x08C: // vmrghb, vmrghh, vmrghw + case 0x10C: case 0x14C: case 0x18C: // vmrglb, vmrglh, vmrglw + case 0x20C: case 0x24C: case 0x28C: // vspltb, vsplth, vspltw + case 0x30C: case 0x34C: case 0x38C: // vspltisb, vspltish, vspltisw + if (!allow_V) goto decode_noV; + if (dis_av_permute( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Pack, Unpack */ + case 0x00E: case 0x04E: case 0x08E: // vpkuhum, vpkuwum, vpkuhus + case 0x0CE: // vpkuwus + case 0x10E: case 0x14E: case 0x18E: // vpkshus, vpkswus, vpkshss + case 0x1CE: // vpkswss + case 0x20E: case 0x24E: case 0x28E: // vupkhsb, vupkhsh, vupklsb + case 0x2CE: // vupklsh + case 0x30E: case 0x34E: case 0x3CE: // vpkpx, vupkhpx, vupklpx + if (!allow_V) goto decode_noV; + if (dis_av_pack( theInstr )) goto decode_success; + goto decode_failure; + + default: + break; // Fall through... 
+ } + + opc2 = IFIELD(theInstr, 0, 10); + switch (opc2) { + + /* AV Compare */ + case 0x006: case 0x046: case 0x086: // vcmpequb, vcmpequh, vcmpequw + case 0x206: case 0x246: case 0x286: // vcmpgtub, vcmpgtuh, vcmpgtuw + case 0x306: case 0x346: case 0x386: // vcmpgtsb, vcmpgtsh, vcmpgtsw + if (!allow_V) goto decode_noV; + if (dis_av_cmp( theInstr )) goto decode_success; + goto decode_failure; + + /* AV Floating Point Compare */ + case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp + case 0x3C6: // vcmpbfp + if (!allow_V) goto decode_noV; + if (dis_av_fp_cmp( theInstr )) goto decode_success; + goto decode_failure; + + default: + goto decode_failure; + } + break; + + default: + goto decode_failure; + + decode_noF: + vassert(!allow_F); + vex_printf("disInstr(ppc): declined to decode an FP insn.\n"); + goto decode_failure; + decode_noV: + vassert(!allow_V); + vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n"); + goto decode_failure; + decode_noFX: + vassert(!allow_FX); + vex_printf("disInstr(ppc): " + "declined to decode a GeneralPurpose-Optional insn.\n"); + goto decode_failure; + decode_noGX: + vassert(!allow_GX); + vex_printf("disInstr(ppc): " + "declined to decode a Graphics-Optional insn.\n"); + goto decode_failure; + + decode_failure: + /* All decode failures end up here. */ + opc2 = (theInstr) & 0x7FF; + vex_printf("disInstr(ppc): unhandled instruction: " + "0x%x\n", theInstr); + vex_printf(" primary %d(0x%x), secondary %u(0x%x)\n", + opc1, opc1, opc2, opc2); + + /* Tell the dispatcher that this insn cannot be decoded, and so has + not been executed, and (is currently) the next to be executed. + CIA should be up-to-date since it made so at the start of each + insn, but nevertheless be paranoid and update it again right + now. 
*/ + putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) ); + irsb->next = mkSzImm(ty, guest_CIA_curr_instr); + irsb->jumpkind = Ijk_NoDecode; + dres.whatNext = Dis_StopHere; + dres.len = 0; + return dres; + + } /* switch (opc) for the main (primary) opcode switch. */ + + decode_success: + /* All decode successes end up here. */ + DIP("\n"); + + if (dres.len == 0) { + dres.len = 4; + } else { + vassert(dres.len == 20); + } + return dres; + } + + #undef DIP + #undef DIS + + + /*------------------------------------------------------------*/ + /*--- Top-level fn ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. This is a thin entry-point wrapper: it asserts that guest_arch is VexArchPPC32 or VexArchPPC64, asserts that archinfo->hwcaps carries no capability bits belonging to the other word size, sets the file-level globals (mode64, guest_code, irsb, host_is_bigendian, guest_CIA_curr_instr, guest_CIA_bbstart) and returns the DisResult produced by disInstr_PPC_WRK. guest_arch, archinfo and abiinfo are the only checks made here; irsb_IN and guest_code_IN are stored without validation. */ + + DisResult disInstr_PPC ( IRSB* irsb_IN, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code_IN, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian_IN ) + { + IRType ty; + DisResult dres; + UInt mask32, mask64; + UInt hwcaps_guest = archinfo->hwcaps; + + vassert(guest_arch == VexArchPPC32 || guest_arch == VexArchPPC64); + + /* global -- ick */ + mode64 = guest_arch == VexArchPPC64; + ty = mode64 ?
Ity_I64 : Ity_I32; + + /* do some sanity checks: hwcaps must not contain any capability bit that belongs to the other word size (mask32 collects the PPC32 bits, mask64 the PPC64 bits) */ + mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V + | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX; + + mask64 = VEX_HWCAPS_PPC64_V + | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX; + + if (mode64) { + vassert((hwcaps_guest & mask32) == 0); + } else { + vassert((hwcaps_guest & mask64) == 0); + } + + /* Set globals (see top of this file) */ + guest_code = guest_code_IN; + irsb = irsb_IN; + host_is_bigendian = host_bigendian_IN; + + guest_CIA_curr_instr = mkSzAddr(ty, guest_IP); + guest_CIA_bbstart = mkSzAddr(ty, guest_IP - delta); + + dres = disInstr_PPC_WRK ( put_IP, resteerOkFn, callback_opaque, + delta, archinfo, abiinfo ); + + return dres; + } + + + /*------------------------------------------------------------*/ + /*--- Unused stuff ---*/ + /*------------------------------------------------------------*/ + + ///* A potentially more memcheck-friendly implementation of Clz32, with + // the boundary case Clz32(0) = 32, which is what ppc requires. */ + // + //static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg ) + //{ + // /* Welcome ... to SSA R Us. */ + // IRTemp n1 = newTemp(Ity_I32); + // IRTemp n2 = newTemp(Ity_I32); + // IRTemp n3 = newTemp(Ity_I32); + // IRTemp n4 = newTemp(Ity_I32); + // IRTemp n5 = newTemp(Ity_I32); + // IRTemp n6 = newTemp(Ity_I32); + // IRTemp n7 = newTemp(Ity_I32); + // IRTemp n8 = newTemp(Ity_I32); + // IRTemp n9 = newTemp(Ity_I32); + // IRTemp n10 = newTemp(Ity_I32); + // IRTemp n11 = newTemp(Ity_I32); + // IRTemp n12 = newTemp(Ity_I32); + // + // /* First, propagate the most significant 1-bit into all lower + // positions in the word.
*/ + // /* unsigned int clz ( unsigned int n ) + // { + // n |= (n >> 1); + // n |= (n >> 2); + // n |= (n >> 4); + // n |= (n >> 8); + // n |= (n >> 16); + // return bitcount(~n); + // } + // */ + // assign(n1, mkexpr(arg)); + // assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1)))); + // assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2)))); + // assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4)))); + // assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8)))); + // assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16)))); + // /* This gives a word of the form 0---01---1. Now invert it, giving + // a word of the form 1---10---0, then do a population-count idiom + // (to count the 1s, which is the number of leading zeroes, or 32 + // if the original word was 0. */ + // assign(n7, unop(Iop_Not32, mkexpr(n6))); + // + // /* unsigned int bitcount ( unsigned int n ) + // { + // n = n - ((n >> 1) & 0x55555555); + // n = (n & 0x33333333) + ((n >> 2) & 0x33333333); + // n = (n + (n >> 4)) & 0x0F0F0F0F; + // n = n + (n >> 8); + // n = (n + (n >> 16)) & 0x3F; + // return n; + // } + // */ + // assign(n8, + // binop(Iop_Sub32, + // mkexpr(n7), + // binop(Iop_And32, + // binop(Iop_Shr32, mkexpr(n7), mkU8(1)), + // mkU32(0x55555555)))); + // assign(n9, + // binop(Iop_Add32, + // binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)), + // binop(Iop_And32, + // binop(Iop_Shr32, mkexpr(n8), mkU8(2)), + // mkU32(0x33333333)))); + // assign(n10, + // binop(Iop_And32, + // binop(Iop_Add32, + // mkexpr(n9), + // binop(Iop_Shr32, mkexpr(n9), mkU8(4))), + // mkU32(0x0F0F0F0F))); + // assign(n11, + // binop(Iop_Add32, + // mkexpr(n10), + // binop(Iop_Shr32, mkexpr(n10), mkU8(8)))); + // assign(n12, + // binop(Iop_Add32, + // mkexpr(n11), + // binop(Iop_Shr32, mkexpr(n11), mkU8(16)))); + // return + // binop(Iop_And32, mkexpr(n12), mkU32(0x3F)); + //} + + 
/*--------------------------------------------------------------------*/ + /*--- end guest_ppc_toIR.c ---*/ + /*--------------------------------------------------------------------*/ Index: VEX/priv/guest_x86_defs.h =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_x86_defs.h Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,417 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_x86_defs.h) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. 
In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Only to be used within the guest-x86 directory. */ + + #ifndef __VEX_GUEST_X86_DEFS_H + #define __VEX_GUEST_X86_DEFS_H + + + /*---------------------------------------------------------*/ + /*--- x86 to IR conversion ---*/ + /*---------------------------------------------------------*/ + + /* Convert one x86 insn to IR. See the type DisOneInstrFn in + bb_to_IR.h. */ + extern + DisResult disInstr_X86 ( IRSB* irbb, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian ); + + /* Used by the optimiser to specialise calls to helpers. */ + extern + IRExpr* guest_x86_spechelper ( HChar* function_name, + IRExpr** args ); + + /* Describes to the optimiser which parts of the guest state require + precise memory exceptions. This is logically part of the guest + state description.
*/ + extern + Bool guest_x86_state_requires_precise_mem_exns ( Int, Int ); + + extern + VexGuestLayout x86guest_layout; + + + /*---------------------------------------------------------*/ + /*--- x86 guest helpers ---*/ + /*---------------------------------------------------------*/ + + /* --- CLEAN HELPERS --- */ + + extern UInt x86g_calculate_eflags_all ( + UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep + ); + + __attribute((regparm(3))) + extern UInt x86g_calculate_eflags_c ( + UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep + ); + + extern UInt x86g_calculate_condition ( + UInt/*X86Condcode*/ cond, + UInt cc_op, + UInt cc_dep1, UInt cc_dep2, UInt cc_ndep + ); + + extern UInt x86g_calculate_FXAM ( UInt tag, ULong dbl ); + + extern ULong x86g_calculate_RCR ( + UInt arg, UInt rot_amt, UInt eflags_in, UInt sz + ); + extern ULong x86g_calculate_RCL ( + UInt arg, UInt rot_amt, UInt eflags_in, UInt sz + ); + + extern UInt x86g_calculate_daa_das_aaa_aas ( UInt AX_and_flags, UInt opcode ); + + extern ULong x86g_check_fldcw ( UInt fpucw ); + + extern UInt x86g_create_fpucw ( UInt fpround ); + + extern ULong x86g_check_ldmxcsr ( UInt mxcsr ); + + extern UInt x86g_create_mxcsr ( UInt sseround ); + + + /* Translate a guest virtual_addr into a guest linear address by + consulting the supplied LDT/GDT structures. Their representation + must be as specified in pub/libvex_guest_x86.h. To indicate a + translation failure, 1<<32 is returned. On success, the lower 32 + bits of the returned result indicate the linear address. 
+ */ + extern + ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, + UInt seg_selector, UInt virtual_addr ); + + extern ULong x86g_calculate_mmx_pmaddwd ( ULong, ULong ); + extern ULong x86g_calculate_mmx_psadbw ( ULong, ULong ); + extern UInt x86g_calculate_mmx_pmovmskb ( ULong ); + extern UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); + + + /* --- DIRTY HELPERS --- */ + + extern ULong x86g_dirtyhelper_loadF80le ( UInt ); + + extern void x86g_dirtyhelper_storeF80le ( UInt, ULong ); + + extern void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* ); + extern void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* ); + extern void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* ); + + extern void x86g_dirtyhelper_FINIT ( VexGuestX86State* ); + + extern void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, HWord ); + extern void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, HWord ); + extern void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ); + + extern ULong x86g_dirtyhelper_RDTSC ( void ); + + extern UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ ); + extern void x86g_dirtyhelper_OUT ( UInt portno, UInt data, + UInt sz/*1,2 or 4*/ ); + + extern VexEmWarn + x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, HWord ); + + extern VexEmWarn + x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, HWord ); + + extern VexEmWarn + x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord ); + + + /*---------------------------------------------------------*/ + /*--- Condition code stuff ---*/ + /*---------------------------------------------------------*/ + + /* eflags masks */ + #define X86G_CC_SHIFT_O 11 + #define X86G_CC_SHIFT_S 7 + #define X86G_CC_SHIFT_Z 6 + #define X86G_CC_SHIFT_A 4 + #define X86G_CC_SHIFT_C 0 + #define X86G_CC_SHIFT_P 2 + + #define X86G_CC_MASK_O (1 << X86G_CC_SHIFT_O) + #define X86G_CC_MASK_S (1 << X86G_CC_SHIFT_S) + #define X86G_CC_MASK_Z (1 << X86G_CC_SHIFT_Z) + #define X86G_CC_MASK_A (1 << X86G_CC_SHIFT_A) + #define X86G_CC_MASK_C (1 << 
X86G_CC_SHIFT_C) + #define X86G_CC_MASK_P (1 << X86G_CC_SHIFT_P) + + /* FPU flag masks */ + #define X86G_FC_SHIFT_C3 14 + #define X86G_FC_SHIFT_C2 10 + #define X86G_FC_SHIFT_C1 9 + #define X86G_FC_SHIFT_C0 8 + + #define X86G_FC_MASK_C3 (1 << X86G_FC_SHIFT_C3) + #define X86G_FC_MASK_C2 (1 << X86G_FC_SHIFT_C2) + #define X86G_FC_MASK_C1 (1 << X86G_FC_SHIFT_C1) + #define X86G_FC_MASK_C0 (1 << X86G_FC_SHIFT_C0) + + + /* %EFLAGS thunk descriptors. A four-word thunk is used to record + details of the most recent flag-setting operation, so the flags can + be computed later if needed. It is possible to do this a little + more efficiently using a 3-word thunk, but that makes it impossible + to describe the flag data dependencies sufficiently accurately for + Memcheck. Hence 4 words are used, with minimal loss of efficiency. + + The four words are: + + CC_OP, which describes the operation. + + CC_DEP1 and CC_DEP2. These are arguments to the operation. + We want Memcheck to believe that the resulting flags are + data-dependent on both CC_DEP1 and CC_DEP2, hence the + name DEP. + + CC_NDEP. This is a 3rd argument to the operation which is + sometimes needed. We arrange things so that Memcheck does + not believe the resulting flags are data-dependent on CC_NDEP + ("not dependent"). + + To make Memcheck believe that (the definedness of) the encoded + flags depends only on (the definedness of) CC_DEP1 and CC_DEP2 + requires two things: + + (1) In the guest state layout info (x86guest_layout), CC_OP and + CC_NDEP are marked as always defined. + + (2) When passing the thunk components to an evaluation function + (calculate_condition, calculate_eflags, calculate_eflags_c) the + IRCallee's mcx_mask must be set so as to exclude from + consideration all passed args except CC_DEP1 and CC_DEP2. + + Strictly speaking only (2) is necessary for correctness. 
However, + (1) helps efficiency in that since (2) means we never ask about the + definedness of CC_OP or CC_NDEP, we may as well not even bother to + track their definedness. + + When building the thunk, it is always necessary to write words into + CC_DEP1 and CC_DEP2, even if those args are not used given the + CC_OP field (eg, CC_DEP2 is not used if CC_OP is CC_LOGIC1/2/4). + This is important because otherwise Memcheck could give false + positives as it does not understand the relationship between the + CC_OP field and CC_DEP1 and CC_DEP2, and so believes that the + definedness of the stored flags always depends on both CC_DEP1 and + CC_DEP2. + + However, it is only necessary to set CC_NDEP when the CC_OP value + requires it, because Memcheck ignores CC_NDEP, and the evaluation + functions do understand the CC_OP fields and will only examine + CC_NDEP for suitable values of CC_OP. + + A summary of the field usages is: + + Operation DEP1 DEP2 NDEP + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + add/sub/mul first arg second arg unused + + adc/sbb first arg (second arg) + XOR old_carry old_carry + + and/or/xor result zero unused + + inc/dec result zero old_carry + + shl/shr/sar result subshifted- unused + result + + rol/ror result zero old_flags + + copy old_flags zero unused. + + + Therefore Memcheck will believe the following: + + * add/sub/mul -- definedness of result flags depends on definedness + of both args. + + * adc/sbb -- definedness of result flags depends on definedness of + both args and definedness of the old C flag. Because only two + DEP fields are available, the old C flag is XOR'd into the second + arg so that Memcheck sees the data dependency on it. That means + the NDEP field must contain a second copy of the old C flag + so that the evaluation functions can correctly recover the second + arg. + + * and/or/xor are straightforward -- definedness of result flags + depends on definedness of result value. 
+ + * inc/dec -- definedness of result flags depends only on + definedness of result. This isn't really true -- it also depends + on the old C flag. However, we don't want Memcheck to see that, + and so the old C flag must be passed in NDEP and not in DEP2. + It's inconceivable that a compiler would generate code that puts + the C flag in an undefined state, then does an inc/dec, which + leaves C unchanged, and then makes a conditional jump/move based + on C. So our fiction seems a good approximation. + + * shl/shr/sar -- straightforward, again, definedness of result + flags depends on definedness of result value. The subshifted + value (value shifted one less) is also needed, but its + definedness is the same as the definedness of the shifted value. + + * rol/ror -- these only set O and C, and leave A Z S P alone. + However it seems prudent (as per inc/dec) to say the definedness + of all resulting flags depends on the definedness of the result, + hence the old flags must go in as NDEP and not DEP2. + + * rcl/rcr are too difficult to do in-line, and so are done by a + helper function. They are not part of this scheme. The helper + function takes the value to be rotated, the rotate amount and the + old flags, and returns the new flags and the rotated value. + Since the helper's mcx_mask does not have any set bits, Memcheck + will lazily propagate undefinedness from any of the 3 args into + both results (flags and actual value). 
+ */ + enum { + X86G_CC_OP_COPY=0, /* DEP1 = current flags, DEP2 = 0, NDEP = unused */ + /* just copy DEP1 to output */ + + X86G_CC_OP_ADDB, /* 1 */ + X86G_CC_OP_ADDW, /* 2 DEP1 = argL, DEP2 = argR, NDEP = unused */ + X86G_CC_OP_ADDL, /* 3 */ + + X86G_CC_OP_SUBB, /* 4 */ + X86G_CC_OP_SUBW, /* 5 DEP1 = argL, DEP2 = argR, NDEP = unused */ + X86G_CC_OP_SUBL, /* 6 */ + + X86G_CC_OP_ADCB, /* 7 */ + X86G_CC_OP_ADCW, /* 8 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */ + X86G_CC_OP_ADCL, /* 9 */ + + X86G_CC_OP_SBBB, /* 10 */ + X86G_CC_OP_SBBW, /* 11 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */ + X86G_CC_OP_SBBL, /* 12 */ + + X86G_CC_OP_LOGICB, /* 13 */ + X86G_CC_OP_LOGICW, /* 14 DEP1 = result, DEP2 = 0, NDEP = unused */ + X86G_CC_OP_LOGICL, /* 15 */ + + X86G_CC_OP_INCB, /* 16 */ + X86G_CC_OP_INCW, /* 17 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */ + X86G_CC_OP_INCL, /* 18 */ + + X86G_CC_OP_DECB, /* 19 */ + X86G_CC_OP_DECW, /* 20 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */ + X86G_CC_OP_DECL, /* 21 */ + + X86G_CC_OP_SHLB, /* 22 DEP1 = res, DEP2 = res', NDEP = unused */ + X86G_CC_OP_SHLW, /* 23 where res' is like res but shifted one bit less */ + X86G_CC_OP_SHLL, /* 24 */ + + X86G_CC_OP_SHRB, /* 25 DEP1 = res, DEP2 = res', NDEP = unused */ + X86G_CC_OP_SHRW, /* 26 where res' is like res but shifted one bit less */ + X86G_CC_OP_SHRL, /* 27 */ + + X86G_CC_OP_ROLB, /* 28 */ + X86G_CC_OP_ROLW, /* 29 DEP1 = res, DEP2 = 0, NDEP = old flags */ + X86G_CC_OP_ROLL, /* 30 */ + + X86G_CC_OP_RORB, /* 31 */ + X86G_CC_OP_RORW, /* 32 DEP1 = res, DEP2 = 0, NDEP = old flags */ + X86G_CC_OP_RORL, /* 33 */ + + X86G_CC_OP_UMULB, /* 34 */ + X86G_CC_OP_UMULW, /* 35 DEP1 = argL, DEP2 = argR, NDEP = unused */ + X86G_CC_OP_UMULL, /* 36 */ + + X86G_CC_OP_SMULB, /* 37 */ + X86G_CC_OP_SMULW, /* 38 DEP1 = argL, DEP2 = argR, NDEP = unused */ + X86G_CC_OP_SMULL, /* 39 */ + + X86G_CC_OP_NUMBER + }; + + typedef + enum { + X86CondO = 0, /* overflow */ + X86CondNO = 1, /* no 
overflow */ + + X86CondB = 2, /* below */ + X86CondNB = 3, /* not below */ + + X86CondZ = 4, /* zero */ + X86CondNZ = 5, /* not zero */ + + X86CondBE = 6, /* below or equal */ + X86CondNBE = 7, /* not below or equal */ + + X86CondS = 8, /* negative */ + X86CondNS = 9, /* not negative */ + + X86CondP = 10, /* parity even */ + X86CondNP = 11, /* not parity even */ + + X86CondL = 12, /* jump less */ + X86CondNL = 13, /* not less */ + + X86CondLE = 14, /* less or equal */ + X86CondNLE = 15, /* not less or equal */ + + X86CondAlways = 16 /* HACK */ + } + X86Condcode; + + #endif /* ndef __VEX_GUEST_X86_DEFS_H */ + + /*---------------------------------------------------------------*/ + /*--- end guest_x86_defs.h ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_x86_helpers.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_x86_helpers.c Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,2746 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_x86_helpers.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. 
+ + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + #include "libvex_basictypes.h" + #include "libvex_emwarn.h" + #include "libvex_guest_x86.h" + #include "libvex_ir.h" + #include "libvex.h" + + #include "main_util.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_x86_defs.h" + #include "guest_generic_x87.h" + + + /* This file contains helper functions for x86 guest code. + Calls to these functions are generated by the back end. + These calls are of course in the host machine code and + this file will be compiled to host machine code, so that + all makes sense. + + Only change the signatures of these helper functions very + carefully. If you change the signature here, you'll have to change + the parameters passed to it in the IR calls constructed by + guest-x86/toIR.c. 
+ + The convention used is that all functions called from generated + code are named x86g_, and any function whose name lacks + that prefix is not called from generated code. Note that some + LibVEX_* functions can however be called by VEX's client, but that + is not the same as calling them from VEX-generated code. + */ + + + /* Set to 1 to get detailed profiling info about use of the flag + machinery. */ + #define PROFILE_EFLAGS 0 + + + /*---------------------------------------------------------------*/ + /*--- %eflags run-time helpers. ---*/ + /*---------------------------------------------------------------*/ + + static const UChar parity_table[256] = { + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 
X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, + 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, + }; + + /* generalised left-shifter */ + inline static Int lshift ( Int x, Int n ) + { + if (n >= 0) + return x << n; + else + return x >> (-n); + } + + /* identity on ULong */ + static inline ULong idULong ( ULong x ) + { + return x; + } + + + #define PREAMBLE(__data_bits) \ + /* const */ UInt DATA_MASK \ + = __data_bits==8 ? 0xFF \ + : (__data_bits==16 ? 0xFFFF \ + : 0xFFFFFFFF); \ + /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1); \ + /* const */ UInt CC_DEP1 = cc_dep1_formal; \ + /* const */ UInt CC_DEP2 = cc_dep2_formal; \ + /* const */ UInt CC_NDEP = cc_ndep_formal; \ + /* Four bogus assignments, which hopefully gcc can */ \ + /* optimise away, and which stop it complaining about */ \ + /* unused variables. 
*/ \ + SIGN_MASK = SIGN_MASK; \ + DATA_MASK = DATA_MASK; \ + CC_DEP2 = CC_DEP2; \ + CC_NDEP = CC_NDEP; + + + /*-------------------------------------------------------------*/ + + #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, res; \ + argL = CC_DEP1; \ + argR = CC_DEP2; \ + res = argL + argR; \ + cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ + 12 - DATA_BITS) & X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, res; \ + argL = CC_DEP1; \ + argR = CC_DEP2; \ + res = argL - argR; \ + cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR) & (argL ^ res), \ + 12 - DATA_BITS) & X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, oldC, res; \ + oldC = CC_NDEP & X86G_CC_MASK_C; \ + argL = CC_DEP1; \ + argR = CC_DEP2 ^ oldC; \ + res = (argL + argR) + oldC; \ + if (oldC) \ + cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \ + else \ + cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR ^ -1) & (argL ^ res), \ + 12 - DATA_BITS) & 
X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, oldC, res; \ + oldC = CC_NDEP & X86G_CC_MASK_C; \ + argL = CC_DEP1; \ + argR = CC_DEP2 ^ oldC; \ + res = (argL - argR) - oldC; \ + if (oldC) \ + cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \ + else \ + cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = lshift((argL ^ argR) & (argL ^ res), \ + 12 - DATA_BITS) & X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + cf = 0; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + of = 0; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, res; \ + res = CC_DEP1; \ + argL = res - 1; \ + argR = 1; \ + cf = CC_NDEP & X86G_CC_MASK_C; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = ((res & DATA_MASK) == SIGN_MASK) << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + Int argL, argR, res; \ + res = CC_DEP1; \ + argL = res + 1; \ + 
argR = 1; \ + cf = CC_NDEP & X86G_CC_MASK_C; \ + pf = parity_table[(UChar)res]; \ + af = (res ^ argL ^ argR) & 0x10; \ + zf = ((DATA_UTYPE)res == 0) << 6; \ + sf = lshift(res, 8 - DATA_BITS) & 0x80; \ + of = ((res & DATA_MASK) \ + == ((UInt)SIGN_MASK - 1)) << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; /* undefined */ \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + /* of is defined if shift count == 1 */ \ + of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ + & X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + cf = CC_DEP2 & 1; \ + pf = parity_table[(UChar)CC_DEP1]; \ + af = 0; /* undefined */ \ + zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \ + sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \ + /* of is defined if shift count == 1 */ \ + of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \ + & X86G_CC_MASK_O; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */ + /* DEP1 = result, NDEP = old flags */ + #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int fl \ + = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \ + | (X86G_CC_MASK_C & CC_DEP1) \ + | (X86G_CC_MASK_O & (lshift(CC_DEP1, \ + 11-(DATA_BITS-1)) \ + ^ lshift(CC_DEP1, 11))); \ + return fl; \ + } \ + } + + /*-------------------------------------------------------------*/ + + /* ROR: cf' = msb(result). 
of' = msb(result) ^ msb-1(result). */ + /* DEP1 = result, NDEP = old flags */ + #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int fl \ + = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \ + | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \ + | (X86G_CC_MASK_O & (lshift(CC_DEP1, \ + 11-(DATA_BITS-1)) \ + ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \ + return fl; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \ + DATA_U2TYPE, NARROWto2U) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + DATA_UTYPE hi; \ + DATA_UTYPE lo \ + = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \ + * ((DATA_UTYPE)CC_DEP2) ); \ + DATA_U2TYPE rr \ + = NARROWto2U( \ + ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \ + * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \ + hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \ + cf = (hi != 0); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + /*-------------------------------------------------------------*/ + + #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \ + DATA_S2TYPE, NARROWto2S) \ + { \ + PREAMBLE(DATA_BITS); \ + { Int cf, pf, af, zf, sf, of; \ + DATA_STYPE hi; \ + DATA_STYPE lo \ + = NARROWtoS( ((DATA_STYPE)CC_DEP1) \ + * ((DATA_STYPE)CC_DEP2) ); \ + DATA_S2TYPE rr \ + = NARROWto2S( \ + ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \ + * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \ + hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \ + cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \ + pf = parity_table[(UChar)lo]; \ + af = 0; /* undefined */ \ + zf = (lo == 0) << 6; \ + sf = lshift(lo, 8 - DATA_BITS) & 0x80; \ + of = cf << 11; \ + return cf | pf | af | zf | sf | of; \ + } \ + } + + + #if PROFILE_EFLAGS + + static Bool initted = False; + + /* C flag, fast route */ + static UInt tabc_fast[X86G_CC_OP_NUMBER]; + 
/* C flag, slow route */ + static UInt tabc_slow[X86G_CC_OP_NUMBER]; + /* table for calculate_cond */ + static UInt tab_cond[X86G_CC_OP_NUMBER][16]; + /* total entry counts for calc_all, calc_c, calc_cond. */ + static UInt n_calc_all = 0; + static UInt n_calc_c = 0; + static UInt n_calc_cond = 0; + + #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond))) + + + static void showCounts ( void ) + { + Int op, co; + Char ch; + vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n", + n_calc_all, n_calc_cond, n_calc_c); + + vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE" + " S NS P NP L NL LE NLE\n"); + vex_printf(" -----------------------------------------------------" + "----------------------------------------\n"); + for (op = 0; op < X86G_CC_OP_NUMBER; op++) { + + ch = ' '; + if (op > 0 && (op-1) % 3 == 0) + ch = 'B'; + if (op > 0 && (op-1) % 3 == 1) + ch = 'W'; + if (op > 0 && (op-1) % 3 == 2) + ch = 'L'; + + vex_printf("%2d%c: ", op, ch); + vex_printf("%6u ", tabc_slow[op]); + vex_printf("%6u ", tabc_fast[op]); + for (co = 0; co < 16; co++) { + Int n = tab_cond[op][co]; + if (n >= 1000) { + vex_printf(" %3dK", n / 1000); + } else + if (n >= 0) { + vex_printf(" %3d ", n ); + } else { + vex_printf(" "); + } + } + vex_printf("\n"); + } + vex_printf("\n"); + } + + static void initCounts ( void ) + { + Int op, co; + initted = True; + for (op = 0; op < X86G_CC_OP_NUMBER; op++) { + tabc_fast[op] = tabc_slow[op] = 0; + for (co = 0; co < 16; co++) + tab_cond[op][co] = 0; + } + } + + #endif /* PROFILE_EFLAGS */ + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate all the 6 flags from the supplied thunk parameters. + Worker function, not directly called from generated code. 
*/ + static + UInt x86g_calculate_eflags_all_WRK ( UInt cc_op, + UInt cc_dep1_formal, + UInt cc_dep2_formal, + UInt cc_ndep_formal ) + { + switch (cc_op) { + case X86G_CC_OP_COPY: + return cc_dep1_formal + & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z + | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P); + + case X86G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar ); + case X86G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort ); + case X86G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt ); + + case X86G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar ); + case X86G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort ); + case X86G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt ); + + case X86G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar ); + case X86G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort ); + case X86G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt ); + + case X86G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar ); + case X86G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort ); + case X86G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt ); + + case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar ); + case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort ); + case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt ); + + case X86G_CC_OP_INCB: ACTIONS_INC( 8, UChar ); + case X86G_CC_OP_INCW: ACTIONS_INC( 16, UShort ); + case X86G_CC_OP_INCL: ACTIONS_INC( 32, UInt ); + + case X86G_CC_OP_DECB: ACTIONS_DEC( 8, UChar ); + case X86G_CC_OP_DECW: ACTIONS_DEC( 16, UShort ); + case X86G_CC_OP_DECL: ACTIONS_DEC( 32, UInt ); + + case X86G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar ); + case X86G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort ); + case X86G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt ); + + case X86G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar ); + case X86G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort ); + case X86G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt ); + + case X86G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar ); + case X86G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort ); + case X86G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt ); + + case X86G_CC_OP_RORB: ACTIONS_ROR( 8, UChar ); + case X86G_CC_OP_RORW: ACTIONS_ROR( 16, UShort ); + case X86G_CC_OP_RORL: ACTIONS_ROR( 32, UInt ); + + case 
X86G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar, + UShort, toUShort ); + case X86G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort, + UInt, toUInt ); + case X86G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt, + ULong, idULong ); + + case X86G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar, + Short, toUShort ); + case X86G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort, + Int, toUInt ); + case X86G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt, + Long, idULong ); + + default: + /* shouldn't really make these calls from generated code */ + vex_printf("x86g_calculate_eflags_all_WRK(X86)" + "( %u, 0x%x, 0x%x, 0x%x )\n", + cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal ); + vpanic("x86g_calculate_eflags_all_WRK(X86)"); + } + } + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate all the 6 flags from the supplied thunk parameters. */ + UInt x86g_calculate_eflags_all ( UInt cc_op, + UInt cc_dep1, + UInt cc_dep2, + UInt cc_ndep ) + { + # if PROFILE_EFLAGS + if (!initted) initCounts(); + n_calc_all++; + if (SHOW_COUNTS_NOW) showCounts(); + # endif + return + x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep ); + } + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* Calculate just the carry flag from the supplied thunk parameters. */ + __attribute((regparm(3))) + UInt x86g_calculate_eflags_c ( UInt cc_op, + UInt cc_dep1, + UInt cc_dep2, + UInt cc_ndep ) + { + # if PROFILE_EFLAGS + if (!initted) initCounts(); + n_calc_c++; + tabc_fast[cc_op]++; + if (SHOW_COUNTS_NOW) showCounts(); + # endif + + /* Fast-case some common ones. */ + switch (cc_op) { + case X86G_CC_OP_LOGICL: + case X86G_CC_OP_LOGICW: + case X86G_CC_OP_LOGICB: + return 0; + case X86G_CC_OP_SUBL: + return ((UInt)cc_dep1) < ((UInt)cc_dep2) + ? X86G_CC_MASK_C : 0; + case X86G_CC_OP_SUBW: + return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF)) + ? X86G_CC_MASK_C : 0; + case X86G_CC_OP_SUBB: + return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF)) + ? 
X86G_CC_MASK_C : 0; + case X86G_CC_OP_INCL: + case X86G_CC_OP_DECL: + return cc_ndep & X86G_CC_MASK_C; + default: + break; + } + + # if PROFILE_EFLAGS + tabc_fast[cc_op]--; + tabc_slow[cc_op]++; + # endif + + return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep) + & X86G_CC_MASK_C; + } + + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + /* returns 1 or 0 */ + UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond, + UInt cc_op, + UInt cc_dep1, + UInt cc_dep2, + UInt cc_ndep ) + { + UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1, + cc_dep2, cc_ndep); + UInt of,sf,zf,cf,pf; + UInt inv = cond & 1; + + # if PROFILE_EFLAGS + if (!initted) initCounts(); + tab_cond[cc_op][cond]++; + n_calc_cond++; + if (SHOW_COUNTS_NOW) showCounts(); + # endif + + switch (cond) { + case X86CondNO: + case X86CondO: /* OF == 1 */ + of = eflags >> X86G_CC_SHIFT_O; + return 1 & (inv ^ of); + + case X86CondNZ: + case X86CondZ: /* ZF == 1 */ + zf = eflags >> X86G_CC_SHIFT_Z; + return 1 & (inv ^ zf); + + case X86CondNB: + case X86CondB: /* CF == 1 */ + cf = eflags >> X86G_CC_SHIFT_C; + return 1 & (inv ^ cf); + break; + + case X86CondNBE: + case X86CondBE: /* (CF or ZF) == 1 */ + cf = eflags >> X86G_CC_SHIFT_C; + zf = eflags >> X86G_CC_SHIFT_Z; + return 1 & (inv ^ (cf | zf)); + break; + + case X86CondNS: + case X86CondS: /* SF == 1 */ + sf = eflags >> X86G_CC_SHIFT_S; + return 1 & (inv ^ sf); + + case X86CondNP: + case X86CondP: /* PF == 1 */ + pf = eflags >> X86G_CC_SHIFT_P; + return 1 & (inv ^ pf); + + case X86CondNL: + case X86CondL: /* (SF xor OF) == 1 */ + sf = eflags >> X86G_CC_SHIFT_S; + of = eflags >> X86G_CC_SHIFT_O; + return 1 & (inv ^ (sf ^ of)); + break; + + case X86CondNLE: + case X86CondLE: /* ((SF xor OF) or ZF) == 1 */ + sf = eflags >> X86G_CC_SHIFT_S; + of = eflags >> X86G_CC_SHIFT_O; + zf = eflags >> X86G_CC_SHIFT_Z; + return 1 & (inv ^ ((sf ^ of) | zf)); + break; + + default: + /* shouldn't really make these calls from generated code */ + 
vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n", + cond, cc_op, cc_dep1, cc_dep2, cc_ndep ); + vpanic("x86g_calculate_condition"); + } + } + + + /* VISIBLE TO LIBVEX CLIENT */ + UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state ) + { + UInt eflags = x86g_calculate_eflags_all_WRK( + vex_state->guest_CC_OP, + vex_state->guest_CC_DEP1, + vex_state->guest_CC_DEP2, + vex_state->guest_CC_NDEP + ); + UInt dflag = vex_state->guest_DFLAG; + vassert(dflag == 1 || dflag == 0xFFFFFFFF); + if (dflag == 0xFFFFFFFF) + eflags |= (1<<10); + if (vex_state->guest_IDFLAG == 1) + eflags |= (1<<21); + if (vex_state->guest_ACFLAG == 1) + eflags |= (1<<18); + + return eflags; + } + + /* VISIBLE TO LIBVEX CLIENT */ + void + LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag, + /*MOD*/VexGuestX86State* vex_state ) + { + UInt oszacp = x86g_calculate_eflags_all_WRK( + vex_state->guest_CC_OP, + vex_state->guest_CC_DEP1, + vex_state->guest_CC_DEP2, + vex_state->guest_CC_NDEP + ); + if (new_carry_flag & 1) { + oszacp |= X86G_CC_MASK_C; + } else { + oszacp &= ~X86G_CC_MASK_C; + } + vex_state->guest_CC_OP = X86G_CC_OP_COPY; + vex_state->guest_CC_DEP1 = oszacp; + vex_state->guest_CC_DEP2 = 0; + vex_state->guest_CC_NDEP = 0; + } + + + /*---------------------------------------------------------------*/ + /*--- %eflags translation-time function specialisers. ---*/ + /*--- These help iropt specialise calls the above run-time ---*/ + /*--- %eflags functions. ---*/ + /*---------------------------------------------------------------*/ + + /* Used by the optimiser to try specialisations. Returns an + equivalent expression, or NULL if none. 
*/ + + static inline Bool isU32 ( IRExpr* e, UInt n ) + { + return + toBool( e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U32 + && e->Iex.Const.con->Ico.U32 == n ); + } + + IRExpr* guest_x86_spechelper ( HChar* function_name, + IRExpr** args ) + { + # define unop(_op,_a1) IRExpr_Unop((_op),(_a1)) + # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2)) + # define mkU32(_n) IRExpr_Const(IRConst_U32(_n)) + # define mkU8(_n) IRExpr_Const(IRConst_U8(_n)) + + Int i, arity = 0; + for (i = 0; args[i]; i++) + arity++; + # if 0 + vex_printf("spec request:\n"); + vex_printf(" %s ", function_name); + for (i = 0; i < arity; i++) { + vex_printf(" "); + ppIRExpr(args[i]); + } + vex_printf("\n"); + # endif + + /* --------- specialising "x86g_calculate_condition" --------- */ + + if (vex_streq(function_name, "x86g_calculate_condition")) { + /* specialise calls to above "calculate condition" function */ + IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2; + vassert(arity == 5); + cond = args[0]; + cc_op = args[1]; + cc_dep1 = args[2]; + cc_dep2 = args[3]; + + /*---------------- ADDL ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) { + /* long add, then Z --> test (dst+src == 0) */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, + binop(Iop_Add32, cc_dep1, cc_dep2), + mkU32(0))); + } + + /*---------------- SUBL ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) { + /* long sub/cmp, then Z --> test dst==src */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, cc_dep1, cc_dep2)); + } + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) { + /* long sub/cmp, then NZ --> test dst!=src */ + return unop(Iop_1Uto32, + binop(Iop_CmpNE32, cc_dep1, cc_dep2)); + } + + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) { + /* long sub/cmp, then L (signed less than) + --> test dst test !(dst test dst <=s src */ + return unop(Iop_1Uto32, + binop(Iop_CmpLE32S, cc_dep1, cc_dep2)); + } + if (isU32(cc_op, 
X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) { + /* long sub/cmp, then NLE (signed not less than or equal) + --> test dst >s src + --> test !(dst <=s src) */ + return binop(Iop_Xor32, + unop(Iop_1Uto32, + binop(Iop_CmpLE32S, cc_dep1, cc_dep2)), + mkU32(1)); + } + + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) { + /* long sub/cmp, then BE (unsigned less than or equal) + --> test dst <=u src */ + return unop(Iop_1Uto32, + binop(Iop_CmpLE32U, cc_dep1, cc_dep2)); + } + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) { + /* long sub/cmp, then BE (unsigned greater than) + --> test !(dst <=u src) */ + return binop(Iop_Xor32, + unop(Iop_1Uto32, + binop(Iop_CmpLE32U, cc_dep1, cc_dep2)), + mkU32(1)); + } + + if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) { + /* long sub/cmp, then B (unsigned less than) + --> test dst test !(dst test (dst-src test !(dst-src test dst==src */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ16, + unop(Iop_32to16,cc_dep1), + unop(Iop_32to16,cc_dep2))); + } + if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) { + /* word sub/cmp, then NZ --> test dst!=src */ + return unop(Iop_1Uto32, + binop(Iop_CmpNE16, + unop(Iop_32to16,cc_dep1), + unop(Iop_32to16,cc_dep2))); + } + + /*---------------- SUBB ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) { + /* byte sub/cmp, then Z --> test dst==src */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ8, + unop(Iop_32to8,cc_dep1), + unop(Iop_32to8,cc_dep2))); + } + if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) { + /* byte sub/cmp, then NZ --> test dst!=src */ + return unop(Iop_1Uto32, + binop(Iop_CmpNE8, + unop(Iop_32to8,cc_dep1), + unop(Iop_32to8,cc_dep2))); + } + + if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) { + /* byte sub/cmp, then NBE (unsigned greater than) + --> test src test (dst-0 test dst (UInt)dst[7] + This is yet another scheme by which gcc figures out if the + top bit of a byte is 1 or 0. 
See also LOGICB/CondS below. */ + /* Note: isU32(cc_dep2, 0) is correct, even though this is + for an 8-bit comparison, since the args to the helper + function are always U32s. */ + return binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(7)), + mkU32(1)); + } + if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS) + && isU32(cc_dep2, 0)) { + /* byte sub/cmp of zero, then NS --> test !(dst-0 test !(dst (UInt) !dst[7] + */ + return binop(Iop_Xor32, + binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(7)), + mkU32(1)), + mkU32(1)); + } + + /*---------------- LOGICL ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) { + /* long and/or/xor, then Z --> test dst==0 */ + return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); + } + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) { + /* long and/or/xor, then NZ --> test dst!=0 */ + return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) { + /* long and/or/xor, then LE + This is pretty subtle. LOGIC sets SF and ZF according to the + result and makes OF be zero. LE computes (SZ ^ OF) | ZF, but + OF is zero, so this reduces to SZ | ZF -- which will be 1 iff + the result is <=signed 0. Hence ... + */ + return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) { + /* long and/or/xor, then BE + LOGIC sets ZF according to the result and makes CF be zero. + BE computes (CF | ZF), but CF is zero, so this reduces ZF + -- which will be 1 iff the result is zero. Hence ... 
+ */ + return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) { + /* see comment below for (LOGICB, CondS) */ + /* long and/or/xor, then S --> (UInt)result[31] */ + return binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(31)), + mkU32(1)); + } + if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) { + /* see comment below for (LOGICB, CondNS) */ + /* long and/or/xor, then S --> (UInt) ~ result[31] */ + return binop(Iop_Xor32, + binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(31)), + mkU32(1)), + mkU32(1)); + } + + /*---------------- LOGICW ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) { + /* word and/or/xor, then Z --> test dst==0 */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)), + mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) { + /* see comment below for (LOGICB, CondS) */ + /* word and/or/xor, then S --> (UInt)result[15] */ + return binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(15)), + mkU32(1)); + } + + /*---------------- LOGICB ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) { + /* byte and/or/xor, then Z --> test dst==0 */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)), + mkU32(0))); + } + if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) { + /* byte and/or/xor, then Z --> test dst!=0 */ + /* b9ac9: 84 c0 test %al,%al + b9acb: 75 0d jne b9ada */ + return unop(Iop_1Uto32, + binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)), + mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) { + /* this is an idiom gcc sometimes uses to find out if the top + bit of a byte register is set: eg testb %al,%al; js .. + Since it just depends on the top bit of the byte, extract + that bit and explicitly get rid of all the rest. 
This + helps memcheck avoid false positives in the case where any + of the other bits in the byte are undefined. */ + /* byte and/or/xor, then S --> (UInt)result[7] */ + return binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(7)), + mkU32(1)); + } + if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) { + /* ditto, for negation-of-S. */ + /* byte and/or/xor, then S --> (UInt) ~ result[7] */ + return binop(Iop_Xor32, + binop(Iop_And32, + binop(Iop_Shr32,cc_dep1,mkU8(7)), + mkU32(1)), + mkU32(1)); + } + + /*---------------- DECL ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) { + /* dec L, then Z --> test dst == 0 */ + return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); + } + + if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) { + /* dec L, then S --> compare DST test dst == 0 */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, + binop(Iop_Shl32,cc_dep1,mkU8(16)), + mkU32(0))); + } + + /*---------------- INCW ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) { + /* This rewrite helps memcheck on 'incw %ax ; je ...'. */ + /* inc W, then Z --> test dst == 0 */ + return unop(Iop_1Uto32, + binop(Iop_CmpEQ32, + binop(Iop_Shl32,cc_dep1,mkU8(16)), + mkU32(0))); + } + + /*---------------- SHRL ----------------*/ + + if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) { + /* SHRL, then Z --> test dep1 == 0 */ + return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0))); + } + + /*---------------- COPY ----------------*/ + /* This can happen, as a result of x87 FP compares: "fcom ... ; + fnstsw %ax ; sahf ; jbe" for example. */ + + if (isU32(cc_op, X86G_CC_OP_COPY) && + (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) { + /* COPY, then BE --> extract C and Z from dep1, and test + (C or Z) == 1. */ + /* COPY, then NBE --> extract C and Z from dep1, and test + (C or Z) == 0. */ + UInt nnn = isU32(cond, X86CondBE) ? 
1 : 0; + return + unop( + Iop_1Uto32, + binop( + Iop_CmpEQ32, + binop( + Iop_And32, + binop( + Iop_Or32, + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)) + ), + mkU32(1) + ), + mkU32(nnn) + ) + ); + } + + if (isU32(cc_op, X86G_CC_OP_COPY) + && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) { + /* COPY, then B --> extract C from dep1, and test (C == 1). */ + /* COPY, then NB --> extract C from dep1, and test (C == 0). */ + UInt nnn = isU32(cond, X86CondB) ? 1 : 0; + return + unop( + Iop_1Uto32, + binop( + Iop_CmpEQ32, + binop( + Iop_And32, + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), + mkU32(1) + ), + mkU32(nnn) + ) + ); + } + + if (isU32(cc_op, X86G_CC_OP_COPY) + && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) { + /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */ + /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */ + UInt nnn = isU32(cond, X86CondZ) ? 1 : 0; + return + unop( + Iop_1Uto32, + binop( + Iop_CmpEQ32, + binop( + Iop_And32, + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)), + mkU32(1) + ), + mkU32(nnn) + ) + ); + } + + if (isU32(cc_op, X86G_CC_OP_COPY) + && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) { + /* COPY, then P --> extract P from dep1, and test (P == 1). */ + /* COPY, then NP --> extract P from dep1, and test (P == 0). */ + UInt nnn = isU32(cond, X86CondP) ? 
1 : 0; + return + unop( + Iop_1Uto32, + binop( + Iop_CmpEQ32, + binop( + Iop_And32, + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)), + mkU32(1) + ), + mkU32(nnn) + ) + ); + } + + return NULL; + } + + /* --------- specialising "x86g_calculate_eflags_c" --------- */ + + if (vex_streq(function_name, "x86g_calculate_eflags_c")) { + /* specialise calls to above "calculate_eflags_c" function */ + IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep; + vassert(arity == 4); + cc_op = args[0]; + cc_dep1 = args[1]; + cc_dep2 = args[2]; + cc_ndep = args[3]; + + if (isU32(cc_op, X86G_CC_OP_SUBL)) { + /* C after sub denotes unsigned less than */ + return unop(Iop_1Uto32, + binop(Iop_CmpLT32U, cc_dep1, cc_dep2)); + } + if (isU32(cc_op, X86G_CC_OP_SUBB)) { + /* C after sub denotes unsigned less than */ + return unop(Iop_1Uto32, + binop(Iop_CmpLT32U, + binop(Iop_And32,cc_dep1,mkU32(0xFF)), + binop(Iop_And32,cc_dep2,mkU32(0xFF)))); + } + if (isU32(cc_op, X86G_CC_OP_LOGICL) + || isU32(cc_op, X86G_CC_OP_LOGICW) + || isU32(cc_op, X86G_CC_OP_LOGICB)) { + /* cflag after logic is zero */ + return mkU32(0); + } + if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) { + /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */ + return cc_ndep; + } + if (isU32(cc_op, X86G_CC_OP_COPY)) { + /* cflag after COPY is stored in DEP1. 
*/ + return + binop( + Iop_And32, + binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)), + mkU32(1) + ); + } + if (isU32(cc_op, X86G_CC_OP_ADDL)) { + /* C after add denotes sum tag == Iex_Const) { + vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n"); + } + # endif + + return NULL; + } + + /* --------- specialising "x86g_calculate_eflags_all" --------- */ + + if (vex_streq(function_name, "x86g_calculate_eflags_all")) { + /* specialise calls to above "calculate_eflags_all" function */ + IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */ + vassert(arity == 4); + cc_op = args[0]; + cc_dep1 = args[1]; + /* cc_dep2 = args[2]; */ + /* cc_ndep = args[3]; */ + + if (isU32(cc_op, X86G_CC_OP_COPY)) { + /* eflags after COPY are stored in DEP1. */ + return + binop( + Iop_And32, + cc_dep1, + mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z + | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P) + ); + } + return NULL; + } + + # undef unop + # undef binop + # undef mkU32 + # undef mkU8 + + return NULL; + } + + + /*---------------------------------------------------------------*/ + /*--- Supporting functions for x87 FPU activities. ---*/ + /*---------------------------------------------------------------*/ + + static inline Bool host_is_little_endian ( void ) + { + UInt x = 0x76543210; + UChar* p = (UChar*)(&x); + return toBool(*p == 0x10); + } + + /* 80 and 64-bit floating point formats: + + 80-bit: + + S 0 0-------0 zero + S 0 0X------X denormals + S 1-7FFE 1X------X normals (all normals have leading 1) + S 7FFF 10------0 infinity + S 7FFF 10X-----X snan + S 7FFF 11X-----X qnan + + S is the sign bit. For runs X----X, at least one of the Xs must be + nonzero. Exponent is 15 bits, fractional part is 63 bits, and + there is an explicitly represented leading 1, and a sign bit, + giving 80 in total. 
+ + 64-bit avoids the confusion of an explicitly represented leading 1 + and so is simpler: + + S 0 0------0 zero + S 0 X------X denormals + S 1-7FE any normals + S 7FF 0------0 infinity + S 7FF 0X-----X snan + S 7FF 1X-----X qnan + + Exponent is 11 bits, fractional part is 52 bits, and there is a + sign bit, giving 64 in total. + */ + + /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */ + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + UInt x86g_calculate_FXAM ( UInt tag, ULong dbl ) + { + Bool mantissaIsZero; + Int bexp; + UChar sign; + UChar* f64; + + vassert(host_is_little_endian()); + + /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */ + + f64 = (UChar*)(&dbl); + sign = toUChar( (f64[7] >> 7) & 1 ); + + /* First off, if the tag indicates the register was empty, + return 1,0,sign,1 */ + if (tag == 0) { + /* vex_printf("Empty\n"); */ + return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1) + | X86G_FC_MASK_C0; + } + + bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); + bexp &= 0x7FF; + + mantissaIsZero + = toBool( + (f64[6] & 0x0F) == 0 + && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0 + ); + + /* If both exponent and mantissa are zero, the value is zero. + Return 1,0,sign,0. */ + if (bexp == 0 && mantissaIsZero) { + /* vex_printf("Zero\n"); */ + return X86G_FC_MASK_C3 | 0 + | (sign << X86G_FC_SHIFT_C1) | 0; + } + + /* If exponent is zero but mantissa isn't, it's a denormal. + Return 1,1,sign,0. */ + if (bexp == 0 && !mantissaIsZero) { + /* vex_printf("Denormal\n"); */ + return X86G_FC_MASK_C3 | X86G_FC_MASK_C2 + | (sign << X86G_FC_SHIFT_C1) | 0; + } + + /* If the exponent is 7FF and the mantissa is zero, this is an infinity. + Return 0,1,sign,1. */ + if (bexp == 0x7FF && mantissaIsZero) { + /* vex_printf("Inf\n"); */ + return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) + | X86G_FC_MASK_C0; + } + + /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN. + Return 0,0,sign,1. 
*/ + if (bexp == 0x7FF && !mantissaIsZero) { + /* vex_printf("NaN\n"); */ + return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0; + } + + /* Uh, ok, we give up. It must be a normal finite number. + Return 0,1,sign,0. + */ + /* vex_printf("normal\n"); */ + return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0; + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (reads guest memory) */ + ULong x86g_dirtyhelper_loadF80le ( UInt addrU ) + { + ULong f64; + convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 ); + return f64; + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes guest memory) */ + void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 ) + { + convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) ); + } + + + /*----------------------------------------------*/ + /*--- The exported fns .. ---*/ + /*----------------------------------------------*/ + + /* Layout of the real x87 state. */ + /* 13 June 05: Fpu_State and auxiliary constants was moved to + g_generic_x87.h */ + + + /* CLEAN HELPER */ + /* fpucw[15:0] contains a x87 native format FPU control word. + Extract from it the required FPROUND value and any resulting + emulation warning, and return (warn << 32) | fpround value. + */ + ULong x86g_check_fldcw ( UInt fpucw ) + { + /* Decide on a rounding mode. fpucw[11:10] holds it. */ + /* NOTE, encoded exactly as per enum IRRoundingMode. */ + UInt rmode = (fpucw >> 10) & 3; + + /* Detect any required emulation warnings. */ + VexEmWarn ew = EmWarn_NONE; + + if ((fpucw & 0x3F) != 0x3F) { + /* unmasked exceptions! */ + ew = EmWarn_X86_x87exns; + } + else + if (((fpucw >> 8) & 3) != 3) { + /* unsupported precision */ + ew = EmWarn_X86_x87precision; + } + + return (((ULong)ew) << 32) | ((ULong)rmode); + } + + /* CLEAN HELPER */ + /* Given fpround as an IRRoundingMode value, create a suitable x87 + native format FPU control word. 
*/ + UInt x86g_create_fpucw ( UInt fpround ) + { + fpround &= 3; + return 0x037F | (fpround << 10); + } + + + /* CLEAN HELPER */ + /* mxcsr[15:0] contains a SSE native format MXCSR value. + Extract from it the required SSEROUND value and any resulting + emulation warning, and return (warn << 32) | sseround value. + */ + ULong x86g_check_ldmxcsr ( UInt mxcsr ) + { + /* Decide on a rounding mode. mxcsr[14:13] holds it. */ + /* NOTE, encoded exactly as per enum IRRoundingMode. */ + UInt rmode = (mxcsr >> 13) & 3; + + /* Detect any required emulation warnings. */ + VexEmWarn ew = EmWarn_NONE; + + if ((mxcsr & 0x1F80) != 0x1F80) { + /* unmasked exceptions! */ + ew = EmWarn_X86_sseExns; + } + else + if (mxcsr & (1<<15)) { + /* FZ is set */ + ew = EmWarn_X86_fz; + } + else + if (mxcsr & (1<<6)) { + /* DAZ is set */ + ew = EmWarn_X86_daz; + } + + return (((ULong)ew) << 32) | ((ULong)rmode); + } + + + /* CLEAN HELPER */ + /* Given sseround as an IRRoundingMode value, create a suitable SSE + native format MXCSR value. */ + UInt x86g_create_mxcsr ( UInt sseround ) + { + sseround &= 3; + return 0x1F80 | (sseround << 13); + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes guest state) */ + /* Initialise the x87 FPU state as per 'finit'. */ + void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst ) + { + Int i; + gst->guest_FTOP = 0; + for (i = 0; i < 8; i++) { + gst->guest_FPTAG[i] = 0; /* empty */ + gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */ + } + gst->guest_FPROUND = (UInt)Irrm_NEAREST; + gst->guest_FC3210 = 0; + } + + + /* This is used to implement both 'frstor' and 'fldenv'. The latter + appears to differ from the former only in that the 8 FP registers + themselves are not transferred into the guest state. 
*/
+/* Load the guest x87 state from the image at 'x87_state', which is
+   read as an Fpu_State.  Tags, top-of-stack, the 0x4700 bits of the
+   status word and the rounding mode are always transferred; the
+   register values only when 'moveRegs' is set.  Returns whatever
+   emulation warning x86g_check_fldcw produces for the image's
+   control word. */
+static
+VexEmWarn do_put_x87 ( Bool moveRegs,
+                       /*IN*/UChar* x87_state,
+                       /*OUT*/VexGuestX86State* vex_state )
+{
+   Fpu_State* image     = (Fpu_State*)x87_state;
+   ULong*     guestRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+   UChar*     guestTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+   UInt       statusw   = image->env[FP_ENV_STAT];
+   UInt       tagw      = image->env[FP_ENV_TAG];
+   UInt       ctrlw     = image->env[FP_ENV_CTRL];
+   UInt       top       = (statusw >> 11) & 7;
+   UInt       tagBits;
+   ULong      checked;
+   Int        st, phys;
+
+   /* The image holds registers in ST order; the guest state indexes
+      them by physical register number. */
+   for (st = 0; st < 8; st++) {
+      phys    = (st + top) & 7;
+      tagBits = (tagw >> (2*phys)) & 3;
+
+      if (moveRegs) {
+         /* An empty register is still written (with zero), so that
+            memcheck's view that this helper defines every FP
+            register stays in sync with what actually happened. */
+         if (tagBits == 3)
+            guestRegs[phys] = 0; /* IEEE754 64-bit zero */
+         else
+            convert_f80le_to_f64le( &image->reg[10*st],
+                                    (UChar*)&guestRegs[phys] );
+      }
+
+      /* Guest tags are 0 for empty and 1 for non-empty; only tag
+         value 3 in the image counts as empty. */
+      guestTags[phys] = (tagBits == 3) ? 0 : 1;
+   }
+
+   /* stack pointer and status word */
+   vex_state->guest_FTOP   = top;
+   vex_state->guest_FC3210 = statusw & 0x4700;
+
+   /* control word: low 32 bits of the check result carry the
+      rounding mode, high 32 bits the emulation warning */
+   checked = x86g_check_fldcw ( (UInt)ctrlw );
+   vex_state->guest_FPROUND = ((UInt)checked) & 3;
+
+   /* emulation warnings --> caller */
+   return (VexEmWarn)(checked >> 32);
+}
+
+
+/* Create an x87 FPU state from the guest state, as close as
+   we can approximate it.
*/ + static + void do_get_x87 ( /*IN*/VexGuestX86State* vex_state, + /*OUT*/UChar* x87_state ) + { + Int i, stno, preg; + UInt tagw; + ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); + UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); + Fpu_State* x87 = (Fpu_State*)x87_state; + UInt ftop = vex_state->guest_FTOP; + UInt c3210 = vex_state->guest_FC3210; + + for (i = 0; i < 14; i++) + x87->env[i] = 0; + + x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; + x87->env[FP_ENV_STAT] + = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); + x87->env[FP_ENV_CTRL] + = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND )); + + /* Dump the register stack in ST order. */ + tagw = 0; + for (stno = 0; stno < 8; stno++) { + preg = (stno + ftop) & 7; + if (vexTags[preg] == 0) { + /* register is empty */ + tagw |= (3 << (2*preg)); + convert_f64le_to_f80le( (UChar*)&vexRegs[preg], + &x87->reg[10*stno] ); + } else { + /* register is full. */ + tagw |= (0 << (2*preg)); + convert_f64le_to_f80le( (UChar*)&vexRegs[preg], + &x87->reg[10*stno] ); + } + } + x87->env[FP_ENV_TAG] = toUShort(tagw); + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (reads guest state, writes guest mem) */ + void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr ) + { + /* Somewhat roundabout, but at least it's simple. */ + Fpu_State tmp; + UShort* addrS = (UShort*)addr; + UChar* addrC = (UChar*)addr; + U128* xmm = (U128*)(addr + 160); + UInt mxcsr; + UShort fp_tags; + UInt summary_tags; + Int r, stno; + UShort *srcS, *dstS; + + do_get_x87( gst, (UChar*)&tmp ); + mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND ); + + /* Now build the proper fxsave image from the x87 image we just + made. 
*/ + + addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ + addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */ + + /* set addrS[2] in an endian-independent way */ + summary_tags = 0; + fp_tags = tmp.env[FP_ENV_TAG]; + for (r = 0; r < 8; r++) { + if ( ((fp_tags >> (2*r)) & 3) != 3 ) + summary_tags |= (1 << r); + } + addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ + addrC[5] = 0; /* pad */ + + addrS[3] = 0; /* FOP: fpu opcode (bogus) */ + addrS[4] = 0; + addrS[5] = 0; /* FPU IP (bogus) */ + addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we + could conceivably dump %CS here) */ + + addrS[7] = 0; /* Intel reserved */ + + addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */ + addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */ + addrS[10] = 0; /* segment selector for above operand pointer; %DS + perhaps? */ + addrS[11] = 0; /* Intel reserved */ + + addrS[12] = toUShort(mxcsr); /* MXCSR */ + addrS[13] = toUShort(mxcsr >> 16); + + addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */ + addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */ + + /* Copy in the FP registers, in ST order. */ + for (stno = 0; stno < 8; stno++) { + srcS = (UShort*)(&tmp.reg[10*stno]); + dstS = (UShort*)(&addrS[16 + 8*stno]); + dstS[0] = srcS[0]; + dstS[1] = srcS[1]; + dstS[2] = srcS[2]; + dstS[3] = srcS[3]; + dstS[4] = srcS[4]; + dstS[5] = 0; + dstS[6] = 0; + dstS[7] = 0; + } + + /* That's the first 160 bytes of the image done. Now only %xmm0 + .. %xmm7 remain to be copied. If the host is big-endian, these + need to be byte-swapped. 
*/ + vassert(host_is_little_endian()); + + # define COPY_U128(_dst,_src) \ + do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ + _dst[2] = _src[2]; _dst[3] = _src[3]; } \ + while (0) + + COPY_U128( xmm[0], gst->guest_XMM0 ); + COPY_U128( xmm[1], gst->guest_XMM1 ); + COPY_U128( xmm[2], gst->guest_XMM2 ); + COPY_U128( xmm[3], gst->guest_XMM3 ); + COPY_U128( xmm[4], gst->guest_XMM4 ); + COPY_U128( xmm[5], gst->guest_XMM5 ); + COPY_U128( xmm[6], gst->guest_XMM6 ); + COPY_U128( xmm[7], gst->guest_XMM7 ); + + # undef COPY_U128 + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes guest state, reads guest mem) */ + VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr ) + { + Fpu_State tmp; + VexEmWarn warnX87 = EmWarn_NONE; + VexEmWarn warnXMM = EmWarn_NONE; + UShort* addrS = (UShort*)addr; + UChar* addrC = (UChar*)addr; + U128* xmm = (U128*)(addr + 160); + UShort fp_tags; + Int r, stno, i; + + /* Restore %xmm0 .. %xmm7. If the host is big-endian, these need + to be byte-swapped. */ + vassert(host_is_little_endian()); + + # define COPY_U128(_dst,_src) \ + do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ + _dst[2] = _src[2]; _dst[3] = _src[3]; } \ + while (0) + + COPY_U128( gst->guest_XMM0, xmm[0] ); + COPY_U128( gst->guest_XMM1, xmm[1] ); + COPY_U128( gst->guest_XMM2, xmm[2] ); + COPY_U128( gst->guest_XMM3, xmm[3] ); + COPY_U128( gst->guest_XMM4, xmm[4] ); + COPY_U128( gst->guest_XMM5, xmm[5] ); + COPY_U128( gst->guest_XMM6, xmm[6] ); + COPY_U128( gst->guest_XMM7, xmm[7] ); + + # undef COPY_U128 + + /* Copy the x87 registers out of the image, into a temporary + Fpu_State struct. 
*/ + for (i = 0; i < 14; i++) tmp.env[i] = 0; + for (i = 0; i < 80; i++) tmp.reg[i] = 0; + /* fill in tmp.reg[0..7] */ + for (stno = 0; stno < 8; stno++) { + UShort* dstS = (UShort*)(&tmp.reg[10*stno]); + UShort* srcS = (UShort*)(&addrS[16 + 8*stno]); + dstS[0] = srcS[0]; + dstS[1] = srcS[1]; + dstS[2] = srcS[2]; + dstS[3] = srcS[3]; + dstS[4] = srcS[4]; + } + /* fill in tmp.env[0..13] */ + tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */ + tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */ + + fp_tags = 0; + for (r = 0; r < 8; r++) { + if (addrC[4] & (1<> 32); + + gst->guest_SSEROUND = (UInt)w64; + } + + /* Prefer an X87 emwarn over an XMM one, if both exist. */ + if (warnX87 != EmWarn_NONE) + return warnX87; + else + return warnXMM; + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (reads guest state, writes guest mem) */ + void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr ) + { + do_get_x87( gst, (UChar*)addr ); + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes guest state, reads guest mem) */ + VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr ) + { + return do_put_x87( True/*regs too*/, (UChar*)addr, gst ); + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (reads guest state, writes guest mem) */ + void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr ) + { + /* Somewhat roundabout, but at least it's simple. */ + Int i; + UShort* addrP = (UShort*)addr; + Fpu_State tmp; + do_get_x87( gst, (UChar*)&tmp ); + for (i = 0; i < 14; i++) + addrP[i] = tmp.env[i]; + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (writes guest state, reads guest mem) */ + VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr ) + { + return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst); + } + + + /*---------------------------------------------------------------*/ + /*--- Misc integer helpers, including rotates and CPUID. 
---*/
+/*---------------------------------------------------------------*/
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Rotate 'arg' right through the carry bit.  'sz' is the operand
+   size in bytes (1, 2 or 4); any other value panics.  The rotated
+   value is returned in the low 32 bits and the new flags
+   (eflags_in with C and O replaced) in the high 32 bits.
+*/
+ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
+{
+   UInt carry = 0, ovf = 0;
+
+   if (sz == 4 || sz == 2 || sz == 1) {
+      /* The operand and the carry bit together form a ring of
+         8*sz+1 bits, so the 5-bit count is reduced modulo that.
+         For sz == 4 the reduction cannot change the count. */
+      UInt msb     = 8 * sz - 1;
+      UInt lowMask = (((UInt)1) << msb) - 1;
+      UInt steps   = (rot_amt & 0x1F) % (msb + 2);
+      UInt shiftedOut;
+
+      carry = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+
+      /* O comes from the operand's top bit and the incoming carry,
+         sampled before any rotation is done. */
+      ovf = ((arg >> msb) ^ carry) & 1;
+
+      for (; steps > 0; steps--) {
+         shiftedOut = arg & 1;
+         arg        = ((arg >> 1) & lowMask) | (carry << msb);
+         carry      = shiftedOut;
+      }
+   } else {
+      vpanic("calculate_RCR: invalid size");
+   }
+
+   carry &= 1;
+   ovf   &= 1;
+   eflags_in = (eflags_in & ~(X86G_CC_MASK_C | X86G_CC_MASK_O))
+               | (carry << X86G_CC_SHIFT_C)
+               | (ovf << X86G_CC_SHIFT_O);
+
+   return ((ULong)arg) | (((ULong)eflags_in) << 32);
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate both flags and value result for rotate left
+   through the carry bit.  Result in low 32 bits,
+   new flags (OSZACP) in high 32 bits.
+*/
+ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
+{
+   /* 'sz' is the operand size in bytes (1, 2 or 4); any other value
+      panics.  C and O in eflags_in are replaced; all other flag
+      bits pass through unchanged. */
+   UInt carry = 0, ovf = 0;
+
+   if (sz == 4 || sz == 2 || sz == 1) {
+      /* The operand and the carry bit together form a ring of
+         8*sz+1 bits, so the 5-bit count is reduced modulo that.
+         For sz == 4 the reduction cannot change the count. */
+      UInt msb      = 8 * sz - 1;
+      UInt keepMask = 0xFFFFFFFF >> (31 - msb);
+      UInt steps    = (rot_amt & 0x1F) % (msb + 2);
+      UInt shiftedOut;
+
+      carry = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+
+      for (; steps > 0; steps--) {
+         shiftedOut = (arg >> msb) & 1;
+         arg        = keepMask & ((arg << 1) | (carry & 1));
+         carry      = shiftedOut;
+      }
+
+      /* O comes from the top bit of the result and the outgoing
+         carry, sampled after all rotation is done. */
+      ovf = ((arg >> msb) ^ carry) & 1;
+   } else {
+      vpanic("calculate_RCL: invalid size");
+   }
+
+   carry &= 1;
+   ovf   &= 1;
+   eflags_in = (eflags_in & ~(X86G_CC_MASK_C | X86G_CC_MASK_O))
+               | (carry << X86G_CC_SHIFT_C)
+               | (ovf << X86G_CC_SHIFT_O);
+
+   return ((ULong)arg) | (((ULong)eflags_in) << 32);
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
+   AX value in low half of arg, OSZACP in upper half.
+   See guest-x86/toIR.c usage point for details.
+*/
+static UInt calc_parity_8bit ( UInt w32 ) { /* returns 1 when the low 8 bits of w32 contain an even number of 1s, else 0 */
+   UInt i;
+   UInt p = 1; /* start at 1; each set bit below flips it */
+   for (i = 0; i < 8; i++)
+      p ^= (1 & (w32 >> i));
+   return p;
+}
+UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode ) /* opcode selects the operation: 0x27 DAA, 0x2F DAS, 0x37 AAA, 0x3F AAS; the result uses the same packing as flags_and_AX */
+{
+   UInt r_AL = (flags_and_AX >> 0) & 0xFF; /* AL: bits 7:0 */
+   UInt r_AH = (flags_and_AX >> 8) & 0xFF; /* AH: bits 15:8 */
+   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1; /* each flag sits at its X86G_CC_SHIFT_* position, offset by 16 */
+   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
+   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
+   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
+   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
+   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
+   UInt result = 0;
+
+   switch (opcode) {
+      case 0x27: { /* DAA */
+         UInt old_AL = r_AL;
+         UInt old_C  = r_C;
+         r_C = 0;
+         if ((r_AL & 0xF) > 9 || r_A == 1) { /* low-nibble adjust */
+            r_AL = r_AL + 6;
+            r_C  = old_C;
+            if (r_AL >= 0x100) r_C = 1; /* r_AL is not masked yet, so a carry out of 8 bits is visible here */
+            r_A = 1;
+         } else {
+            r_A = 0;
+         }
+         if (old_AL > 0x99 || old_C == 1) { /* high-nibble adjust; both arms assign r_C, superseding the value chosen above */
+            r_AL = r_AL + 0x60;
+            r_C  = 1;
+         } else {
+            r_C = 0;
+         }
+         /* O is undefined.  S Z and P are set according to the
+            result. */
+         r_AL &= 0xFF;
+         r_O = 0; /* let's say */
+         r_S = (r_AL & 0x80) ? 1 : 0;
+         r_Z = (r_AL == 0) ? 1 : 0;
+         r_P = calc_parity_8bit( r_AL );
+         break;
+      }
+      case 0x2F: { /* DAS */
+         UInt old_AL = r_AL;
+         UInt old_C  = r_C;
+         r_C = 0;
+         if ((r_AL & 0xF) > 9 || r_A == 1) { /* low-nibble adjust */
+            Bool borrow = r_AL < 6; /* the subtraction below would go below zero */
+            r_AL = r_AL - 6;
+            r_C  = old_C;
+            if (borrow) r_C = 1;
+            r_A = 1;
+         } else {
+            r_A = 0;
+         }
+         if (old_AL > 0x99 || old_C == 1) { /* high-nibble adjust */
+            r_AL = r_AL - 0x60;
+            r_C  = 1;
+         } else {
+            /* Intel docs are wrong: r_C = 0; -- so unlike DAA, r_C keeps the value from the low-nibble step */
+         }
+         /* O is undefined.  S Z and P are set according to the
+            result. */
+         r_AL &= 0xFF;
+         r_O = 0; /* let's say */
+         r_S = (r_AL & 0x80) ? 1 : 0;
+         r_Z = (r_AL == 0) ? 1 : 0;
+         r_P = calc_parity_8bit( r_AL );
+         break;
+      }
+      case 0x37: { /* AAA */
+         Bool nudge = r_AL > 0xF9; /* AL + 6 would carry out of 8 bits */
+         if ((r_AL & 0xF) > 9 || r_A == 1) {
+            r_AL = r_AL + 6;
+            r_AH = r_AH + 1 + (nudge ? 1 : 0);
+            r_A  = 1;
+            r_C  = 1;
+            r_AL = r_AL & 0xF;
+         } else {
+            r_A  = 0;
+            r_C  = 0;
+            r_AL = r_AL & 0xF;
+         }
+         /* O S Z and P are undefined. */
+         r_O = r_S = r_Z = r_P = 0; /* let's say */
+         break;
+      }
+      case 0x3F: { /* AAS */
+         Bool nudge = r_AL < 0x06; /* AL - 6 would borrow */
+         if ((r_AL & 0xF) > 9 || r_A == 1) {
+            r_AL = r_AL - 6;
+            r_AH = r_AH - 1 - (nudge ? 1 : 0);
+            r_A  = 1;
+            r_C  = 1;
+            r_AL = r_AL & 0xF;
+         } else {
+            r_A  = 0;
+            r_C  = 0;
+            r_AL = r_AL & 0xF;
+         }
+         /* O S Z and P are undefined. */
+         r_O = r_S = r_Z = r_P = 0; /* let's say */
+         break;
+      }
+      default:
+         vassert(0); /* any other opcode is a caller error */
+   }
+   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) ) /* repack; AH and AL are truncated to 8 bits here */
+            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
+            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
+            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
+            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
+            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
+            | ( (r_AH & 0xFF) << 8 )
+            | ( (r_AL & 0xFF) << 0 );
+   return result;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack.  On non-x86 platforms, return 1.
*/ + ULong x86g_dirtyhelper_RDTSC ( void ) + { + # if defined(__i386__) + ULong res; + __asm__ __volatile__("rdtsc" : "=A" (res)); + return res; + # else + return 1ULL; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (modifies guest state) */ + /* Claim to be a P55C (Intel Pentium/MMX) */ + void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st ) + { + switch (st->guest_EAX) { + case 0: + st->guest_EAX = 0x1; + st->guest_EBX = 0x756e6547; + st->guest_ECX = 0x6c65746e; + st->guest_EDX = 0x49656e69; + break; + default: + st->guest_EAX = 0x543; + st->guest_EBX = 0x0; + st->guest_ECX = 0x0; + st->guest_EDX = 0x8001bf; + break; + } + } + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (modifies guest state) */ + /* Claim to be the following SSE1-capable CPU: + vendor_id : GenuineIntel + cpu family : 6 + model : 11 + model name : Intel(R) Pentium(R) III CPU family 1133MHz + stepping : 1 + cpu MHz : 1131.013 + cache size : 512 KB + */ + void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st ) + { + switch (st->guest_EAX) { + case 0: + st->guest_EAX = 0x00000002; + st->guest_EBX = 0x756e6547; + st->guest_ECX = 0x6c65746e; + st->guest_EDX = 0x49656e69; + break; + case 1: + st->guest_EAX = 0x000006b1; + st->guest_EBX = 0x00000004; + st->guest_ECX = 0x00000000; + st->guest_EDX = 0x0383fbff; + break; + default: + st->guest_EAX = 0x03020101; + st->guest_EBX = 0x00000000; + st->guest_ECX = 0x00000000; + st->guest_EDX = 0x0c040883; + break; + } + } + + /* Claim to be the following SSSE3-capable CPU (2 x ...): + vendor_id : GenuineIntel + cpu family : 6 + model : 15 + model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz + stepping : 6 + cpu MHz : 2394.000 + cache size : 4096 KB + physical id : 0 + siblings : 2 + core id : 0 + cpu cores : 2 + fpu : yes + fpu_exception : yes + cpuid level : 10 + wp : yes + flags : fpu vme de pse tsc msr pae mce cx8 apic sep + mtrr pge mca cmov pat pse36 clflush dts acpi + mmx fxsr sse sse2 ss ht tm syscall nx lm + 
constant_tsc pni monitor ds_cpl vmx est tm2 + cx16 xtpr lahf_lm + bogomips : 4798.78 + clflush size : 64 + cache_alignment : 64 + address sizes : 36 bits physical, 48 bits virtual + power management: + */ + void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st ) + { + # define SET_ABCD(_a,_b,_c,_d) \ + do { st->guest_EAX = (UInt)(_a); \ + st->guest_EBX = (UInt)(_b); \ + st->guest_ECX = (UInt)(_c); \ + st->guest_EDX = (UInt)(_d); \ + } while (0) + + switch (st->guest_EAX) { + case 0x00000000: + SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); + break; + case 0x00000001: + SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); + break; + case 0x00000002: + SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); + break; + case 0x00000003: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000004: { + switch (st->guest_ECX) { + case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f, + 0x0000003f, 0x00000001); break; + case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f, + 0x0000003f, 0x00000001); break; + case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f, + 0x00000fff, 0x00000001); break; + default: SET_ABCD(0x00000000, 0x00000000, + 0x00000000, 0x00000000); break; + } + break; + } + case 0x00000005: + SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); + break; + case 0x00000006: + SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); + break; + case 0x00000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000008: + SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000009: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x0000000a: + unhandled_eax_value: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000000: + SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000001: + SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000); + break; + case 0x80000002: + 
SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); + break; + case 0x80000003: + SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); + break; + case 0x80000004: + SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); + break; + case 0x80000005: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000006: + SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); + break; + case 0x80000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000008: + SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); + break; + default: + goto unhandled_eax_value; + } + # undef SET_ABCD + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-x86 platforms, return 0. */ + UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ ) + { + # if defined(__i386__) + UInt r = 0; + portno &= 0xFFFF; + switch (sz) { + case 4: + __asm__ __volatile__("movl $0,%%eax; inl %w1,%0" + : "=a" (r) : "Nd" (portno)); + break; + case 2: + __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0" + : "=a" (r) : "Nd" (portno)); + break; + case 1: + __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0" + : "=a" (r) : "Nd" (portno)); + break; + default: + break; + } + return r; + # else + return 0; + # endif + } + + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (non-referentially-transparent) */ + /* Horrible hack. On non-x86 platforms, do nothing. 
*/ + void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ ) + { + # if defined(__i386__) + portno &= 0xFFFF; + switch (sz) { + case 4: + __asm__ __volatile__("outl %0, %w1" + : : "a" (data), "Nd" (portno)); + break; + case 2: + __asm__ __volatile__("outw %w0, %w1" + : : "a" (data), "Nd" (portno)); + break; + case 1: + __asm__ __volatile__("outb %b0, %w1" + : : "a" (data), "Nd" (portno)); + break; + default: + break; + } + # else + /* do nothing */ + # endif + } + + + /*---------------------------------------------------------------*/ + /*--- Helpers for MMX/SSE/SSE2. ---*/ + /*---------------------------------------------------------------*/ + + static inline UChar abdU8 ( UChar xx, UChar yy ) { + return toUChar(xx>yy ? xx-yy : yy-xx); + } + + static inline ULong mk32x2 ( UInt w1, UInt w0 ) { + return (((ULong)w1) << 32) | ((ULong)w0); + } + + static inline UShort sel16x4_3 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32 >> 16); + } + static inline UShort sel16x4_2 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32); + } + static inline UShort sel16x4_1 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32 >> 16); + } + static inline UShort sel16x4_0 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32); + } + + static inline UChar sel8x8_7 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 24); + } + static inline UChar sel8x8_6 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 16); + } + static inline UChar sel8x8_5 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 8); + } + static inline UChar sel8x8_4 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUChar(hi32 >> 0); + } + static inline UChar sel8x8_3 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 24); + } + static inline UChar sel8x8_2 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 16); + } + static 
inline UChar sel8x8_1 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 8); + } + static inline UChar sel8x8_0 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUChar(lo32 >> 0); + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) + { + return + mk32x2( + (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy))) + + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))), + (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy))) + + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))) + ); + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + UInt x86g_calculate_mmx_pmovmskb ( ULong xx ) + { + UInt r = 0; + if (xx & (1ULL << (64-1))) r |= (1<<7); + if (xx & (1ULL << (56-1))) r |= (1<<6); + if (xx & (1ULL << (48-1))) r |= (1<<5); + if (xx & (1ULL << (40-1))) r |= (1<<4); + if (xx & (1ULL << (32-1))) r |= (1<<3); + if (xx & (1ULL << (24-1))) r |= (1<<2); + if (xx & (1ULL << (16-1))) r |= (1<<1); + if (xx & (1ULL << ( 8-1))) r |= (1<<0); + return r; + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy ) + { + UInt t = 0; + t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) ); + t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) ); + t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) ); + t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) ); + t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) ); + t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) ); + t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) ); + t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) ); + t &= 0xFFFF; + return (ULong)t; + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) + { + UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi ); + UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo ); + return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF); + } + + + /*---------------------------------------------------------------*/ + /*--- Helpers 
for dealing with segment overrides. ---*/ + /*---------------------------------------------------------------*/ + + static inline + UInt get_segdescr_base ( VexGuestX86SegDescr* ent ) + { + UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow; + UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid; + UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi; + return (hi << 24) | (mid << 16) | lo; + } + + static inline + UInt get_segdescr_limit ( VexGuestX86SegDescr* ent ) + { + UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow; + UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi; + UInt limit = (hi << 16) | lo; + if (ent->LdtEnt.Bits.Granularity) + limit = (limit << 12) | 0xFFF; + return limit; + } + + /* CALLED FROM GENERATED CODE: CLEAN HELPER */ + ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, + UInt seg_selector, UInt virtual_addr ) + { + UInt tiBit, base, limit; + VexGuestX86SegDescr* the_descrs; + + Bool verboze = False; + + /* If this isn't true, we're in Big Trouble. */ + vassert(8 == sizeof(VexGuestX86SegDescr)); + + if (verboze) + vex_printf("x86h_use_seg_selector: " + "seg_selector = 0x%x, vaddr = 0x%x\n", + seg_selector, virtual_addr); + + /* Check for wildly invalid selector. */ + if (seg_selector & ~0xFFFF) + goto bad; + + seg_selector &= 0x0000FFFF; + + /* Sanity check the segment selector. Ensure that RPL=11b (least + privilege). This forms the bottom 2 bits of the selector. */ + if ((seg_selector & 3) != 3) + goto bad; + + /* Extract the TI bit (0 means GDT, 1 means LDT) */ + tiBit = (seg_selector >> 2) & 1; + + /* Convert the segment selector onto a table index */ + seg_selector >>= 3; + vassert(seg_selector >= 0 && seg_selector < 8192); + + if (tiBit == 0) { + + /* GDT access. */ + /* Do we actually have a GDT to look at? */ + if (gdt == 0) + goto bad; + + /* Check for access to non-existent entry. 
*/ + if (seg_selector >= VEX_GUEST_X86_GDT_NENT) + goto bad; + + the_descrs = (VexGuestX86SegDescr*)gdt; + base = get_segdescr_base (&the_descrs[seg_selector]); + limit = get_segdescr_limit(&the_descrs[seg_selector]); + + } else { + + /* All the same stuff, except for the LDT. */ + if (ldt == 0) + goto bad; + + if (seg_selector >= VEX_GUEST_X86_LDT_NENT) + goto bad; + + the_descrs = (VexGuestX86SegDescr*)ldt; + base = get_segdescr_base (&the_descrs[seg_selector]); + limit = get_segdescr_limit(&the_descrs[seg_selector]); + + } + + /* Do the limit check. Note, this check is just slightly too + slack. Really it should be "if (virtual_addr + size - 1 >= + limit)," but we don't have the size info to hand. Getting it + could be significantly complex. */ + if (virtual_addr >= limit) + goto bad; + + if (verboze) + vex_printf("x86h_use_seg_selector: " + "base = 0x%x, addr = 0x%x\n", + base, base + virtual_addr); + + /* High 32 bits are zero, indicating success. */ + return (ULong)( ((UInt)virtual_addr) + base ); + + bad: + return 1ULL << 32; + } + + + /*---------------------------------------------------------------*/ + /*--- Helpers for dealing with, and describing, ---*/ + /*--- guest state as a whole. ---*/ + /*---------------------------------------------------------------*/ + + /* Initialise the entire x86 guest state. 
*/ + /* VISIBLE TO LIBVEX CLIENT */ + void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state ) + { + vex_state->guest_EAX = 0; + vex_state->guest_ECX = 0; + vex_state->guest_EDX = 0; + vex_state->guest_EBX = 0; + vex_state->guest_ESP = 0; + vex_state->guest_EBP = 0; + vex_state->guest_ESI = 0; + vex_state->guest_EDI = 0; + + vex_state->guest_CC_OP = X86G_CC_OP_COPY; + vex_state->guest_CC_DEP1 = 0; + vex_state->guest_CC_DEP2 = 0; + vex_state->guest_CC_NDEP = 0; + vex_state->guest_DFLAG = 1; /* forwards */ + vex_state->guest_IDFLAG = 0; + vex_state->guest_ACFLAG = 0; + + vex_state->guest_EIP = 0; + + /* Initialise the simulated FPU */ + x86g_dirtyhelper_FINIT( vex_state ); + + /* Initialse the SSE state. */ + # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0; + + vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST; + SSEZERO(vex_state->guest_XMM0); + SSEZERO(vex_state->guest_XMM1); + SSEZERO(vex_state->guest_XMM2); + SSEZERO(vex_state->guest_XMM3); + SSEZERO(vex_state->guest_XMM4); + SSEZERO(vex_state->guest_XMM5); + SSEZERO(vex_state->guest_XMM6); + SSEZERO(vex_state->guest_XMM7); + + # undef SSEZERO + + vex_state->guest_CS = 0; + vex_state->guest_DS = 0; + vex_state->guest_ES = 0; + vex_state->guest_FS = 0; + vex_state->guest_GS = 0; + vex_state->guest_SS = 0; + vex_state->guest_LDT = 0; + vex_state->guest_GDT = 0; + + vex_state->guest_EMWARN = EmWarn_NONE; + + /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */ + vex_state->guest_TISTART = 0; + vex_state->guest_TILEN = 0; + + vex_state->guest_NRADDR = 0; + vex_state->guest_SC_CLASS = 0; + vex_state->guest_IP_AT_SYSCALL = 0; + + vex_state->padding1 = 0; + vex_state->padding2 = 0; + vex_state->padding3 = 0; + } + + + /* Figure out if any part of the guest state contained in minoff + .. maxoff requires precise memory exceptions. If in doubt return + True (but this is generates significantly slower code). + + By default we enforce precise exns for guest %ESP, %EBP and %EIP + only. 
These are the minimum needed to extract correct stack + backtraces from x86 code. + */ + Bool guest_x86_state_requires_precise_mem_exns ( Int minoff, + Int maxoff) + { + Int ebp_min = offsetof(VexGuestX86State, guest_EBP); + Int ebp_max = ebp_min + 4 - 1; + Int esp_min = offsetof(VexGuestX86State, guest_ESP); + Int esp_max = esp_min + 4 - 1; + Int eip_min = offsetof(VexGuestX86State, guest_EIP); + Int eip_max = eip_min + 4 - 1; + + if (maxoff < ebp_min || minoff > ebp_max) { + /* no overlap with ebp */ + } else { + return True; + } + + if (maxoff < esp_min || minoff > esp_max) { + /* no overlap with esp */ + } else { + return True; + } + + if (maxoff < eip_min || minoff > eip_max) { + /* no overlap with eip */ + } else { + return True; + } + + return False; + } + + + #define ALWAYSDEFD(field) \ + { offsetof(VexGuestX86State, field), \ + (sizeof ((VexGuestX86State*)0)->field) } + + VexGuestLayout + x86guest_layout + = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestX86State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestX86State,guest_ESP), + .sizeof_SP = 4, + + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestX86State,guest_EBP), + .sizeof_FP = 4, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestX86State,guest_EIP), + .sizeof_IP = 4, + + /* Describe any sections to be regarded by Memcheck as + 'always-defined'. */ + .n_alwaysDefd = 24, + + /* flags thunk: OP and NDEP are always defd, whereas DEP1 + and DEP2 have to be tracked. See detailed comment in + gdefs.h on meaning of thunk fields. 
*/ + .alwaysDefd + = { /* 0 */ ALWAYSDEFD(guest_CC_OP), + /* 1 */ ALWAYSDEFD(guest_CC_NDEP), + /* 2 */ ALWAYSDEFD(guest_DFLAG), + /* 3 */ ALWAYSDEFD(guest_IDFLAG), + /* 4 */ ALWAYSDEFD(guest_ACFLAG), + /* 5 */ ALWAYSDEFD(guest_EIP), + /* 6 */ ALWAYSDEFD(guest_FTOP), + /* 7 */ ALWAYSDEFD(guest_FPTAG), + /* 8 */ ALWAYSDEFD(guest_FPROUND), + /* 9 */ ALWAYSDEFD(guest_FC3210), + /* 10 */ ALWAYSDEFD(guest_CS), + /* 11 */ ALWAYSDEFD(guest_DS), + /* 12 */ ALWAYSDEFD(guest_ES), + /* 13 */ ALWAYSDEFD(guest_FS), + /* 14 */ ALWAYSDEFD(guest_GS), + /* 15 */ ALWAYSDEFD(guest_SS), + /* 16 */ ALWAYSDEFD(guest_LDT), + /* 17 */ ALWAYSDEFD(guest_GDT), + /* 18 */ ALWAYSDEFD(guest_EMWARN), + /* 19 */ ALWAYSDEFD(guest_SSEROUND), + /* 20 */ ALWAYSDEFD(guest_TISTART), + /* 21 */ ALWAYSDEFD(guest_TILEN), + /* 22 */ ALWAYSDEFD(guest_SC_CLASS), + /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL) + } + }; + + + /*---------------------------------------------------------------*/ + /*--- end guest_x86_helpers.c ---*/ + /*---------------------------------------------------------------*/ Index: VEX/priv/guest_x86_toIR.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/guest_x86_toIR.c Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,14811 ---- + + /*--------------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (guest_x86_toIR.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*--------------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. 
+ + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + /* Translates x86 code to IR. */ + + /* TODO: + + All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked + to ensure a 32-bit value is being written. + + FUCOMI(P): what happens to A and S flags? Currently are forced + to zero. 
+ + x87 FP Limitations: + + * all arithmetic done at 64 bits + + * no FP exceptions, except for handling stack over/underflow + + * FP rounding mode observed only for float->int conversions + and int->float conversions which could lose accuracy, and + for float-to-float rounding. For all other operations, + round-to-nearest is used, regardless. + + * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the + simulation claims the argument is in-range (-2^63 <= arg <= 2^63) + even when it isn't. + + * some of the FCOM cases could do with testing -- not convinced + that the args are the right way round. + + * FSAVE does not re-initialise the FPU; it should do + + * FINIT not only initialises the FPU environment, it also + zeroes all the FP registers. It should leave the registers + unchanged. + + SAHF should cause eflags[1] == 1, and in fact it produces 0. As + per Intel docs this bit has no meaning anyway. Since PUSHF is the + only way to observe eflags[1], a proper fix would be to make that + bit be set by PUSHF. + + The state of %eflags.AC (alignment check, bit 18) is recorded by + the simulation (viz, if you set it with popf then a pushf produces + the value you set it to), but it is otherwise ignored. In + particular, setting it to 1 does NOT cause alignment checking to + happen. Programs that set it to 1 and then rely on the resulting + SIGBUSs to inform them of misaligned accesses will not work. + + Implementation of sysenter is necessarily partial. sysenter is a + kind of system call entry. When doing a sysenter, the return + address is not known -- that is something that is beyond Vex's + knowledge. So the generated IR forces a return to the scheduler, + which can do what it likes to simulate the sysenter, but it MUST + set this thread's guest_EIP field with the continuation address + before resuming execution. If that doesn't happen, the thread will + jump to address zero, which is probably fatal.
+ + This module uses global variables and so is not MT-safe (if that + should ever become relevant). + + The delta values are 32-bit ints, not 64-bit ints. That means + this module may not work right if run on a 64-bit host. That should + be fixed properly, really -- if anyone ever wants to use Vex to + translate x86 code for execution on a 64-bit host. + + casLE (implementation of lock-prefixed insns) and rep-prefixed + insns: the side-exit back to the start of the insn is done with + Ijk_Boring. This is quite wrong, it should be done with + Ijk_NoRedir, since otherwise the side exit, which is intended to + restart the instruction for whatever reason, could go somewhere + entirely else. Doing it right (with Ijk_NoRedir jumps) would make + no-redir jumps performance critical, at least for rep-prefixed + instructions, since all iterations thereof would involve such a + jump. It's not such a big deal with casLE since the side exit is + only taken if the CAS fails, that is, the location is contended, + which is relatively unlikely. + + Note also, the test for CAS success vs failure is done using + Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary + Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it + shouldn't definedness-check these comparisons. See + COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for + background/rationale. + */ + + /* Performance holes: + + - fcom ; fstsw %ax ; sahf + sahf does not update the O flag (sigh) and so O needs to + be computed. This is done expensively; it would be better + to have a calculate_eflags_o helper. + + - emwarns; some FP codes can generate huge numbers of these + if the fpucw is changed in an inner loop. It would be + better for the guest state to have an emwarn-enable reg + which can be set zero or nonzero. If it is zero, emwarns + are not flagged, and instead control just flows all the + way through bbs as usual. + */ + + /* "Special" instructions. 
+ + This instruction decoder can decode three special instructions + which mean nothing natively (are no-ops as far as regs/mem are + concerned) but have meaning for supporting Valgrind. A special + instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D + C1C713 (in the standard interpretation, that means: roll $3, %edi; + roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that, + one of the following 3 are allowed (standard interpretation in + parentheses): + + 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX ) + 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR + 87D2 (xchgl %edx,%edx) call-noredir *%EAX + + Any other bytes following the 12-byte preamble are illegal and + constitute a failure in instruction decoding. This all assumes + that the preamble will never occur except in specific code + fragments designed for Valgrind to catch. + + No prefixes may precede a "Special" instruction. + */ + + /* LOCK prefixed instructions. These are translated using IR-level + CAS statements (IRCAS) and are believed to preserve atomicity, even + from the point of view of some other process racing against a + simulated one (presumably they communicate via a shared memory + segment). 
+ + Handlers which are aware of LOCK prefixes are: + dis_op2_G_E (add, or, adc, sbb, and, sub, xor) + dis_cmpxchg_G_E (cmpxchg) + dis_Grp1 (add, or, adc, sbb, and, sub, xor) + dis_Grp3 (not, neg) + dis_Grp4 (inc, dec) + dis_Grp5 (inc, dec) + dis_Grp8_Imm (bts, btc, btr) + dis_bt_G_E (bts, btc, btr) + dis_xadd_G_E (xadd) + */ + + + #include "libvex_basictypes.h" + #include "libvex_ir.h" + #include "libvex.h" + #include "libvex_guest_x86.h" + + #include "main_util.h" + #include "main_globals.h" + #include "guest_generic_bb_to_IR.h" + #include "guest_generic_x87.h" + #include "guest_x86_defs.h" + + + /*------------------------------------------------------------*/ + /*--- Globals ---*/ + /*------------------------------------------------------------*/ + + /* These are set at the start of the translation of an insn, right + down in disInstr_X86, so that we don't have to pass them around + endlessly. They are all constant during the translation of any + given insn. */ + + /* We need to know this to do sub-register accesses correctly. */ + static Bool host_is_bigendian; + + /* Pointer to the guest code area (points to start of BB, not to the + insn being processed). */ + static UChar* guest_code; + + /* The guest address corresponding to guest_code[0]. */ + static Addr32 guest_EIP_bbstart; + + /* The guest address for the instruction currently being + translated. */ + static Addr32 guest_EIP_curr_instr; + + /* The IRSB* into which we're generating code. */ + static IRSB* irsb; + + + /*------------------------------------------------------------*/ + /*--- Debugging output ---*/ + /*------------------------------------------------------------*/ + + #define DIP(format, args...) \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ## args) + + #define DIS(buf, format, args...) 
\ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ## args) + + + /*------------------------------------------------------------*/ + /*--- Offsets of various parts of the x86 guest state. ---*/ + /*------------------------------------------------------------*/ + + #define OFFB_EAX offsetof(VexGuestX86State,guest_EAX) + #define OFFB_EBX offsetof(VexGuestX86State,guest_EBX) + #define OFFB_ECX offsetof(VexGuestX86State,guest_ECX) + #define OFFB_EDX offsetof(VexGuestX86State,guest_EDX) + #define OFFB_ESP offsetof(VexGuestX86State,guest_ESP) + #define OFFB_EBP offsetof(VexGuestX86State,guest_EBP) + #define OFFB_ESI offsetof(VexGuestX86State,guest_ESI) + #define OFFB_EDI offsetof(VexGuestX86State,guest_EDI) + + #define OFFB_EIP offsetof(VexGuestX86State,guest_EIP) + + #define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP) + #define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1) + #define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2) + #define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP) + + #define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0]) + #define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0]) + #define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG) + #define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG) + #define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG) + #define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP) + #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210) + #define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND) + + #define OFFB_CS offsetof(VexGuestX86State,guest_CS) + #define OFFB_DS offsetof(VexGuestX86State,guest_DS) + #define OFFB_ES offsetof(VexGuestX86State,guest_ES) + #define OFFB_FS offsetof(VexGuestX86State,guest_FS) + #define OFFB_GS offsetof(VexGuestX86State,guest_GS) + #define OFFB_SS offsetof(VexGuestX86State,guest_SS) + #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT) + #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT) + + #define 
OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND) + #define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0) + #define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1) + #define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2) + #define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3) + #define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4) + #define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5) + #define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6) + #define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7) + + #define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN) + + #define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART) + #define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN) + #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR) + + #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL) + + + /*------------------------------------------------------------*/ + /*--- Helper bits and pieces for deconstructing the ---*/ + /*--- x86 insn stream. ---*/ + /*------------------------------------------------------------*/ + + /* This is the Intel register encoding -- integer regs. */ + #define R_EAX 0 + #define R_ECX 1 + #define R_EDX 2 + #define R_EBX 3 + #define R_ESP 4 + #define R_EBP 5 + #define R_ESI 6 + #define R_EDI 7 + + #define R_AL (0+R_EAX) + #define R_AH (4+R_EAX) + + /* This is the Intel register encoding -- segment regs. */ + #define R_ES 0 + #define R_CS 1 + #define R_SS 2 + #define R_DS 3 + #define R_FS 4 + #define R_GS 5 + + + /* Add a statement to the list held by "irbb". */ + static void stmt ( IRStmt* st ) + { + addStmtToIRSB( irsb, st ); + } + + /* Generate a new temporary of the given type. 
*/ + static IRTemp newTemp ( IRType ty ) + { + vassert(isPlausibleIRType(ty)); + return newIRTemp( irsb->tyenv, ty ); + } + + /* Various simple conversions */ + + static UInt extend_s_8to32( UInt x ) + { + return (UInt)((((Int)x) << 24) >> 24); + } + + static UInt extend_s_16to32 ( UInt x ) + { + return (UInt)((((Int)x) << 16) >> 16); + } + + /* Fetch a byte from the guest insn stream. */ + static UChar getIByte ( Int delta ) + { + return guest_code[delta]; + } + + /* Extract the reg field from a modRM byte. */ + static Int gregOfRM ( UChar mod_reg_rm ) + { + return (Int)( (mod_reg_rm >> 3) & 7 ); + } + + /* Figure out whether the mod and rm parts of a modRM byte refer to a + register or memory. If so, the byte will have the form 11XXXYYY, + where YYY is the register number. */ + static Bool epartIsReg ( UChar mod_reg_rm ) + { + return toBool(0xC0 == (mod_reg_rm & 0xC0)); + } + + /* ... and extract the register number ... */ + static Int eregOfRM ( UChar mod_reg_rm ) + { + return (Int)(mod_reg_rm & 0x7); + } + + /* Get a 8/16/32-bit unsigned value out of the insn stream. */ + + static UChar getUChar ( Int delta ) + { + UChar v = guest_code[delta+0]; + return toUChar(v); + } + + static UInt getUDisp16 ( Int delta ) + { + UInt v = guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v & 0xFFFF; + } + + static UInt getUDisp32 ( Int delta ) + { + UInt v = guest_code[delta+3]; v <<= 8; + v |= guest_code[delta+2]; v <<= 8; + v |= guest_code[delta+1]; v <<= 8; + v |= guest_code[delta+0]; + return v; + } + + static UInt getUDisp ( Int size, Int delta ) + { + switch (size) { + case 4: return getUDisp32(delta); + case 2: return getUDisp16(delta); + case 1: return (UInt)getUChar(delta); + default: vpanic("getUDisp(x86)"); + } + return 0; /*notreached*/ + } + + + /* Get a byte value out of the insn stream and sign-extend to 32 + bits. 
*/ + static UInt getSDisp8 ( Int delta ) + { + return extend_s_8to32( (UInt) (guest_code[delta]) ); + } + + static UInt getSDisp16 ( Int delta0 ) + { + UChar* eip = (UChar*)(&guest_code[delta0]); + UInt d = *eip++; + d |= ((*eip++) << 8); + return extend_s_16to32(d); + } + + static UInt getSDisp ( Int size, Int delta ) + { + switch (size) { + case 4: return getUDisp32(delta); + case 2: return getSDisp16(delta); + case 1: return getSDisp8(delta); + default: vpanic("getSDisp(x86)"); + } + return 0; /*notreached*/ + } + + + /*------------------------------------------------------------*/ + /*--- Helpers for constructing IR. ---*/ + /*------------------------------------------------------------*/ + + /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit + register references, we need to take the host endianness into + account. Supplied value is 0 .. 7 and in the Intel instruction + encoding. */ + + static IRType szToITy ( Int n ) + { + switch (n) { + case 1: return Ity_I8; + case 2: return Ity_I16; + case 4: return Ity_I32; + default: vpanic("szToITy(x86)"); + } + } + + /* On a little-endian host, less significant bits of the guest + registers are at lower addresses. Therefore, if a reference to a + register low half has the safe guest state offset as a reference to + the full register. + */ + static Int integerGuestRegOffset ( Int sz, UInt archreg ) + { + vassert(archreg < 8); + + /* Correct for little-endian host only. 
*/ + vassert(!host_is_bigendian); + + if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) { + switch (archreg) { + case R_EAX: return OFFB_EAX; + case R_EBX: return OFFB_EBX; + case R_ECX: return OFFB_ECX; + case R_EDX: return OFFB_EDX; + case R_ESI: return OFFB_ESI; + case R_EDI: return OFFB_EDI; + case R_ESP: return OFFB_ESP; + case R_EBP: return OFFB_EBP; + default: vpanic("integerGuestRegOffset(x86,le)(4,2)"); + } + } + + vassert(archreg >= 4 && archreg < 8 && sz == 1); + switch (archreg-4) { + case R_EAX: return 1+ OFFB_EAX; + case R_EBX: return 1+ OFFB_EBX; + case R_ECX: return 1+ OFFB_ECX; + case R_EDX: return 1+ OFFB_EDX; + default: vpanic("integerGuestRegOffset(x86,le)(1h)"); + } + + /* NOTREACHED */ + vpanic("integerGuestRegOffset(x86,le)"); + } + + static Int segmentGuestRegOffset ( UInt sreg ) + { + switch (sreg) { + case R_ES: return OFFB_ES; + case R_CS: return OFFB_CS; + case R_SS: return OFFB_SS; + case R_DS: return OFFB_DS; + case R_FS: return OFFB_FS; + case R_GS: return OFFB_GS; + default: vpanic("segmentGuestRegOffset(x86)"); + } + } + + static Int xmmGuestRegOffset ( UInt xmmreg ) + { + switch (xmmreg) { + case 0: return OFFB_XMM0; + case 1: return OFFB_XMM1; + case 2: return OFFB_XMM2; + case 3: return OFFB_XMM3; + case 4: return OFFB_XMM4; + case 5: return OFFB_XMM5; + case 6: return OFFB_XMM6; + case 7: return OFFB_XMM7; + default: vpanic("xmmGuestRegOffset"); + } + } + + /* Lanes of vector registers are always numbered from zero being the + least significant lane (rightmost in the register). */ + + static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. */ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 8); + return xmmGuestRegOffset( xmmreg ) + 2 * laneno; + } + + static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. 
*/ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 4); + return xmmGuestRegOffset( xmmreg ) + 4 * laneno; + } + + static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) + { + /* Correct for little-endian host only. */ + vassert(!host_is_bigendian); + vassert(laneno >= 0 && laneno < 2); + return xmmGuestRegOffset( xmmreg ) + 8 * laneno; + } + + static IRExpr* getIReg ( Int sz, UInt archreg ) + { + vassert(sz == 1 || sz == 2 || sz == 4); + vassert(archreg < 8); + return IRExpr_Get( integerGuestRegOffset(sz,archreg), + szToITy(sz) ); + } + + /* Ditto, but write to a reg instead. */ + static void putIReg ( Int sz, UInt archreg, IRExpr* e ) + { + IRType ty = typeOfIRExpr(irsb->tyenv, e); + switch (sz) { + case 1: vassert(ty == Ity_I8); break; + case 2: vassert(ty == Ity_I16); break; + case 4: vassert(ty == Ity_I32); break; + default: vpanic("putIReg(x86)"); + } + vassert(archreg < 8); + stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) ); + } + + static IRExpr* getSReg ( UInt sreg ) + { + return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 ); + } + + static void putSReg ( UInt sreg, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); + stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) ); + } + + static IRExpr* getXMMReg ( UInt xmmreg ) + { + return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); + } + + static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); + } + + static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); + } + + static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); + } + + static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) + { + return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); + } + + static void putXMMReg ( UInt xmmreg, IRExpr* e ) + { 
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); + stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); + } + + static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); + stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); + stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); + stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); + stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); + } + + static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); + stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) ); + } + + static void assign ( IRTemp dst, IRExpr* e ) + { + stmt( IRStmt_WrTmp(dst, e) ); + } + + static void storeLE ( IRExpr* addr, IRExpr* data ) + { + stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) ); + } + + static IRExpr* unop ( IROp op, IRExpr* a ) + { + return IRExpr_Unop(op, a); + } + + static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) + { + return IRExpr_Binop(op, a1, a2); + } + + static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) + { + return IRExpr_Triop(op, a1, a2, a3); + } + + static IRExpr* mkexpr ( IRTemp tmp ) + { + return IRExpr_RdTmp(tmp); + } + + static IRExpr* mkU8 ( UInt i ) + { + vassert(i < 256); + return IRExpr_Const(IRConst_U8( (UChar)i )); + } + + static IRExpr* mkU16 ( UInt i ) + { + vassert(i < 65536); + return IRExpr_Const(IRConst_U16( (UShort)i )); + } + + static IRExpr* mkU32 ( UInt i ) + { + return IRExpr_Const(IRConst_U32(i)); 
+ } + + static IRExpr* mkU64 ( ULong i ) + { + return IRExpr_Const(IRConst_U64(i)); + } + + static IRExpr* mkU ( IRType ty, UInt i ) + { + if (ty == Ity_I8) return mkU8(i); + if (ty == Ity_I16) return mkU16(i); + if (ty == Ity_I32) return mkU32(i); + /* If this panics, it usually means you passed a size (1,2,4) + value as the IRType, rather than a real IRType. */ + vpanic("mkU(x86)"); + } + + static IRExpr* mkV128 ( UShort mask ) + { + return IRExpr_Const(IRConst_V128(mask)); + } + + static IRExpr* loadLE ( IRType ty, IRExpr* data ) + { + return IRExpr_Load(False, Iend_LE, ty, data); + } + + static IROp mkSizedOp ( IRType ty, IROp op8 ) + { + Int adj; + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 + || op8 == Iop_Mul8 + || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 + || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 + || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 + || op8 == Iop_CasCmpNE8 + || op8 == Iop_Not8); + adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); + return adj + op8; + } + + static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd ) + { + if (szSmall == 1 && szBig == 4) { + return signd ? Iop_8Sto32 : Iop_8Uto32; + } + if (szSmall == 1 && szBig == 2) { + return signd ? Iop_8Sto16 : Iop_8Uto16; + } + if (szSmall == 2 && szBig == 4) { + return signd ? Iop_16Sto32 : Iop_16Uto32; + } + vpanic("mkWidenOp(x86,guest)"); + } + + static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) + { + vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); + vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); + return unop(Iop_32to1, + binop(Iop_And32, + unop(Iop_1Uto32,x), + unop(Iop_1Uto32,y))); + } + + /* Generate a compare-and-swap operation, operating on memory at + 'addr'. The expected value is 'expVal' and the new value is + 'newVal'. 
If the operation fails, then transfer control (with a + no-redir jump (XXX no -- see comment at top of this file)) to + 'restart_point', which is presumably the address of the guest + instruction again -- retrying, essentially. */ + static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, + Addr32 restart_point ) + { + IRCAS* cas; + IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); + IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); + IRTemp oldTmp = newTemp(tyE); + IRTemp expTmp = newTemp(tyE); + vassert(tyE == tyN); + vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8); + assign(expTmp, expVal); + cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, + NULL, mkexpr(expTmp), NULL, newVal ); + stmt( IRStmt_CAS(cas) ); + stmt( IRStmt_Exit( + binop( mkSizedOp(tyE,Iop_CasCmpNE8), + mkexpr(oldTmp), mkexpr(expTmp) ), + Ijk_Boring, /*Ijk_NoRedir*/ + IRConst_U32( restart_point ) + )); + } + + + /*------------------------------------------------------------*/ + /*--- Helpers for %eflags. ---*/ + /*------------------------------------------------------------*/ + + /* -------------- Evaluating the flags-thunk. -------------- */ + + /* Build IR to calculate all the eflags from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: + Ity_I32. */ + static IRExpr* mk_x86g_calculate_eflags_all ( void ) + { + IRExpr** args + = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), + IRExpr_Get(OFFB_CC_DEP1, Ity_I32), + IRExpr_Get(OFFB_CC_DEP2, Ity_I32), + IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); + IRExpr* call + = mkIRExprCCall( + Ity_I32, + 0/*regparm*/, + "x86g_calculate_eflags_all", &x86g_calculate_eflags_all, + args + ); + /* Exclude OP and NDEP from definedness checking. We're only + interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; + } + + /* Build IR to calculate some particular condition from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: + Ity_Bit. 
*/ + static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond ) + { + IRExpr** args + = mkIRExprVec_5( mkU32(cond), + IRExpr_Get(OFFB_CC_OP, Ity_I32), + IRExpr_Get(OFFB_CC_DEP1, Ity_I32), + IRExpr_Get(OFFB_CC_DEP2, Ity_I32), + IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); + IRExpr* call + = mkIRExprCCall( + Ity_I32, + 0/*regparm*/, + "x86g_calculate_condition", &x86g_calculate_condition, + args + ); + /* Exclude the requested condition, OP and NDEP from definedness + checking. We're only interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4); + return unop(Iop_32to1, call); + } + + /* Build IR to calculate just the carry flag from stored + CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */ + static IRExpr* mk_x86g_calculate_eflags_c ( void ) + { + IRExpr** args + = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32), + IRExpr_Get(OFFB_CC_DEP1, Ity_I32), + IRExpr_Get(OFFB_CC_DEP2, Ity_I32), + IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ); + IRExpr* call + = mkIRExprCCall( + Ity_I32, + 3/*regparm*/, + "x86g_calculate_eflags_c", &x86g_calculate_eflags_c, + args + ); + /* Exclude OP and NDEP from definedness checking. We're only + interested in DEP1 and DEP2. */ + call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3); + return call; + } + + + /* -------------- Building the flags-thunk. -------------- */ + + /* The machinery in this section builds the flag-thunk following a + flag-setting operation. Hence the various setFlags_* functions. + */ + + static Bool isAddSub ( IROp op8 ) + { + return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8); + } + + static Bool isLogic ( IROp op8 ) + { + return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8); + } + + /* U-widen 8/16/32 bit int expr to 32. 
*/ + static IRExpr* widenUto32 ( IRExpr* e ) + { + switch (typeOfIRExpr(irsb->tyenv,e)) { + case Ity_I32: return e; + case Ity_I16: return unop(Iop_16Uto32,e); + case Ity_I8: return unop(Iop_8Uto32,e); + default: vpanic("widenUto32"); + } + } + + /* S-widen 8/16/32 bit int expr to 32. */ + static IRExpr* widenSto32 ( IRExpr* e ) + { + switch (typeOfIRExpr(irsb->tyenv,e)) { + case Ity_I32: return e; + case Ity_I16: return unop(Iop_16Sto32,e); + case Ity_I8: return unop(Iop_8Sto32,e); + default: vpanic("widenSto32"); + } + } + + /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some + of these combinations make sense. */ + static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e ) + { + IRType src_ty = typeOfIRExpr(irsb->tyenv,e); + if (src_ty == dst_ty) + return e; + if (src_ty == Ity_I32 && dst_ty == Ity_I16) + return unop(Iop_32to16, e); + if (src_ty == Ity_I32 && dst_ty == Ity_I8) + return unop(Iop_32to8, e); + + vex_printf("\nsrc, dst tys are: "); + ppIRType(src_ty); + vex_printf(", "); + ppIRType(dst_ty); + vex_printf("\n"); + vpanic("narrowTo(x86)"); + } + + + /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is + auto-sized up to the real op. */ + + static + void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty ) + { + Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); + + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + + switch (op8) { + case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break; + case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break; + default: ppIROp(op8); + vpanic("setFlags_DEP1_DEP2(x86)"); + } + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + + /* Set the OP and DEP1 fields only, and write zero to DEP2. 
*/ + + static + void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) + { + Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); + + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + + switch (op8) { + case Iop_Or8: + case Iop_And8: + case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break; + default: ppIROp(op8); + vpanic("setFlags_DEP1(x86)"); + } + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + + /* For shift operations, we put in the result and the undershifted + result. Except if the shift amount is zero, the thunk is left + unchanged. */ + + static void setFlags_DEP1_DEP2_shift ( IROp op32, + IRTemp res, + IRTemp resUS, + IRType ty, + IRTemp guard ) + { + Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0); + + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + vassert(guard); + + /* Both kinds of right shifts are handled by the same thunk + operation. */ + switch (op32) { + case Iop_Shr32: + case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break; + case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break; + default: ppIROp(op32); + vpanic("setFlags_DEP1_DEP2_shift(x86)"); + } + + /* DEP1 contains the result, DEP2 contains the undershifted value. */ + stmt( IRStmt_Put( OFFB_CC_OP, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_OP,Ity_I32), + mkU32(ccOp))) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_DEP1,Ity_I32), + widenUto32(mkexpr(res)))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, + IRExpr_Mux0X( mkexpr(guard), + IRExpr_Get(OFFB_CC_DEP2,Ity_I32), + widenUto32(mkexpr(resUS)))) ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. 
*/ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + + /* For the inc/dec case, we store in DEP1 the result value and in NDEP + the former value of the carry flag, which unfortunately we have to + compute. */ + + static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) + { + Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB; + + ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); + vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); + + /* This has to come first, because calculating the C flag + may require reading all four thunk fields. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) ); + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) ); + } + + + /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the + two arguments. */ + + static + void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op ) + { + switch (ty) { + case Ity_I8: + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) ); + break; + case Ity_I16: + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) ); + break; + case Ity_I32: + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) ); + break; + default: + vpanic("setFlags_MUL(x86)"); + } + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + + /* -------------- Condition codes. -------------- */ + + /* Condition codes, using the Intel encoding. 
*/ + + static HChar* name_X86Condcode ( X86Condcode cond ) + { + switch (cond) { + case X86CondO: return "o"; + case X86CondNO: return "no"; + case X86CondB: return "b"; + case X86CondNB: return "nb"; + case X86CondZ: return "z"; + case X86CondNZ: return "nz"; + case X86CondBE: return "be"; + case X86CondNBE: return "nbe"; + case X86CondS: return "s"; + case X86CondNS: return "ns"; + case X86CondP: return "p"; + case X86CondNP: return "np"; + case X86CondL: return "l"; + case X86CondNL: return "nl"; + case X86CondLE: return "le"; + case X86CondNLE: return "nle"; + case X86CondAlways: return "ALWAYS"; + default: vpanic("name_X86Condcode"); + } + } + + static + X86Condcode positiveIse_X86Condcode ( X86Condcode cond, + Bool* needInvert ) + { + vassert(cond >= X86CondO && cond <= X86CondNLE); + if (cond & 1) { + *needInvert = True; + return cond-1; + } else { + *needInvert = False; + return cond; + } + } + + + /* -------------- Helpers for ADD/SUB with carry. -------------- */ + + /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags + appropriately. + + Optionally, generate a store for the 'tres' value. This can either + be a normal store, or it can be a cas-with-possible-failure style + store: + + if taddr is IRTemp_INVALID, then no store is generated. + + if taddr is not IRTemp_INVALID, then a store (using taddr as + the address) is generated: + + if texpVal is IRTemp_INVALID then a normal store is + generated, and restart_point must be zero (it is irrelevant). + + if texpVal is not IRTemp_INVALID then a cas-style store is + generated. texpVal is the expected value, restart_point + is the restart point if the store fails, and texpVal must + have the same type as tres. 
+ */ + static void helper_ADC ( Int sz, + IRTemp tres, IRTemp ta1, IRTemp ta2, + /* info about optional store: */ + IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) + { + UInt thunkOp; + IRType ty = szToITy(sz); + IRTemp oldc = newTemp(Ity_I32); + IRTemp oldcn = newTemp(ty); + IROp plus = mkSizedOp(ty, Iop_Add8); + IROp xor = mkSizedOp(ty, Iop_Xor8); + + vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); + vassert(sz == 1 || sz == 2 || sz == 4); + thunkOp = sz==4 ? X86G_CC_OP_ADCL + : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB); + + /* oldc = old carry flag, 0 or 1 */ + assign( oldc, binop(Iop_And32, + mk_x86g_calculate_eflags_c(), + mkU32(1)) ); + + assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); + + assign( tres, binop(plus, + binop(plus,mkexpr(ta1),mkexpr(ta2)), + mkexpr(oldcn)) ); + + /* Possibly generate a store of 'tres' to 'taddr'. See comment at + start of this function. */ + if (taddr != IRTemp_INVALID) { + if (texpVal == IRTemp_INVALID) { + vassert(restart_point == 0); + storeLE( mkexpr(taddr), mkexpr(tres) ); + } else { + vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); + /* .. and hence 'texpVal' has the same type as 'tres'. */ + casLE( mkexpr(taddr), + mkexpr(texpVal), mkexpr(tres), restart_point ); + } + } + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2), + mkexpr(oldcn)) )) ); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); + } + + + /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags + appropriately. As with helper_ADC, possibly generate a store of + the result -- see comments on helper_ADC for details. 
+ */ + static void helper_SBB ( Int sz, + IRTemp tres, IRTemp ta1, IRTemp ta2, + /* info about optional store: */ + IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) + { + UInt thunkOp; + IRType ty = szToITy(sz); + IRTemp oldc = newTemp(Ity_I32); + IRTemp oldcn = newTemp(ty); + IROp minus = mkSizedOp(ty, Iop_Sub8); + IROp xor = mkSizedOp(ty, Iop_Xor8); + + vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); + vassert(sz == 1 || sz == 2 || sz == 4); + thunkOp = sz==4 ? X86G_CC_OP_SBBL + : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB); + + /* oldc = old carry flag, 0 or 1 */ + assign( oldc, binop(Iop_And32, + mk_x86g_calculate_eflags_c(), + mkU32(1)) ); + + assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); + + assign( tres, binop(minus, + binop(minus,mkexpr(ta1),mkexpr(ta2)), + mkexpr(oldcn)) ); + + /* Possibly generate a store of 'tres' to 'taddr'. See comment at + start of this function. */ + if (taddr != IRTemp_INVALID) { + if (texpVal == IRTemp_INVALID) { + vassert(restart_point == 0); + storeLE( mkexpr(taddr), mkexpr(tres) ); + } else { + vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); + /* .. and hence 'texpVal' has the same type as 'tres'. */ + casLE( mkexpr(taddr), + mkexpr(texpVal), mkexpr(tres), restart_point ); + } + } + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2), + mkexpr(oldcn)) )) ); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); + } + + + /* -------------- Helpers for disassembly printing. 
-------------- */ + + static HChar* nameGrp1 ( Int opc_aux ) + { + static HChar* grp1_names[8] + = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; + if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)"); + return grp1_names[opc_aux]; + } + + static HChar* nameGrp2 ( Int opc_aux ) + { + static HChar* grp2_names[8] + = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; + if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)"); + return grp2_names[opc_aux]; + } + + static HChar* nameGrp4 ( Int opc_aux ) + { + static HChar* grp4_names[8] + = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; + if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)"); + return grp4_names[opc_aux]; + } + + static HChar* nameGrp5 ( Int opc_aux ) + { + static HChar* grp5_names[8] + = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; + if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)"); + return grp5_names[opc_aux]; + } + + static HChar* nameGrp8 ( Int opc_aux ) + { + static HChar* grp8_names[8] + = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; + if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)"); + return grp8_names[opc_aux]; + } + + static HChar* nameIReg ( Int size, Int reg ) + { + static HChar* ireg32_names[8] + = { "%eax", "%ecx", "%edx", "%ebx", + "%esp", "%ebp", "%esi", "%edi" }; + static HChar* ireg16_names[8] + = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }; + static HChar* ireg8_names[8] + = { "%al", "%cl", "%dl", "%bl", + "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" }; + if (reg < 0 || reg > 7) goto bad; + switch (size) { + case 4: return ireg32_names[reg]; + case 2: return ireg16_names[reg]; + case 1: return ireg8_names[reg]; + } + bad: + vpanic("nameIReg(X86)"); + return NULL; /*notreached*/ + } + + static HChar* nameSReg ( UInt sreg ) + { + switch (sreg) { + case R_ES: return "%es"; + case R_CS: return "%cs"; + case R_SS: return "%ss"; + case R_DS: return "%ds"; + case R_FS: return "%fs"; + case 
R_GS: return "%gs"; + default: vpanic("nameSReg(x86)"); + } + } + + static HChar* nameMMXReg ( Int mmxreg ) + { + static HChar* mmx_names[8] + = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; + if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)"); + return mmx_names[mmxreg]; + } + + static HChar* nameXMMReg ( Int xmmreg ) + { + static HChar* xmm_names[8] + = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7" }; + if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg"); + return xmm_names[xmmreg]; + } + + static HChar* nameMMXGran ( Int gran ) + { + switch (gran) { + case 0: return "b"; + case 1: return "w"; + case 2: return "d"; + case 3: return "q"; + default: vpanic("nameMMXGran(x86,guest)"); + } + } + + static HChar nameISize ( Int size ) + { + switch (size) { + case 4: return 'l'; + case 2: return 'w'; + case 1: return 'b'; + default: vpanic("nameISize(x86)"); + } + } + + + /*------------------------------------------------------------*/ + /*--- JMP helpers ---*/ + /*------------------------------------------------------------*/ + + static void jmp_lit( IRJumpKind kind, Addr32 d32 ) + { + irsb->next = mkU32(d32); + irsb->jumpkind = kind; + } + + static void jmp_treg( IRJumpKind kind, IRTemp t ) + { + irsb->next = mkexpr(t); + irsb->jumpkind = kind; + } + + static + void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) + { + Bool invert; + X86Condcode condPos; + condPos = positiveIse_X86Condcode ( cond, &invert ); + if (invert) { + stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), + Ijk_Boring, + IRConst_U32(d32_false) ) ); + irsb->next = mkU32(d32_true); + irsb->jumpkind = Ijk_Boring; + } else { + stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), + Ijk_Boring, + IRConst_U32(d32_true) ) ); + irsb->next = mkU32(d32_false); + irsb->jumpkind = Ijk_Boring; + } + } + + + /*------------------------------------------------------------*/ + /*--- Disassembling addressing modes ---*/ + 
/*------------------------------------------------------------*/ + + static + HChar* sorbTxt ( UChar sorb ) + { + switch (sorb) { + case 0: return ""; /* no override */ + case 0x3E: return "%ds"; + case 0x26: return "%es:"; + case 0x64: return "%fs:"; + case 0x65: return "%gs:"; + default: vpanic("sorbTxt(x86,guest)"); + } + } + + + /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a + linear address by adding any required segment override as indicated + by sorb. */ + static + IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual ) + { + Int sreg; + IRType hWordTy; + IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; + + if (sorb == 0) + /* the common case - no override */ + return virtual; + + switch (sorb) { + case 0x3E: sreg = R_DS; break; + case 0x26: sreg = R_ES; break; + case 0x64: sreg = R_FS; break; + case 0x65: sreg = R_GS; break; + default: vpanic("handleSegOverride(x86,guest)"); + } + + hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; + + seg_selector = newTemp(Ity_I32); + ldt_ptr = newTemp(hWordTy); + gdt_ptr = newTemp(hWordTy); + r64 = newTemp(Ity_I64); + + assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); + assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); + assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); + + /* + Call this to do the translation and limit checks: + ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, + UInt seg_selector, UInt virtual_addr ) + */ + assign( + r64, + mkIRExprCCall( + Ity_I64, + 0/*regparms*/, + "x86g_use_seg_selector", + &x86g_use_seg_selector, + mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), + mkexpr(seg_selector), virtual) + ) + ); + + /* If the high 32 of the result are non-zero, there was a + failure in address translation. In which case, make a + quick exit. + */ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), + Ijk_MapFail, + IRConst_U32( guest_EIP_curr_instr ) + ) + ); + + /* otherwise, here's the translated result. 
*/ + return unop(Iop_64to32, mkexpr(r64)); + } + + + /* Generate IR to calculate an address indicated by a ModRM and + following SIB bytes. The expression, and the number of bytes in + the address mode, are returned. Note that this fn should not be + called if the R/M part of the address denotes a register instead of + memory. If print_codegen is true, text of the addressing mode is + placed in buf. + + The computed address is stored in a new tempreg, and the + identity of the tempreg is returned. */ + + static IRTemp disAMode_copy2tmp ( IRExpr* addr32 ) + { + IRTemp tmp = newTemp(Ity_I32); + assign( tmp, addr32 ); + return tmp; + } + + static + IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf ) + { + UChar mod_reg_rm = getIByte(delta); + delta++; + + buf[0] = (UChar)0; + + /* squeeze out the reg field from mod_reg_rm, since a 256-entry + jump table seems a bit excessive. + */ + mod_reg_rm &= 0xC7; /* is now XX000YYY */ + mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); + /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). + --> GET %reg, t + */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + { UChar rm = mod_reg_rm; + DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm)); + *len = 1; + return disAMode_copy2tmp( + handleSegOverride(sorb, getIReg(4,rm))); + } + + /* d8(%eax) ... d8(%edi), not including d8(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + { UChar rm = toUChar(mod_reg_rm & 7); + UInt d = getSDisp8(delta); + DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm)); + *len = 2; + return disAMode_copy2tmp( + handleSegOverride(sorb, + binop(Iop_Add32,getIReg(4,rm),mkU32(d)))); + } + + /* d32(%eax) ... 
d32(%edi), not including d32(%esp) + --> GET %reg, t ; ADDL d8, t + */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + { UChar rm = toUChar(mod_reg_rm & 7); + UInt d = getUDisp32(delta); + DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm)); + *len = 5; + return disAMode_copy2tmp( + handleSegOverride(sorb, + binop(Iop_Add32,getIReg(4,rm),mkU32(d)))); + } + + /* a register, %eax .. %edi. This shouldn't happen. */ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + vpanic("disAMode(x86): not an addr!"); + + /* a 32-bit literal address + --> MOV d32, tmp + */ + case 0x05: + { UInt d = getUDisp32(delta); + *len = 5; + DIS(buf, "%s(0x%x)", sorbTxt(sorb), d); + return disAMode_copy2tmp( + handleSegOverride(sorb, mkU32(d))); + } + + case 0x04: { + /* SIB, with no displacement. Special cases: + -- %esp cannot act as an index value. + If index_r indicates %esp, zero is used for the index. + -- when mod is zero and base indicates EBP, base is instead + a 32-bit literal. + It's all madness, I tell you. Extract %index, %base and + scale from the SIB byte. The value denoted is then: + | %index == %ESP && %base == %EBP + = d32 following SIB byte + | %index == %ESP && %base != %EBP + = %base + | %index != %ESP && %base == %EBP + = d32 following SIB byte + (%index << scale) + | %index != %ESP && %base != %ESP + = %base + (%index << scale) + + What happens to the souls of CPU architects who dream up such + horrendous schemes, do you suppose? 
+ */ + UChar sib = getIByte(delta); + UChar scale = toUChar((sib >> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + delta++; + + if (index_r != R_ESP && base_r != R_EBP) { + DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb), + nameIReg(4,base_r), nameIReg(4,index_r), 1<> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + UInt d = getSDisp8(delta+1); + + if (index_r == R_ESP) { + DIS(buf, "%s%d(%s,,)", sorbTxt(sorb), + (Int)d, nameIReg(4,base_r)); + *len = 3; + return disAMode_copy2tmp( + handleSegOverride(sorb, + binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) )); + } else { + DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d, + nameIReg(4,base_r), nameIReg(4,index_r), 1<> 6) & 3); + UChar index_r = toUChar((sib >> 3) & 7); + UChar base_r = toUChar(sib & 7); + UInt d = getUDisp32(delta+1); + + if (index_r == R_ESP) { + DIS(buf, "%s%d(%s,,)", sorbTxt(sorb), + (Int)d, nameIReg(4,base_r)); + *len = 6; + return disAMode_copy2tmp( + handleSegOverride(sorb, + binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) )); + } else { + DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d, + nameIReg(4,base_r), nameIReg(4,index_r), 1<> 3)); + /* is now XX0XXYYY */ + mod_reg_rm &= 0x1F; /* is now 000XXYYY */ + switch (mod_reg_rm) { + + /* (%eax) .. (%edi), not including (%esp) or (%ebp). */ + case 0x00: case 0x01: case 0x02: case 0x03: + /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: + return 1; + + /* d8(%eax) ... d8(%edi), not including d8(%esp). */ + case 0x08: case 0x09: case 0x0A: case 0x0B: + /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: + return 2; + + /* d32(%eax) ... d32(%edi), not including d32(%esp). */ + case 0x10: case 0x11: case 0x12: case 0x13: + /* ! 14 */ case 0x15: case 0x16: case 0x17: + return 5; + + /* a register, %eax .. %edi. (Not an addr, but still handled.) */ + case 0x18: case 0x19: case 0x1A: case 0x1B: + case 0x1C: case 0x1D: case 0x1E: case 0x1F: + return 1; + + /* a 32-bit literal address. 
*/ + case 0x05: return 5; + + /* SIB, no displacement. */ + case 0x04: { + UChar sib = getIByte(delta); + UChar base_r = toUChar(sib & 7); + if (base_r == R_EBP) return 6; else return 2; + } + /* SIB, with 8-bit displacement. */ + case 0x0C: return 3; + + /* SIB, with 32-bit displacement. */ + case 0x14: return 6; + + default: + vpanic("lengthAMode"); + return 0; /*notreached*/ + } + } + + /*------------------------------------------------------------*/ + /*--- Disassembling common idioms ---*/ + /*------------------------------------------------------------*/ + + /* Handle binary integer instructions of the form + op E, G meaning + op reg-or-mem, reg + Is passed the a ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %G, tmp + OP %E, tmp + PUT tmp, %G + + If E is mem and OP is not reversible, + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + GET %G, tmp2 + OP tmpa, tmp2 + PUT tmp2, %G + + If E is mem and OP is reversible + --> (getAddr E) -> tmpa + LD (tmpa), tmpa + OP %G, tmpa + PUT tmpa, %G + */ + static + UInt dis_op2_E_G ( UChar sorb, + Bool addSubCarry, + IROp op8, + Bool keep, + Int size, + Int delta0, + HChar* t_x86opc ) + { + HChar dis_buf[50]; + Int len; + IRType ty = szToITy(size); + IRTemp dst1 = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dst0 = newTemp(ty); + UChar rm = getUChar(delta0); + IRTemp addr = IRTemp_INVALID; + + /* addSubCarry == True indicates the intended operation is + add-with-carry or subtract-with-borrow. */ + if (addSubCarry) { + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); + vassert(keep); + } + + if (epartIsReg(rm)) { + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. Ditto SBB reg,reg. 
*/ + if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) + && gregOfRM(rm) == eregOfRM(rm)) { + putIReg(size, gregOfRM(rm), mkU(ty,0)); + } + assign( dst0, getIReg(size,gregOfRM(rm)) ); + assign( src, getIReg(size,eregOfRM(rm)) ); + + if (addSubCarry && op8 == Iop_Add8) { + helper_ADC( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } else + if (addSubCarry && op8 == Iop_Sub8) { + helper_SBB( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } else { + assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + if (keep) + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } + + DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+delta0; + } else { + /* E refers to memory */ + addr = disAMode ( &len, sorb, delta0, dis_buf); + assign( dst0, getIReg(size,gregOfRM(rm)) ); + assign( src, loadLE(szToITy(size), mkexpr(addr)) ); + + if (addSubCarry && op8 == Iop_Add8) { + helper_ADC( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } else + if (addSubCarry && op8 == Iop_Sub8) { + helper_SBB( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } else { + assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + if (keep) + putIReg(size, gregOfRM(rm), mkexpr(dst1)); + } + + DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return len+delta0; + } + } + + + + /* Handle binary integer instructions of the form + op G, E meaning + op reg, reg-or-mem + Is passed the a 
ptr to the modRM byte, the actual operation, and the + data size. Returns the address advanced completely over this + instruction. + + G(src) is reg. + E(dst) is reg-or-mem + + If E is reg, --> GET %E, tmp + OP %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + LD (tmpa), tmpv + OP %G, tmpv + ST tmpv, (tmpa) + */ + static + UInt dis_op2_G_E ( UChar sorb, + Bool locked, + Bool addSubCarry, + IROp op8, + Bool keep, + Int size, + Int delta0, + HChar* t_x86opc ) + { + HChar dis_buf[50]; + Int len; + IRType ty = szToITy(size); + IRTemp dst1 = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dst0 = newTemp(ty); + UChar rm = getIByte(delta0); + IRTemp addr = IRTemp_INVALID; + + /* addSubCarry == True indicates the intended operation is + add-with-carry or subtract-with-borrow. */ + if (addSubCarry) { + vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); + vassert(keep); + } + + if (epartIsReg(rm)) { + /* Specially handle XOR reg,reg, because that doesn't really + depend on reg, and doing the obvious thing potentially + generates a spurious value check failure due to the bogus + dependency. 
Ditto SBB reg,reg.*/ + if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) + && gregOfRM(rm) == eregOfRM(rm)) { + putIReg(size, eregOfRM(rm), mkU(ty,0)); + } + assign(dst0, getIReg(size,eregOfRM(rm))); + assign(src, getIReg(size,gregOfRM(rm))); + + if (addSubCarry && op8 == Iop_Add8) { + helper_ADC( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, eregOfRM(rm), mkexpr(dst1)); + } else + if (addSubCarry && op8 == Iop_Sub8) { + helper_SBB( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + putIReg(size, eregOfRM(rm), mkexpr(dst1)); + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + if (keep) + putIReg(size, eregOfRM(rm), mkexpr(dst1)); + } + + DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+delta0; + } + + /* E refers to memory */ + { + addr = disAMode ( &len, sorb, delta0, dis_buf); + assign(dst0, loadLE(ty,mkexpr(addr))); + assign(src, getIReg(size,gregOfRM(rm))); + + if (addSubCarry && op8 == Iop_Add8) { + if (locked) { + /* cas-style store */ + helper_ADC( size, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); + } else { + /* normal store */ + helper_ADC( size, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else + if (addSubCarry && op8 == Iop_Sub8) { + if (locked) { + /* cas-style store */ + helper_SBB( size, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); + } else { + /* normal store */ + helper_SBB( size, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (keep) { + if (locked) { + if (0) vex_printf("locked case\n" ); + casLE( mkexpr(addr), + mkexpr(dst0)/*expval*/, + mkexpr(dst1)/*newval*/, guest_EIP_curr_instr ); + } else { + if 
(0) vex_printf("nonlocked case\n"); + storeLE(mkexpr(addr), mkexpr(dst1)); + } + } + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return len+delta0; + } + } + + + /* Handle move instructions of the form + mov E, G meaning + mov reg-or-mem, reg + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + E(src) is reg-or-mem + G(dst) is reg. + + If E is reg, --> GET %E, tmpv + PUT tmpv, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmpb + PUT tmpb, %G + */ + static + UInt dis_mov_E_G ( UChar sorb, + Int size, + Int delta0 ) + { + Int len; + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + + if (epartIsReg(rm)) { + putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm))); + DIP("mov%c %s,%s\n", nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+delta0; + } + + /* E refers to memory */ + { + IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); + putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr))); + DIP("mov%c %s,%s\n", nameISize(size), + dis_buf,nameIReg(size,gregOfRM(rm))); + return delta0+len; + } + } + + + /* Handle move instructions of the form + mov G, E meaning + mov reg, reg-or-mem + Is passed the a ptr to the modRM byte, and the data size. Returns + the address advanced completely over this instruction. + + G(src) is reg. 
+ E(dst) is reg-or-mem + + If E is reg, --> GET %G, tmp + PUT tmp, %E + + If E is mem, --> (getAddr E) -> tmpa + GET %G, tmpv + ST tmpv, (tmpa) + */ + static + UInt dis_mov_G_E ( UChar sorb, + Int size, + Int delta0 ) + { + Int len; + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + + if (epartIsReg(rm)) { + putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm))); + DIP("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm))); + return 1+delta0; + } + + /* E refers to memory */ + { + IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf); + storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) ); + DIP("mov%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + return len+delta0; + } + } + + + /* op $immediate, AL/AX/EAX. */ + static + UInt dis_op_imm_A ( Int size, + Bool carrying, + IROp op8, + Bool keep, + Int delta, + HChar* t_x86opc ) + { + IRType ty = szToITy(size); + IRTemp dst0 = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dst1 = newTemp(ty); + UInt lit = getUDisp(size,delta); + assign(dst0, getIReg(size,R_EAX)); + assign(src, mkU(ty,lit)); + + if (isAddSub(op8) && !carrying) { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); + setFlags_DEP1_DEP2(op8, dst0, src, ty); + } + else + if (isLogic(op8)) { + vassert(!carrying); + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); + setFlags_DEP1(op8, dst1, ty); + } + else + if (op8 == Iop_Add8 && carrying) { + helper_ADC( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } + else + if (op8 == Iop_Sub8 && carrying) { + helper_SBB( size, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } + else + vpanic("dis_op_imm_A(x86,guest)"); + + if (keep) + putIReg(size, R_EAX, mkexpr(dst1)); + + DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), + lit, nameIReg(size,R_EAX)); + return delta+size; + } + + + /* Sign- and Zero-extending moves. 
*/ + static + UInt dis_movx_E_G ( UChar sorb, + Int delta, Int szs, Int szd, Bool sign_extend ) + { + UChar rm = getIByte(delta); + if (epartIsReg(rm)) { + putIReg(szd, gregOfRM(rm), + unop(mkWidenOp(szs,szd,sign_extend), + getIReg(szs,eregOfRM(rm)))); + DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', + nameISize(szs), nameISize(szd), + nameIReg(szs,eregOfRM(rm)), + nameIReg(szd,gregOfRM(rm))); + return 1+delta; + } + + /* E refers to memory */ + { + Int len; + HChar dis_buf[50]; + IRTemp addr = disAMode ( &len, sorb, delta, dis_buf ); + + putIReg(szd, gregOfRM(rm), + unop(mkWidenOp(szs,szd,sign_extend), + loadLE(szToITy(szs),mkexpr(addr)))); + DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', + nameISize(szs), nameISize(szd), + dis_buf, nameIReg(szd,gregOfRM(rm))); + return len+delta; + } + } + + + /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / + 16 / 8 bit quantity in the given IRTemp. */ + static + void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) + { + IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32; + IRTemp src64 = newTemp(Ity_I64); + IRTemp dst64 = newTemp(Ity_I64); + switch (sz) { + case 4: + assign( src64, binop(Iop_32HLto64, + getIReg(4,R_EDX), getIReg(4,R_EAX)) ); + assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) ); + putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) ); + putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) ); + break; + case 2: { + IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; + IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; + assign( src64, unop(widen3264, + binop(Iop_16HLto32, + getIReg(2,R_EDX), getIReg(2,R_EAX))) ); + assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); + putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); + putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); + break; + } + case 1: { + IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; + IROp widen1632 = signed_divide ? 
Iop_16Sto32 : Iop_16Uto32; + IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; + assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) ); + assign( dst64, + binop(op, mkexpr(src64), + unop(widen1632, unop(widen816, mkexpr(t)))) ); + putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16, + unop(Iop_64to32,mkexpr(dst64)))) ); + putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16, + unop(Iop_64HIto32,mkexpr(dst64)))) ); + break; + } + default: vpanic("codegen_div(x86)"); + } + } + + + static + UInt dis_Grp1 ( UChar sorb, Bool locked, + Int delta, UChar modrm, + Int am_sz, Int d_sz, Int sz, UInt d32 ) + { + Int len; + HChar dis_buf[50]; + IRType ty = szToITy(sz); + IRTemp dst1 = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dst0 = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + IROp op8 = Iop_INVALID; + UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF); + + switch (gregOfRM(modrm)) { + case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; + case 2: break; // ADC + case 3: break; // SBB + case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; + case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; + /*NOTREACHED*/ + default: vpanic("dis_Grp1: unhandled case"); + } + + if (epartIsReg(modrm)) { + vassert(am_sz == 1); + + assign(dst0, getIReg(sz,eregOfRM(modrm))); + assign(src, mkU(ty,d32 & mask)); + + if (gregOfRM(modrm) == 2 /* ADC */) { + helper_ADC( sz, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } else + if (gregOfRM(modrm) == 3 /* SBB */) { + helper_SBB( sz, dst1, dst0, src, + /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + if (gregOfRM(modrm) < 7) + putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); + + delta += (am_sz + d_sz); + DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, + 
nameIReg(sz,eregOfRM(modrm))); + } else { + addr = disAMode ( &len, sorb, delta, dis_buf); + + assign(dst0, loadLE(ty,mkexpr(addr))); + assign(src, mkU(ty,d32 & mask)); + + if (gregOfRM(modrm) == 2 /* ADC */) { + if (locked) { + /* cas-style store */ + helper_ADC( sz, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); + } else { + /* normal store */ + helper_ADC( sz, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else + if (gregOfRM(modrm) == 3 /* SBB */) { + if (locked) { + /* cas-style store */ + helper_SBB( sz, dst1, dst0, src, + /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); + } else { + /* normal store */ + helper_SBB( sz, dst1, dst0, src, + /*store*/addr, IRTemp_INVALID, 0 ); + } + } else { + assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); + if (gregOfRM(modrm) < 7) { + if (locked) { + casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, + mkexpr(dst1)/*newVal*/, + guest_EIP_curr_instr ); + } else { + storeLE(mkexpr(addr), mkexpr(dst1)); + } + } + if (isAddSub(op8)) + setFlags_DEP1_DEP2(op8, dst0, src, ty); + else + setFlags_DEP1(op8, dst1, ty); + } + + delta += (len+d_sz); + DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), + d32, dis_buf); + } + return delta; + } + + + /* Group 2 extended opcodes. shift_expr must be an 8-bit typed + expression. */ + + static + UInt dis_Grp2 ( UChar sorb, + Int delta, UChar modrm, + Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, + HChar* shift_expr_txt, Bool* decode_OK ) + { + /* delta on entry points at the modrm byte. */ + HChar dis_buf[50]; + Int len; + Bool isShift, isRotate, isRotateC; + IRType ty = szToITy(sz); + IRTemp dst0 = newTemp(ty); + IRTemp dst1 = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + + *decode_OK = True; + + vassert(sz == 1 || sz == 2 || sz == 4); + + /* Put value to shift/rotate in dst0. 
*/ + if (epartIsReg(modrm)) { + assign(dst0, getIReg(sz, eregOfRM(modrm))); + delta += (am_sz + d_sz); + } else { + addr = disAMode ( &len, sorb, delta, dis_buf); + assign(dst0, loadLE(ty,mkexpr(addr))); + delta += len + d_sz; + } + + isShift = False; + switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; } + + isRotate = False; + switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; } + + isRotateC = False; + switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; } + + if (gregOfRM(modrm) == 6) { + *decode_OK = False; + return delta; + } + + if (!isShift && !isRotate && !isRotateC) { + /*NOTREACHED*/ + vpanic("dis_Grp2(Reg): unhandled case(x86)"); + } + + if (isRotateC) { + /* call a helper; these insns are so ridiculous they do not + deserve better */ + Bool left = toBool(gregOfRM(modrm) == 2); + IRTemp r64 = newTemp(Ity_I64); + IRExpr** args + = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */ + widenUto32(shift_expr), /* rotate amount */ + widenUto32(mk_x86g_calculate_eflags_all()), + mkU32(sz) ); + assign( r64, mkIRExprCCall( + Ity_I64, + 0/*regparm*/, + left ? "x86g_calculate_RCL" : "x86g_calculate_RCR", + left ? &x86g_calculate_RCL : &x86g_calculate_RCR, + args + ) + ); + /* new eflags in hi half r64; new value in lo half r64 */ + assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) ); + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. 
*/ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + if (isShift) { + + IRTemp pre32 = newTemp(Ity_I32); + IRTemp res32 = newTemp(Ity_I32); + IRTemp res32ss = newTemp(Ity_I32); + IRTemp shift_amt = newTemp(Ity_I8); + IROp op32; + + switch (gregOfRM(modrm)) { + case 4: op32 = Iop_Shl32; break; + case 5: op32 = Iop_Shr32; break; + case 7: op32 = Iop_Sar32; break; + /*NOTREACHED*/ + default: vpanic("dis_Grp2:shift"); break; + } + + /* Widen the value to be shifted to 32 bits, do the shift, and + narrow back down. This seems surprisingly long-winded, but + unfortunately the Intel semantics requires that 8/16-bit + shifts give defined results for shift values all the way up + to 31, and this seems the simplest way to do it. It has the + advantage that the only IR level shifts generated are of 32 + bit values, and the shift amount is guaranteed to be in the + range 0 .. 31, thereby observing the IR semantics requiring + all shift values to be in the range 0 .. 2^word_size-1. */ + + /* shift_amt = shift_expr & 31, regardless of operation size */ + assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) ); + + /* suitably widen the value to be shifted to 32 bits. */ + assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0)) + : widenUto32(mkexpr(dst0)) ); + + /* res32 = pre32 `shift` shift_amt */ + assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) ); + + /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */ + assign( res32ss, + binop(op32, + mkexpr(pre32), + binop(Iop_And8, + binop(Iop_Sub8, + mkexpr(shift_amt), mkU8(1)), + mkU8(31))) ); + + /* Build the flags thunk. */ + setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt); + + /* Narrow the result back down. */ + assign( dst1, narrowTo(ty, mkexpr(res32)) ); + + } /* if (isShift) */ + + else + if (isRotate) { + Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 
1 : 2); + Bool left = toBool(gregOfRM(modrm) == 0); + IRTemp rot_amt = newTemp(Ity_I8); + IRTemp rot_amt32 = newTemp(Ity_I8); + IRTemp oldFlags = newTemp(Ity_I32); + + /* rot_amt = shift_expr & mask */ + /* By masking the rotate amount thusly, the IR-level Shl/Shr + expressions never shift beyond the word size and thus remain + well defined. */ + assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31))); + + if (ty == Ity_I32) + assign(rot_amt, mkexpr(rot_amt32)); + else + assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1))); + + if (left) { + + /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ + assign(dst1, + binop( mkSizedOp(ty,Iop_Or8), + binop( mkSizedOp(ty,Iop_Shl8), + mkexpr(dst0), + mkexpr(rot_amt) + ), + binop( mkSizedOp(ty,Iop_Shr8), + mkexpr(dst0), + binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) + ) + ) + ); + ccOp += X86G_CC_OP_ROLB; + + } else { /* right */ + + /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ + assign(dst1, + binop( mkSizedOp(ty,Iop_Or8), + binop( mkSizedOp(ty,Iop_Shr8), + mkexpr(dst0), + mkexpr(rot_amt) + ), + binop( mkSizedOp(ty,Iop_Shl8), + mkexpr(dst0), + binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) + ) + ) + ); + ccOp += X86G_CC_OP_RORB; + + } + + /* dst1 now holds the rotated value. Build flag thunk. We + need the resulting value for this, and the previous flags. + Except don't set it if the rotate count is zero. */ + + assign(oldFlags, mk_x86g_calculate_eflags_all()); + + /* CC_DEP1 is the rotated value. CC_NDEP is flags before. 
*/ + stmt( IRStmt_Put( OFFB_CC_OP, + IRExpr_Mux0X( mkexpr(rot_amt32), + IRExpr_Get(OFFB_CC_OP,Ity_I32), + mkU32(ccOp))) ); + stmt( IRStmt_Put( OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(rot_amt32), + IRExpr_Get(OFFB_CC_DEP1,Ity_I32), + widenUto32(mkexpr(dst1)))) ); + stmt( IRStmt_Put( OFFB_CC_DEP2, + IRExpr_Mux0X( mkexpr(rot_amt32), + IRExpr_Get(OFFB_CC_DEP2,Ity_I32), + mkU32(0))) ); + stmt( IRStmt_Put( OFFB_CC_NDEP, + IRExpr_Mux0X( mkexpr(rot_amt32), + IRExpr_Get(OFFB_CC_NDEP,Ity_I32), + mkexpr(oldFlags))) ); + } /* if (isRotate) */ + + /* Save result, and finish up. */ + if (epartIsReg(modrm)) { + putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); + if (vex_traceflags & VEX_TRACE_FE) { + vex_printf("%s%c ", + nameGrp2(gregOfRM(modrm)), nameISize(sz) ); + if (shift_expr_txt) + vex_printf("%s", shift_expr_txt); + else + ppIRExpr(shift_expr); + vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm))); + } + } else { + storeLE(mkexpr(addr), mkexpr(dst1)); + if (vex_traceflags & VEX_TRACE_FE) { + vex_printf("%s%c ", + nameGrp2(gregOfRM(modrm)), nameISize(sz) ); + if (shift_expr_txt) + vex_printf("%s", shift_expr_txt); + else + ppIRExpr(shift_expr); + vex_printf(", %s\n", dis_buf); + } + } + return delta; + } + + + /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ + static + UInt dis_Grp8_Imm ( UChar sorb, + Bool locked, + Int delta, UChar modrm, + Int am_sz, Int sz, UInt src_val, + Bool* decode_OK ) + { + /* src_val denotes a d8. + And delta on entry points at the modrm byte. */ + + IRType ty = szToITy(sz); + IRTemp t2 = newTemp(Ity_I32); + IRTemp t2m = newTemp(Ity_I32); + IRTemp t_addr = IRTemp_INVALID; + HChar dis_buf[50]; + UInt mask; + + /* we're optimists :-) */ + *decode_OK = True; + + /* Limit src_val -- the bit offset -- to something within a word. + The Intel docs say that literal offsets larger than a word are + masked in this way. 
*/ + switch (sz) { + case 2: src_val &= 15; break; + case 4: src_val &= 31; break; + default: *decode_OK = False; return delta; + } + + /* Invent a mask suitable for the operation. */ + switch (gregOfRM(modrm)) { + case 4: /* BT */ mask = 0; break; + case 5: /* BTS */ mask = 1 << src_val; break; + case 6: /* BTR */ mask = ~(1 << src_val); break; + case 7: /* BTC */ mask = 1 << src_val; break; + /* If this needs to be extended, probably simplest to make a + new function to handle the other cases (0 .. 3). The + Intel docs do however not indicate any use for 0 .. 3, so + we don't expect this to happen. */ + default: *decode_OK = False; return delta; + } + + /* Fetch the value to be tested and modified into t2, which is + 32-bits wide regardless of sz. */ + if (epartIsReg(modrm)) { + vassert(am_sz == 1); + assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) ); + delta += (am_sz + 1); + DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), + src_val, nameIReg(sz,eregOfRM(modrm))); + } else { + Int len; + t_addr = disAMode ( &len, sorb, delta, dis_buf); + delta += (len+1); + assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) ); + DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), + src_val, dis_buf); + } + + /* Compute the new value into t2m, if non-BT. */ + switch (gregOfRM(modrm)) { + case 4: /* BT */ + break; + case 5: /* BTS */ + assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) ); + break; + case 6: /* BTR */ + assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) ); + break; + case 7: /* BTC */ + assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) ); + break; + default: + /*NOTREACHED*/ /*the previous switch guards this*/ + vassert(0); + } + + /* Write the result back, if non-BT. If the CAS fails then we + side-exit from the trace at this point, and so the flag state is + not affected. This is of course as required. 
*/ + if (gregOfRM(modrm) != 4 /* BT */) { + if (epartIsReg(modrm)) { + putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m))); + } else { + if (locked) { + casLE( mkexpr(t_addr), + narrowTo(ty, mkexpr(t2))/*expd*/, + narrowTo(ty, mkexpr(t2m))/*new*/, + guest_EIP_curr_instr ); + } else { + storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); + } + } + } + + /* Copy relevant bit from t2 into the carry flag. */ + /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)), + mkU32(1)) + )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + + return delta; + } + + + /* Signed/unsigned widening multiply. Generate IR to multiply the + value in EAX/AX/AL by the given IRTemp, and park the result in + EDX:EAX/DX:AX/AX. + */ + static void codegen_mulL_A_D ( Int sz, Bool syned, + IRTemp tmp, HChar* tmp_txt ) + { + IRType ty = szToITy(sz); + IRTemp t1 = newTemp(ty); + + assign( t1, getIReg(sz, R_EAX) ); + + switch (ty) { + case Ity_I32: { + IRTemp res64 = newTemp(Ity_I64); + IRTemp resHi = newTemp(Ity_I32); + IRTemp resLo = newTemp(Ity_I32); + IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; + UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; + setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); + assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); + assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); + assign( resLo, unop(Iop_64to32,mkexpr(res64))); + putIReg(4, R_EDX, mkexpr(resHi)); + putIReg(4, R_EAX, mkexpr(resLo)); + break; + } + case Ity_I16: { + IRTemp res32 = newTemp(Ity_I32); + IRTemp resHi = newTemp(Ity_I16); + IRTemp resLo = newTemp(Ity_I16); + IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; + UInt tBaseOp = syned ? 
X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; + setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); + assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); + assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); + assign( resLo, unop(Iop_32to16,mkexpr(res32))); + putIReg(2, R_EDX, mkexpr(resHi)); + putIReg(2, R_EAX, mkexpr(resLo)); + break; + } + case Ity_I8: { + IRTemp res16 = newTemp(Ity_I16); + IRTemp resHi = newTemp(Ity_I8); + IRTemp resLo = newTemp(Ity_I8); + IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; + UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; + setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); + assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); + assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); + assign( resLo, unop(Iop_16to8,mkexpr(res16))); + putIReg(2, R_EAX, mkexpr(res16)); + break; + } + default: + vpanic("codegen_mulL_A_D(x86)"); + } + DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); + } + + + /* Group 3 extended opcodes. */ + static + UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK ) + { + UInt d32; + UChar modrm; + HChar dis_buf[50]; + Int len; + IRTemp addr; + IRType ty = szToITy(sz); + IRTemp t1 = newTemp(ty); + IRTemp dst1, src, dst0; + + *decode_OK = True; /* may change this later */ + + modrm = getIByte(delta); + + if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) { + /* LOCK prefix only allowed with not and neg subopcodes */ + *decode_OK = False; + return delta; + } + + if (epartIsReg(modrm)) { + switch (gregOfRM(modrm)) { + case 0: { /* TEST */ + delta++; d32 = getUDisp(sz, delta); delta += sz; + dst1 = newTemp(ty); + assign(dst1, binop(mkSizedOp(ty,Iop_And8), + getIReg(sz,eregOfRM(modrm)), + mkU(ty,d32))); + setFlags_DEP1( Iop_And8, dst1, ty ); + DIP("test%c $0x%x, %s\n", nameISize(sz), d32, + nameIReg(sz, eregOfRM(modrm))); + break; + } + case 1: /* UNDEFINED */ + /* The Intel docs imply this insn is undefined and binutils + agrees. 
Unfortunately Core 2 will run it (with who + knows what result?) sandpile.org reckons it's an alias + for case 0. We play safe. */ + *decode_OK = False; + break; + case 2: /* NOT */ + delta++; + putIReg(sz, eregOfRM(modrm), + unop(mkSizedOp(ty,Iop_Not8), + getIReg(sz, eregOfRM(modrm)))); + DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + case 3: /* NEG */ + delta++; + dst0 = newTemp(ty); + src = newTemp(ty); + dst1 = newTemp(ty); + assign(dst0, mkU(ty,0)); + assign(src, getIReg(sz,eregOfRM(modrm))); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); + setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); + putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); + DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + case 4: /* MUL (unsigned widening) */ + delta++; + src = newTemp(ty); + assign(src, getIReg(sz,eregOfRM(modrm))); + codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) ); + break; + case 5: /* IMUL (signed widening) */ + delta++; + src = newTemp(ty); + assign(src, getIReg(sz,eregOfRM(modrm))); + codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) ); + break; + case 6: /* DIV */ + delta++; + assign( t1, getIReg(sz, eregOfRM(modrm)) ); + codegen_div ( sz, t1, False ); + DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + case 7: /* IDIV */ + delta++; + assign( t1, getIReg(sz, eregOfRM(modrm)) ); + codegen_div ( sz, t1, True ); + DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + break; + default: + /* This can't happen - gregOfRM should return 0 .. 
7 only */ + vpanic("Grp3(x86)"); + } + } else { + addr = disAMode ( &len, sorb, delta, dis_buf ); + t1 = newTemp(ty); + delta += len; + assign(t1, loadLE(ty,mkexpr(addr))); + switch (gregOfRM(modrm)) { + case 0: { /* TEST */ + d32 = getUDisp(sz, delta); delta += sz; + dst1 = newTemp(ty); + assign(dst1, binop(mkSizedOp(ty,Iop_And8), + mkexpr(t1), mkU(ty,d32))); + setFlags_DEP1( Iop_And8, dst1, ty ); + DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); + break; + } + case 1: /* UNDEFINED */ + /* See comment above on R case */ + *decode_OK = False; + break; + case 2: /* NOT */ + dst1 = newTemp(ty); + assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); + if (locked) { + casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, + guest_EIP_curr_instr ); + } else { + storeLE( mkexpr(addr), mkexpr(dst1) ); + } + DIP("not%c %s\n", nameISize(sz), dis_buf); + break; + case 3: /* NEG */ + dst0 = newTemp(ty); + src = newTemp(ty); + dst1 = newTemp(ty); + assign(dst0, mkU(ty,0)); + assign(src, mkexpr(t1)); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), + mkexpr(dst0), mkexpr(src))); + if (locked) { + casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, + guest_EIP_curr_instr ); + } else { + storeLE( mkexpr(addr), mkexpr(dst1) ); + } + setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); + DIP("neg%c %s\n", nameISize(sz), dis_buf); + break; + case 4: /* MUL */ + codegen_mulL_A_D ( sz, False, t1, dis_buf ); + break; + case 5: /* IMUL */ + codegen_mulL_A_D ( sz, True, t1, dis_buf ); + break; + case 6: /* DIV */ + codegen_div ( sz, t1, False ); + DIP("div%c %s\n", nameISize(sz), dis_buf); + break; + case 7: /* IDIV */ + codegen_div ( sz, t1, True ); + DIP("idiv%c %s\n", nameISize(sz), dis_buf); + break; + default: + /* This can't happen - gregOfRM should return 0 .. 7 only */ + vpanic("Grp3(x86)"); + } + } + return delta; + } + + + /* Group 4 extended opcodes. 
*/
/* Decode one Group 4 instruction (byte-sized INC/DEC on a register or
   memory operand).  'delta' indexes the modRM byte.

   sorb       segment-override byte, forwarded to disAMode.
   locked     True if the insn carried a LOCK prefix; only sub-opcodes
              0 (INC) and 1 (DEC) are accepted in that case.
   decode_OK  set to True on entry and to False if the sub-opcode is
              not INC/DEC.

   Returns delta advanced past the modRM byte / addressing mode on
   success, or the incoming delta unchanged on decode failure. */
static
UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
   Int    alen;
   UChar  modrm;
   HChar  dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);   /* operand before the update */
   IRTemp t2 = newTemp(ty);   /* operand after the update  */
   Int    subop;              /* reg field of modRM: the sub-opcode */
   Bool   isInc;
   Bool   isIncOrDec;

   *decode_OK = True;

   modrm      = getIByte(delta);
   subop      = gregOfRM(modrm);
   isInc      = (subop == 0) ? True : False;
   isIncOrDec = (subop == 0 || subop == 1) ? True : False;

   /* LOCK prefix only allowed with inc and dec subopcodes */
   if (locked && !isIncOrDec) {
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      Int rm = eregOfRM(modrm);

      /* The operand is fetched before the sub-opcode is validated. */
      assign(t1, getIReg(1, rm));
      if (!isIncOrDec) {
         *decode_OK = False;
         return delta;
      }

      assign(t2, binop(isInc ? Iop_Add8 : Iop_Sub8, mkexpr(t1), mkU8(1)));
      putIReg(1, rm, mkexpr(t2));
      setFlags_INC_DEC( isInc, t2, ty );

      delta++;
      DIP("%sb %s\n", nameGrp4(subop), nameIReg(1, rm));
   } else {
      IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );

      /* As above: the load is emitted before the sub-opcode check. */
      assign( t1, loadLE(ty, mkexpr(addr)) );
      if (!isIncOrDec) {
         *decode_OK = False;
         return delta;
      }

      assign(t2, binop(isInc ? Iop_Add8 : Iop_Sub8, mkexpr(t1), mkU8(1)));
      if (locked) {
         /* LOCKed form: write back via compare-and-swap against the
            value that was loaded. */
         casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                guest_EIP_curr_instr );
      } else {
         storeLE( mkexpr(addr), mkexpr(t2) );
      }
      setFlags_INC_DEC( isInc, t2, ty );

      delta += alen;
      DIP("%sb %s\n", nameGrp4(subop), dis_buf);
   }
   return delta;
}


/* Group 5 extended opcodes.
*/ + static + UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, + DisResult* dres, Bool* decode_OK ) + { + Int len; + UChar modrm; + HChar dis_buf[50]; + IRTemp addr = IRTemp_INVALID; + IRType ty = szToITy(sz); + IRTemp t1 = newTemp(ty); + IRTemp t2 = IRTemp_INVALID; + + *decode_OK = True; + + modrm = getIByte(delta); + + if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { + /* LOCK prefix only allowed with inc and dec subopcodes */ + *decode_OK = False; + return delta; + } + + if (epartIsReg(modrm)) { + assign(t1, getIReg(sz,eregOfRM(modrm))); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + vassert(sz == 2 || sz == 4); + t2 = newTemp(ty); + assign(t2, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(t1), mkU(ty,1))); + setFlags_INC_DEC( True, t2, ty ); + putIReg(sz,eregOfRM(modrm),mkexpr(t2)); + break; + case 1: /* DEC */ + vassert(sz == 2 || sz == 4); + t2 = newTemp(ty); + assign(t2, binop(mkSizedOp(ty,Iop_Sub8), + mkexpr(t1), mkU(ty,1))); + setFlags_INC_DEC( False, t2, ty ); + putIReg(sz,eregOfRM(modrm),mkexpr(t2)); + break; + case 2: /* call Ev */ + vassert(sz == 4); + t2 = newTemp(Ity_I32); + assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); + putIReg(4, R_ESP, mkexpr(t2)); + storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); + jmp_treg(Ijk_Call,t1); + dres->whatNext = Dis_StopHere; + break; + case 4: /* jmp Ev */ + vassert(sz == 4); + jmp_treg(Ijk_Boring,t1); + dres->whatNext = Dis_StopHere; + break; + case 6: /* PUSH Ev */ + vassert(sz == 4 || sz == 2); + t2 = newTemp(Ity_I32); + assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(t2) ); + storeLE( mkexpr(t2), mkexpr(t1) ); + break; + default: + *decode_OK = False; + return delta; + } + delta++; + DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), + nameISize(sz), nameIReg(sz, eregOfRM(modrm))); + } else { + addr = disAMode ( &len, sorb, delta, dis_buf ); + assign(t1, loadLE(ty,mkexpr(addr))); + switch (gregOfRM(modrm)) { + case 0: /* INC */ + t2 = 
newTemp(ty); + assign(t2, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(t1), mkU(ty,1))); + if (locked) { + casLE( mkexpr(addr), + mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); + } else { + storeLE(mkexpr(addr),mkexpr(t2)); + } + setFlags_INC_DEC( True, t2, ty ); + break; + case 1: /* DEC */ + t2 = newTemp(ty); + assign(t2, binop(mkSizedOp(ty,Iop_Sub8), + mkexpr(t1), mkU(ty,1))); + if (locked) { + casLE( mkexpr(addr), + mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); + } else { + storeLE(mkexpr(addr),mkexpr(t2)); + } + setFlags_INC_DEC( False, t2, ty ); + break; + case 2: /* call Ev */ + vassert(sz == 4); + t2 = newTemp(Ity_I32); + assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); + putIReg(4, R_ESP, mkexpr(t2)); + storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); + jmp_treg(Ijk_Call,t1); + dres->whatNext = Dis_StopHere; + break; + case 4: /* JMP Ev */ + vassert(sz == 4); + jmp_treg(Ijk_Boring,t1); + dres->whatNext = Dis_StopHere; + break; + case 6: /* PUSH Ev */ + vassert(sz == 4 || sz == 2); + t2 = newTemp(Ity_I32); + assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(t2) ); + storeLE( mkexpr(t2), mkexpr(t1) ); + break; + default: + *decode_OK = False; + return delta; + } + delta += len; + DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), + nameISize(sz), dis_buf); + } + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- Disassembling string ops (including REP prefixes) ---*/ + /*------------------------------------------------------------*/ + + /* Code shared by all the string ops */ + static + void dis_string_op_increment(Int sz, Int t_inc) + { + if (sz == 4 || sz == 2) { + assign( t_inc, + binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ), + mkU8(sz/2) ) ); + } else { + assign( t_inc, + IRExpr_Get( OFFB_DFLAG, Ity_I32 ) ); + } + } + + static + void dis_string_op( void (*dis_OP)( Int, IRTemp ), + Int sz, HChar* name, UChar sorb ) + { + IRTemp t_inc = newTemp(Ity_I32); + 
vassert(sorb == 0); /* hmm. so what was the point of passing it in? */ + dis_string_op_increment(sz, t_inc); + dis_OP( sz, t_inc ); + DIP("%s%c\n", name, nameISize(sz)); + } + + static + void dis_MOVS ( Int sz, IRTemp t_inc ) + { + IRType ty = szToITy(sz); + IRTemp td = newTemp(Ity_I32); /* EDI */ + IRTemp ts = newTemp(Ity_I32); /* ESI */ + + assign( td, getIReg(4, R_EDI) ); + assign( ts, getIReg(4, R_ESI) ); + + storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); + + putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); + putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); + } + + static + void dis_LODS ( Int sz, IRTemp t_inc ) + { + IRType ty = szToITy(sz); + IRTemp ts = newTemp(Ity_I32); /* ESI */ + + assign( ts, getIReg(4, R_ESI) ); + + putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) ); + + putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); + } + + static + void dis_STOS ( Int sz, IRTemp t_inc ) + { + IRType ty = szToITy(sz); + IRTemp ta = newTemp(ty); /* EAX */ + IRTemp td = newTemp(Ity_I32); /* EDI */ + + assign( ta, getIReg(sz, R_EAX) ); + assign( td, getIReg(4, R_EDI) ); + + storeLE( mkexpr(td), mkexpr(ta) ); + + putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); + } + + static + void dis_CMPS ( Int sz, IRTemp t_inc ) + { + IRType ty = szToITy(sz); + IRTemp tdv = newTemp(ty); /* (EDI) */ + IRTemp tsv = newTemp(ty); /* (ESI) */ + IRTemp td = newTemp(Ity_I32); /* EDI */ + IRTemp ts = newTemp(Ity_I32); /* ESI */ + + assign( td, getIReg(4, R_EDI) ); + assign( ts, getIReg(4, R_ESI) ); + + assign( tdv, loadLE(ty,mkexpr(td)) ); + assign( tsv, loadLE(ty,mkexpr(ts)) ); + + setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); + + putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); + putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); + } + + static + void dis_SCAS ( Int sz, IRTemp t_inc ) + { + IRType ty = szToITy(sz); + IRTemp ta = newTemp(ty); /* EAX */ + IRTemp td = newTemp(Ity_I32); /* EDI */ + 
IRTemp tdv = newTemp(ty); /* (EDI) */ + + assign( ta, getIReg(sz, R_EAX) ); + assign( td, getIReg(4, R_EDI) ); + + assign( tdv, loadLE(ty,mkexpr(td)) ); + setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); + + putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); + } + + + /* Wrap the appropriate string op inside a REP/REPE/REPNE. + We assume the insn is the last one in the basic block, and so emit a jump + to the next insn, rather than just falling through. */ + static + void dis_REP_op ( X86Condcode cond, + void (*dis_OP)(Int, IRTemp), + Int sz, Addr32 eip, Addr32 eip_next, HChar* name ) + { + IRTemp t_inc = newTemp(Ity_I32); + IRTemp tc = newTemp(Ity_I32); /* ECX */ + + assign( tc, getIReg(4,R_ECX) ); + + stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), + Ijk_Boring, + IRConst_U32(eip_next) ) ); + + putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); + + dis_string_op_increment(sz, t_inc); + dis_OP (sz, t_inc); + + if (cond == X86CondAlways) { + jmp_lit(Ijk_Boring,eip); + } else { + stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), + Ijk_Boring, + IRConst_U32(eip) ) ); + jmp_lit(Ijk_Boring,eip_next); + } + DIP("%s%c\n", name, nameISize(sz)); + } + + + /*------------------------------------------------------------*/ + /*--- Arithmetic, etc. ---*/ + /*------------------------------------------------------------*/ + + /* IMUL E, G. Supplied eip points to the modR/M byte. 
*/ + static + UInt dis_mul_E_G ( UChar sorb, + Int size, + Int delta0 ) + { + Int alen; + HChar dis_buf[50]; + UChar rm = getIByte(delta0); + IRType ty = szToITy(size); + IRTemp te = newTemp(ty); + IRTemp tg = newTemp(ty); + IRTemp resLo = newTemp(ty); + + assign( tg, getIReg(size, gregOfRM(rm)) ); + if (epartIsReg(rm)) { + assign( te, getIReg(size, eregOfRM(rm)) ); + } else { + IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf ); + assign( te, loadLE(ty,mkexpr(addr)) ); + } + + setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB ); + + assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); + + putIReg(size, gregOfRM(rm), mkexpr(resLo) ); + + if (epartIsReg(rm)) { + DIP("imul%c %s, %s\n", nameISize(size), + nameIReg(size,eregOfRM(rm)), + nameIReg(size,gregOfRM(rm))); + return 1+delta0; + } else { + DIP("imul%c %s, %s\n", nameISize(size), + dis_buf, nameIReg(size,gregOfRM(rm))); + return alen+delta0; + } + } + + + /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ + static + UInt dis_imul_I_E_G ( UChar sorb, + Int size, + Int delta, + Int litsize ) + { + Int d32, alen; + HChar dis_buf[50]; + UChar rm = getIByte(delta); + IRType ty = szToITy(size); + IRTemp te = newTemp(ty); + IRTemp tl = newTemp(ty); + IRTemp resLo = newTemp(ty); + + vassert(size == 1 || size == 2 || size == 4); + + if (epartIsReg(rm)) { + assign(te, getIReg(size, eregOfRM(rm))); + delta++; + } else { + IRTemp addr = disAMode( &alen, sorb, delta, dis_buf ); + assign(te, loadLE(ty, mkexpr(addr))); + delta += alen; + } + d32 = getSDisp(litsize,delta); + delta += litsize; + + if (size == 1) d32 &= 0xFF; + if (size == 2) d32 &= 0xFFFF; + + assign(tl, mkU(ty,d32)); + + assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); + + setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB ); + + putIReg(size, gregOfRM(rm), mkexpr(resLo)); + + DIP("imul %d, %s, %s\n", d32, + ( epartIsReg(rm) ? 
nameIReg(size,eregOfRM(rm)) : dis_buf ), + nameIReg(size,gregOfRM(rm)) ); + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- ---*/ + /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ + /*--- ---*/ + /*------------------------------------------------------------*/ + + /* --- Helper functions for dealing with the register stack. --- */ + + /* --- Set the emulation-warning pseudo-register. --- */ + + static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) + { + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + stmt( IRStmt_Put( OFFB_EMWARN, e ) ); + } + + /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ + + static IRExpr* mkQNaN64 ( void ) + { + /* QNaN is 0 2047 1 0(51times) + == 0b 11111111111b 1 0(51times) + == 0x7FF8 0000 0000 0000 + */ + return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); + } + + /* --------- Get/put the top-of-stack pointer. --------- */ + + static IRExpr* get_ftop ( void ) + { + return IRExpr_Get( OFFB_FTOP, Ity_I32 ); + } + + static void put_ftop ( IRExpr* e ) + { + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + stmt( IRStmt_Put( OFFB_FTOP, e ) ); + } + + /* --------- Get/put the C3210 bits. --------- */ + + static IRExpr* get_C3210 ( void ) + { + return IRExpr_Get( OFFB_FC3210, Ity_I32 ); + } + + static void put_C3210 ( IRExpr* e ) + { + stmt( IRStmt_Put( OFFB_FC3210, e ) ); + } + + /* --------- Get/put the FPU rounding mode. --------- */ + static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) + { + return IRExpr_Get( OFFB_FPROUND, Ity_I32 ); + } + + static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) + { + stmt( IRStmt_Put( OFFB_FPROUND, e ) ); + } + + + /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ + /* Produces a value in 0 .. 3, which is encoded as per the type + IRRoundingMode. Since the guest_FPROUND value is also encoded as + per IRRoundingMode, we merely need to get it and mask it for + safety. 
+ */ + static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) + { + return binop( Iop_And32, get_fpround(), mkU32(3) ); + } + + static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) + { + return mkU32(Irrm_NEAREST); + } + + + /* --------- Get/set FP register tag bytes. --------- */ + + /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ + + static void put_ST_TAG ( Int i, IRExpr* value ) + { + IRRegArray* descr; + vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); + descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); + } + + /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be + zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ + + static IRExpr* get_ST_TAG ( Int i ) + { + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); + return IRExpr_GetI( descr, get_ftop(), i ); + } + + + /* --------- Get/set FP registers. --------- */ + + /* Given i, and some expression e, emit 'ST(i) = e' and set the + register's tag to indicate the register is full. The previous + state of the register is not checked. */ + + static void put_ST_UNCHECKED ( Int i, IRExpr* value ) + { + IRRegArray* descr; + vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); + descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); + stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); + /* Mark the register as in-use. */ + put_ST_TAG(i, mkU8(1)); + } + + /* Given i, and some expression e, emit + ST(i) = is_full(i) ? NaN : e + and set the tag accordingly. + */ + + static void put_ST ( Int i, IRExpr* value ) + { + put_ST_UNCHECKED( i, + IRExpr_Mux0X( get_ST_TAG(i), + /* 0 means empty */ + value, + /* non-0 means full */ + mkQNaN64() + ) + ); + } + + + /* Given i, generate an expression yielding 'ST(i)'. 
*/ + + static IRExpr* get_ST_UNCHECKED ( Int i ) + { + IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); + return IRExpr_GetI( descr, get_ftop(), i ); + } + + + /* Given i, generate an expression yielding + is_full(i) ? ST(i) : NaN + */ + + static IRExpr* get_ST ( Int i ) + { + return + IRExpr_Mux0X( get_ST_TAG(i), + /* 0 means empty */ + mkQNaN64(), + /* non-0 means full */ + get_ST_UNCHECKED(i)); + } + + + /* Adjust FTOP downwards by one register. */ + + static void fp_push ( void ) + { + put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); + } + + /* Adjust FTOP upwards by one register, and mark the vacated register + as empty. */ + + static void fp_pop ( void ) + { + put_ST_TAG(0, mkU8(0)); + put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); + } + + /* Clear the C2 bit of the FPU status register, for + sin/cos/tan/sincos. */ + + static void clear_C2 ( void ) + { + put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); + } + + /* Invent a plausible-looking FPU status word value: + ((ftop & 7) << 11) | (c3210 & 0x4700) + */ + static IRExpr* get_FPU_sw ( void ) + { + return + unop(Iop_32to16, + binop(Iop_Or32, + binop(Iop_Shl32, + binop(Iop_And32, get_ftop(), mkU32(7)), + mkU8(11)), + binop(Iop_And32, get_C3210(), mkU32(0x4700)) + )); + } + + + /* ------------------------------------------------------- */ + /* Given all that stack-mangling junk, we can now go ahead + and describe FP instructions. + */ + + /* ST(0) = ST(0) `op` mem64/32(addr) + Need to check ST(0)'s tag on read, but not on write. 
+ */ + static + void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, + IROp op, Bool dbl ) + { + DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); + if (dbl) { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + loadLE(Ity_F64,mkexpr(addr)) + )); + } else { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) + )); + } + } + + + /* ST(0) = mem64/32(addr) `op` ST(0) + Need to check ST(0)'s tag on read, but not on write. + */ + static + void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, + IROp op, Bool dbl ) + { + DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); + if (dbl) { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + loadLE(Ity_F64,mkexpr(addr)), + get_ST(0) + )); + } else { + put_ST_UNCHECKED(0, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), + get_ST(0) + )); + } + } + + + /* ST(dst) = ST(dst) `op` ST(src). + Check dst and src tags when reading but not on write. + */ + static + void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, + Bool pop_after ) + { + DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", + (Int)st_src, (Int)st_dst ); + put_ST_UNCHECKED( + st_dst, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(st_dst), + get_ST(st_src) ) + ); + if (pop_after) + fp_pop(); + } + + /* ST(dst) = ST(src) `op` ST(dst). + Check dst and src tags when reading but not on write. 
+ */ + static + void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, + Bool pop_after ) + { + DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", + (Int)st_src, (Int)st_dst ); + put_ST_UNCHECKED( + st_dst, + triop( op, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(st_src), + get_ST(st_dst) ) + ); + if (pop_after) + fp_pop(); + } + + /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */ + static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) + { + DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i ); + /* This is a bit of a hack (and isn't really right). It sets + Z,P,C,O correctly, but forces A and S to zero, whereas the Intel + documentation implies A and S are unchanged. + */ + /* It's also fishy in that it is used both for COMIP and + UCOMIP, and they aren't the same (although similar). */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop( Iop_And32, + binop(Iop_CmpF64, get_ST(0), get_ST(i)), + mkU32(0x45) + ))); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + if (pop_after) + fp_pop(); + } + + + static + UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) + { + Int len; + UInt r_src, r_dst; + HChar dis_buf[50]; + IRTemp t1, t2; + + /* On entry, delta points at the second byte of the insn (the modrm + byte).*/ + UChar first_opcode = getIByte(delta-1); + UChar modrm = getIByte(delta+0); + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ + + if (first_opcode == 0xD8) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FADD single-real */ + fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); + break; + + case 1: /* FMUL single-real */ + fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); + break; + + case 2: /* FCOM single-real */ + DIP("fcoms %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_F32toF64, + loadLE(Ity_F32,mkexpr(addr)))), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 3: /* FCOMP single-real */ + DIP("fcomps %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_F32toF64, + loadLE(Ity_F32,mkexpr(addr)))), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + case 4: /* FSUB single-real */ + fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); + break; + + case 5: /* FSUBR single-real */ + fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); + break; + + case 6: /* FDIV single-real */ + fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); + break; + + case 7: /* FDIVR single-real */ + fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xD8\n"); + goto decode_fail; + } + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); + break; + + case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); + break; + + /* Dunno if this is right */ + case 0xD0 ... 
0xD7: /* FCOM %st(?),%st(0) */ + r_dst = (UInt)modrm - 0xD0; + DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + )); + break; + + /* Dunno if this is right */ + case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ + r_dst = (UInt)modrm - 0xD8; + DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ + fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); + break; + + case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); + break; + + case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); + break; + + case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xD9) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FLD single-real */ + DIP("flds %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_F32toF64, + loadLE(Ity_F32, mkexpr(addr)))); + break; + + case 2: /* FST single-real */ + DIP("fsts %s\n", dis_buf); + storeLE(mkexpr(addr), + binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); + break; + + case 3: /* FSTP single-real */ + DIP("fstps %s\n", dis_buf); + storeLE(mkexpr(addr), + binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); + fp_pop(); + break; + + case 4: { /* FLDENV m28 */ + /* Uses dirty helper: + VexEmWarn x86g_do_FLDENV ( VexGuestX86State*, HWord ) */ + IRTemp ew = newTemp(Ity_I32); + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FLDENV", + &x86g_dirtyhelper_FLDENV, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + d->tmp = ew; + /* declare we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 28; + + /* declare we're writing guest state */ + d->nFxState = 4; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPTAGS; + d->fxState[1].size = 8 * sizeof(UChar); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPROUND; + d->fxState[2].size = sizeof(UInt); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FC3210; + d->fxState[3].size = sizeof(UInt); + + stmt( IRStmt_Dirty(d) ); + + /* ew contains any emulation warning we may need to + issue. If needed, side-exit to the next insn, + reporting the warning, so that Valgrind's dispatcher + sees the warning. 
*/ + put_emwarn( mkexpr(ew) ); + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + ) + ); + + DIP("fldenv %s\n", dis_buf); + break; + } + + case 5: {/* FLDCW */ + /* The only thing we observe in the control word is the + rounding mode. Therefore, pass the 16-bit value + (x87 native-format control word) to a clean helper, + getting back a 64-bit value, the lower half of which + is the FPROUND value to store, and the upper half of + which is the emulation-warning token which may be + generated. + */ + /* ULong x86h_check_fldcw ( UInt ); */ + IRTemp t64 = newTemp(Ity_I64); + IRTemp ew = newTemp(Ity_I32); + DIP("fldcw %s\n", dis_buf); + assign( t64, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86g_check_fldcw", + &x86g_check_fldcw, + mkIRExprVec_1( + unop( Iop_16Uto32, + loadLE(Ity_I16, mkexpr(addr))) + ) + ) + ); + + put_fpround( unop(Iop_64to32, mkexpr(t64)) ); + assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); + put_emwarn( mkexpr(ew) ); + /* Finally, if an emulation warning was reported, + side-exit to the next insn, reporting the warning, + so that Valgrind's dispatcher sees the warning. 
*/ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + ) + ); + break; + } + + case 6: { /* FNSTENV m28 */ + /* Uses dirty helper: + void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */ + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FSTENV", + &x86g_dirtyhelper_FSTENV, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 28; + + /* declare we're reading guest state */ + d->nFxState = 4; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPTAGS; + d->fxState[1].size = 8 * sizeof(UChar); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPROUND; + d->fxState[2].size = sizeof(UInt); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FC3210; + d->fxState[3].size = sizeof(UInt); + + stmt( IRStmt_Dirty(d) ); + + DIP("fnstenv %s\n", dis_buf); + break; + } + + case 7: /* FNSTCW */ + /* Fake up a native x87 FPU control word. The only + thing it depends on is FPROUND[1:0], so call a clean + helper to cook it up. */ + /* UInt x86h_create_fpucw ( UInt fpround ) */ + DIP("fnstcw %s\n", dis_buf); + storeLE( + mkexpr(addr), + unop( Iop_32to16, + mkIRExprCCall( + Ity_I32, 0/*regp*/, + "x86g_create_fpucw", &x86g_create_fpucw, + mkIRExprVec_1( get_fpround() ) + ) + ) + ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xD9\n"); + goto decode_fail; + } + + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FLD %st(?) */ + r_src = (UInt)modrm - 0xC0; + DIP("fld %%st(%d)\n", (Int)r_src); + t1 = newTemp(Ity_F64); + assign(t1, get_ST(r_src)); + fp_push(); + put_ST(0, mkexpr(t1)); + break; + + case 0xC8 ... 0xCF: /* FXCH %st(?) 
*/ + r_src = (UInt)modrm - 0xC8; + DIP("fxch %%st(%d)\n", (Int)r_src); + t1 = newTemp(Ity_F64); + t2 = newTemp(Ity_F64); + assign(t1, get_ST(0)); + assign(t2, get_ST(r_src)); + put_ST_UNCHECKED(0, mkexpr(t2)); + put_ST_UNCHECKED(r_src, mkexpr(t1)); + break; + + case 0xE0: /* FCHS */ + DIP("fchs\n"); + put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); + break; + + case 0xE1: /* FABS */ + DIP("fabs\n"); + put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); + break; + + case 0xE4: /* FTST */ + DIP("ftst\n"); + /* This forces C1 to zero, which isn't right. */ + /* Well, in fact the Intel docs say (bizarrely): "C1 is + set to 0 if stack underflow occurred; otherwise, set + to 0" which is pretty nonsensical. I guess it's a + typo. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + IRExpr_Const(IRConst_F64i(0x0ULL))), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 0xE5: { /* FXAM */ + /* This is an interesting one. It examines %st(0), + regardless of whether the tag says it's empty or not. + Here, just pass both the tag (in our format) and the + value (as a double, actually a ULong) to a helper + function. 
*/ + IRExpr** args + = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)), + unop(Iop_ReinterpF64asI64, + get_ST_UNCHECKED(0)) ); + put_C3210(mkIRExprCCall( + Ity_I32, + 0/*regparm*/, + "x86g_calculate_FXAM", &x86g_calculate_FXAM, + args + )); + DIP("fxam\n"); + break; + } + + case 0xE8: /* FLD1 */ + DIP("fld1\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); + break; + + case 0xE9: /* FLDL2T */ + DIP("fldl2t\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); + break; + + case 0xEA: /* FLDL2E */ + DIP("fldl2e\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); + break; + + case 0xEB: /* FLDPI */ + DIP("fldpi\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); + break; + + case 0xEC: /* FLDLG2 */ + DIP("fldlg2\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); + break; + + case 0xED: /* FLDLN2 */ + DIP("fldln2\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); + break; + + case 0xEE: /* FLDZ */ + DIP("fldz\n"); + fp_push(); + /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ + put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); + break; + + case 0xF0: /* F2XM1 */ + DIP("f2xm1\n"); + put_ST_UNCHECKED(0, + binop(Iop_2xm1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + break; + + case 0xF1: /* FYL2X */ + DIP("fyl2x\n"); + put_ST_UNCHECKED(1, + triop(Iop_Yl2xF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 
0xF2: /* FPTAN */ + DIP("ftan\n"); + put_ST_UNCHECKED(0, + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + fp_push(); + put_ST(0, IRExpr_Const(IRConst_F64(1.0))); + clear_C2(); /* HACK */ + break; + + case 0xF3: /* FPATAN */ + DIP("fpatan\n"); + put_ST_UNCHECKED(1, + triop(Iop_AtanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 0xF4: { /* FXTRACT */ + IRTemp argF = newTemp(Ity_F64); + IRTemp sigF = newTemp(Ity_F64); + IRTemp expF = newTemp(Ity_F64); + IRTemp argI = newTemp(Ity_I64); + IRTemp sigI = newTemp(Ity_I64); + IRTemp expI = newTemp(Ity_I64); + DIP("fxtract\n"); + assign( argF, get_ST(0) ); + assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); + assign( sigI, + mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86amd64g_calculate_FXTRACT", + &x86amd64g_calculate_FXTRACT, + mkIRExprVec_2( mkexpr(argI), + mkIRExpr_HWord(0)/*sig*/ )) + ); + assign( expI, + mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86amd64g_calculate_FXTRACT", + &x86amd64g_calculate_FXTRACT, + mkIRExprVec_2( mkexpr(argI), + mkIRExpr_HWord(1)/*exp*/ )) + ); + assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); + assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); + /* exponent */ + put_ST_UNCHECKED(0, mkexpr(expF) ); + fp_push(); + /* significand */ + put_ST(0, mkexpr(sigF) ); + break; + } + + case 0xF5: { /* FPREM1 -- IEEE compliant */ + IRTemp a1 = newTemp(Ity_F64); + IRTemp a2 = newTemp(Ity_F64); + DIP("fprem1\n"); + /* Do FPREM1 twice, once to get the remainder, and once + to get the C3210 flag values. 
*/ + assign( a1, get_ST(0) ); + assign( a2, get_ST(1) ); + put_ST_UNCHECKED(0, + triop(Iop_PRem1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2))); + put_C3210( + triop(Iop_PRem1C3210F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2)) ); + break; + } + + case 0xF7: /* FINCSTP */ + DIP("fprem\n"); + put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); + break; + + case 0xF8: { /* FPREM -- not IEEE compliant */ + IRTemp a1 = newTemp(Ity_F64); + IRTemp a2 = newTemp(Ity_F64); + DIP("fprem\n"); + /* Do FPREM twice, once to get the remainder, and once + to get the C3210 flag values. */ + assign( a1, get_ST(0) ); + assign( a2, get_ST(1) ); + put_ST_UNCHECKED(0, + triop(Iop_PRemF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2))); + put_C3210( + triop(Iop_PRemC3210F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1), + mkexpr(a2)) ); + break; + } + + case 0xF9: /* FYL2XP1 */ + DIP("fyl2xp1\n"); + put_ST_UNCHECKED(1, + triop(Iop_Yl2xp1F64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(1), + get_ST(0))); + fp_pop(); + break; + + case 0xFA: /* FSQRT */ + DIP("fsqrt\n"); + put_ST_UNCHECKED(0, + binop(Iop_SqrtF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + break; + + case 0xFB: { /* FSINCOS */ + IRTemp a1 = newTemp(Ity_F64); + assign( a1, get_ST(0) ); + DIP("fsincos\n"); + put_ST_UNCHECKED(0, + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1))); + fp_push(); + put_ST(0, + binop(Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(a1))); + clear_C2(); /* HACK */ + break; + } + + case 0xFC: /* FRNDINT */ + DIP("frndint\n"); + put_ST_UNCHECKED(0, + binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); + break; + + case 0xFD: /* FSCALE */ + DIP("fscale\n"); + put_ST_UNCHECKED(0, + triop(Iop_ScaleF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + get_ST(1))); + 
break; + + case 0xFE: /* FSIN */ + DIP("fsin\n"); + put_ST_UNCHECKED(0, + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + clear_C2(); /* HACK */ + break; + + case 0xFF: /* FCOS */ + DIP("fcos\n"); + put_ST_UNCHECKED(0, + binop(Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0))); + clear_C2(); /* HACK */ + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDA) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IROp fop; + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + switch (gregOfRM(modrm)) { + + case 0: /* FIADD m32int */ /* ST(0) += m32int */ + DIP("fiaddl %s\n", dis_buf); + fop = Iop_AddF64; + goto do_fop_m32; + + case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ + DIP("fimull %s\n", dis_buf); + fop = Iop_MulF64; + goto do_fop_m32; + + case 2: /* FICOM m32int */ + DIP("ficoml %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_I32toF64, + loadLE(Ity_I32,mkexpr(addr)))), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 3: /* FICOMP m32int */ + DIP("ficompl %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. 
*/ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_I32toF64, + loadLE(Ity_I32,mkexpr(addr)))), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + case 4: /* FISUB m32int */ /* ST(0) -= m32int */ + DIP("fisubl %s\n", dis_buf); + fop = Iop_SubF64; + goto do_fop_m32; + + case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ + DIP("fisubrl %s\n", dis_buf); + fop = Iop_SubF64; + goto do_foprev_m32; + + case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ + DIP("fidivl %s\n", dis_buf); + fop = Iop_DivF64; + goto do_fop_m32; + + case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ + DIP("fidivrl %s\n", dis_buf); + fop = Iop_DivF64; + goto do_foprev_m32; + + do_fop_m32: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr))))); + break; + + do_foprev_m32: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr))), + get_ST(0))); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xDA\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC0; + DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondB)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC8; + DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondZ)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD0 ... 
0xD7: /* FCMOVBE ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD0; + DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondBE)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD8; + DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondP)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xE9: /* FUCOMPP %st(0),%st(1) */ + DIP("fucompp %%st(0),%%st(1)\n"); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(1)), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + fp_pop(); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDB) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FILD m32int */ + DIP("fildl %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_I32toF64, + loadLE(Ity_I32, mkexpr(addr)))); + break; + + case 1: /* FISTTPL m32 (SSE3) */ + DIP("fisttpl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0)) ); + fp_pop(); + break; + + case 2: /* FIST m32 */ + DIP("fistl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, get_roundingmode(), get_ST(0)) ); + break; + + case 3: /* FISTP m32 */ + DIP("fistpl %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI32, get_roundingmode(), get_ST(0)) ); + fp_pop(); + break; + + case 5: { /* FLD extended-real */ + /* Uses dirty helper: + ULong x86g_loadF80le ( UInt ) + addr holds the address. First, do a dirty call to + get hold of the data. 
*/ + IRTemp val = newTemp(Ity_I64); + IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); + + IRDirty* d = unsafeIRDirty_1_N ( + val, + 0/*regparms*/, + "x86g_dirtyhelper_loadF80le", + &x86g_dirtyhelper_loadF80le, + args + ); + /* declare that we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 10; + + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + fp_push(); + put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); + + DIP("fldt %s\n", dis_buf); + break; + } + + case 7: { /* FSTP extended-real */ + /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */ + IRExpr** args + = mkIRExprVec_2( mkexpr(addr), + unop(Iop_ReinterpF64asI64, get_ST(0)) ); + + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_storeF80le", + &x86g_dirtyhelper_storeF80le, + args + ); + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 10; + + /* execute the dirty call. */ + stmt( IRStmt_Dirty(d) ); + fp_pop(); + + DIP("fstpt\n %s", dis_buf); + break; + } + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xDB\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC0; + DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondNB)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ + r_src = (UInt)modrm - 0xC8; + DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondNZ)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD0 ... 
0xD7: /* FCMOVNBE ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD0; + DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondNBE)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD8; + DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_x86g_calculate_condition(X86CondNP)), + get_ST(0), get_ST(r_src)) ); + break; + + case 0xE2: + DIP("fnclex\n"); + break; + + case 0xE3: { + /* Uses dirty helper: + void x86g_do_FINIT ( VexGuestX86State* ) */ + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FINIT", + &x86g_dirtyhelper_FINIT, + mkIRExprVec_0() + ); + d->needsBBP = True; + + /* declare we're writing guest state */ + d->nFxState = 5; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(UInt); + + d->fxState[4].fx = Ifx_Write; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(UInt); + + stmt( IRStmt_Dirty(d) ); + + DIP("fninit\n"); + break; + } + + case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); + break; + + case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDC) { + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FADD double-real */ + fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); + break; + + case 1: /* FMUL double-real */ + fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); + break; + + case 2: /* FCOM double-real */ + DIP("fcoml %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + loadLE(Ity_F64,mkexpr(addr))), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 3: /* FCOMP double-real */ + DIP("fcompl %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + loadLE(Ity_F64,mkexpr(addr))), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + case 4: /* FSUB double-real */ + fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); + break; + + case 5: /* FSUBR double-real */ + fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); + break; + + case 6: /* FDIV double-real */ + fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); + break; + + case 7: /* FDIVR double-real */ + fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xDC\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); + break; + + case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); + break; + + case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); + break; + + case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) 
*/ + fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); + break; + + case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); + break; + + case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDD) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. */ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FLD double-real */ + DIP("fldl %s\n", dis_buf); + fp_push(); + put_ST(0, loadLE(Ity_F64, mkexpr(addr))); + break; + + case 1: /* FISTTPQ m64 (SSE3) */ + DIP("fistppll %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI64, mkU32(Irrm_ZERO), get_ST(0)) ); + fp_pop(); + break; + + case 2: /* FST double-real */ + DIP("fstl %s\n", dis_buf); + storeLE(mkexpr(addr), get_ST(0)); + break; + + case 3: /* FSTP double-real */ + DIP("fstpl %s\n", dis_buf); + storeLE(mkexpr(addr), get_ST(0)); + fp_pop(); + break; + + case 4: { /* FRSTOR m108 */ + /* Uses dirty helper: + VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */ + IRTemp ew = newTemp(Ity_I32); + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FRSTOR", + &x86g_dirtyhelper_FRSTOR, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + d->tmp = ew; + /* declare we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 108; + + /* declare we're writing guest state */ + d->nFxState = 5; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = 
OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(UInt); + + d->fxState[4].fx = Ifx_Write; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(UInt); + + stmt( IRStmt_Dirty(d) ); + + /* ew contains any emulation warning we may need to + issue. If needed, side-exit to the next insn, + reporting the warning, so that Valgrind's dispatcher + sees the warning. */ + put_emwarn( mkexpr(ew) ); + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + ) + ); + + DIP("frstor %s\n", dis_buf); + break; + } + + case 6: { /* FNSAVE m108 */ + /* Uses dirty helper: + void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */ + IRDirty* d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FSAVE", + &x86g_dirtyhelper_FSAVE, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 108; + + /* declare we're reading guest state */ + d->nFxState = 5; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(UInt); + + d->fxState[4].fx = Ifx_Read; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(UInt); + + stmt( IRStmt_Dirty(d) ); + + DIP("fnsave %s\n", dis_buf); + break; + } + + case 7: { /* FNSTSW m16 */ + IRExpr* sw = get_FPU_sw(); + vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); + storeLE( mkexpr(addr), sw ); + DIP("fnstsw %s\n", dis_buf); + break; + } + + default: + vex_printf("unhandled opc_aux = 
0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xDD\n"); + goto decode_fail; + } + } else { + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FFREE %st(?) */ + r_dst = (UInt)modrm - 0xC0; + DIP("ffree %%st(%d)\n", (Int)r_dst); + put_ST_TAG ( r_dst, mkU8(0) ); + break; + + case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xD0; + DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst); + /* P4 manual says: "If the destination operand is a + non-empty register, the invalid-operation exception + is not generated. Hence put_ST_UNCHECKED. */ + put_ST_UNCHECKED(r_dst, get_ST(0)); + break; + + case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xD8; + DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst); + /* P4 manual says: "If the destination operand is a + non-empty register, the invalid-operation exception + is not generated. Hence put_ST_UNCHECKED. */ + put_ST_UNCHECKED(r_dst, get_ST(0)); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xE0; + DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ + r_dst = (UInt)modrm - 0xE8; + DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + default: + goto decode_fail; + } + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDE) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IROp fop; + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FIADD m16int */ /* ST(0) += m16int */ + DIP("fiaddw %s\n", dis_buf); + fop = Iop_AddF64; + goto do_fop_m16; + + case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ + DIP("fimulw %s\n", dis_buf); + fop = Iop_MulF64; + goto do_fop_m16; + + case 2: /* FICOM m16int */ + DIP("ficomw %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16,mkexpr(addr))))), + mkU8(8)), + mkU32(0x4500) + )); + break; + + case 3: /* FICOMP m16int */ + DIP("ficompw %s\n", dis_buf); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, + get_ST(0), + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16,mkexpr(addr))))), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + break; + + case 4: /* FISUB m16int */ /* ST(0) -= m16int */ + DIP("fisubw %s\n", dis_buf); + fop = Iop_SubF64; + goto do_fop_m16; + + case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ + DIP("fisubrw %s\n", dis_buf); + fop = Iop_SubF64; + goto do_foprev_m16; + + case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ + DIP("fisubw %s\n", dis_buf); + fop = Iop_DivF64; + goto do_fop_m16; + + case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ + DIP("fidivrw %s\n", dis_buf); + fop = Iop_DivF64; + goto do_foprev_m16; + + do_fop_m16: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + get_ST(0), + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr)))))); + break; + + do_foprev_m16: + put_ST_UNCHECKED(0, + triop(fop, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr)))), + get_ST(0))); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", 
gregOfRM(modrm)); + vex_printf("first_opcode == 0xDE\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ + fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); + break; + + case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ + fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); + break; + + case 0xD9: /* FCOMPP %st(0),%st(1) */ + DIP("fuompp %%st(0),%%st(1)\n"); + /* This forces C1 to zero, which isn't right. */ + put_C3210( + binop( Iop_And32, + binop(Iop_Shl32, + binop(Iop_CmpF64, get_ST(0), get_ST(1)), + mkU8(8)), + mkU32(0x4500) + )); + fp_pop(); + fp_pop(); + break; + + case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); + break; + + case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ + fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); + break; + + case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ + fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); + break; + + case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ + fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); + break; + + default: + goto decode_fail; + } + + } + } + + /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ + else + if (first_opcode == 0xDF) { + + if (modrm < 0xC0) { + + /* bits 5,4,3 are an opcode extension, and the modRM also + specifies an address. 
*/ + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + + switch (gregOfRM(modrm)) { + + case 0: /* FILD m16int */ + DIP("fildw %s\n", dis_buf); + fp_push(); + put_ST(0, unop(Iop_I32toF64, + unop(Iop_16Sto32, + loadLE(Ity_I16, mkexpr(addr))))); + break; + + case 1: /* FISTTPS m16 (SSE3) */ + DIP("fisttps %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI16, mkU32(Irrm_ZERO), get_ST(0)) ); + fp_pop(); + break; + + case 2: /* FIST m16 */ + DIP("fistp %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI16, get_roundingmode(), get_ST(0)) ); + break; + + case 3: /* FISTP m16 */ + DIP("fistps %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI16, get_roundingmode(), get_ST(0)) ); + fp_pop(); + break; + + case 5: /* FILD m64 */ + DIP("fildll %s\n", dis_buf); + fp_push(); + put_ST(0, binop(Iop_I64toF64, + get_roundingmode(), + loadLE(Ity_I64, mkexpr(addr)))); + break; + + case 7: /* FISTP m64 */ + DIP("fistpll %s\n", dis_buf); + storeLE( mkexpr(addr), + binop(Iop_F64toI64, get_roundingmode(), get_ST(0)) ); + fp_pop(); + break; + + default: + vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); + vex_printf("first_opcode == 0xDF\n"); + goto decode_fail; + } + + } else { + + delta++; + switch (modrm) { + + case 0xC0: /* FFREEP %st(0) */ + DIP("ffreep %%st(%d)\n", 0); + put_ST_TAG ( 0, mkU8(0) ); + fp_pop(); + break; + + case 0xE0: /* FNSTSW %ax */ + DIP("fnstsw %%ax\n"); + /* Get the FPU status word value and dump it in %AX. */ + if (0) { + /* The obvious thing to do is simply dump the 16-bit + status word value in %AX. However, due to a + limitation in Memcheck's origin tracking + machinery, this causes Memcheck not to track the + origin of any undefinedness into %AH (only into + %AL/%AX/%EAX), which means origins are lost in + the sequence "fnstsw %ax; test $M,%ah; jcond .." 
*/ + putIReg(2, R_EAX, get_FPU_sw()); + } else { + /* So a somewhat lame kludge is to make it very + clear to Memcheck that the value is written to + both %AH and %AL. This generates marginally + worse code, but I don't think it matters much. */ + IRTemp t16 = newTemp(Ity_I16); + assign(t16, get_FPU_sw()); + putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) ); + putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) ); + } + break; + + case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); + break; + + case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */ + /* not really right since COMIP != UCOMIP */ + fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); + break; + + default: + goto decode_fail; + } + } + + } + + else + vpanic("dis_FPU(x86): invalid primary opcode"); + + *decode_ok = True; + return delta; + + decode_fail: + *decode_ok = False; + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- ---*/ + /*--- MMX INSTRUCTIONS ---*/ + /*--- ---*/ + /*------------------------------------------------------------*/ + + /* Effect of MMX insns on x87 FPU state (table 11-2 of + IA32 arch manual, volume 3): + + Read from, or write to MMX register (viz, any insn except EMMS): + * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero + * FP stack pointer set to zero + + EMMS: + * All tags set to Invalid (empty) -- FPTAGS[i] := zero + * FP stack pointer set to zero + */ + /* Emit the IR for the "any insn except EMMS" case described above: + put_ftop() is given a zero and each of the 8 FPTAGS entries is + written with the value 1. Takes no arguments, returns nothing. */ + static void do_MMX_preamble ( void ) + { + Int i; + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); /* 8 entries of Ity_I8 at OFFB_FPTAGS */ + IRExpr* zero = mkU32(0); + IRExpr* tag1 = mkU8(1); /* nonzero tag, i.e. Valid (non-empty) per the table above */ + put_ftop(zero); + for (i = 0; i < 8; i++) + stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); /* one PutI per tag slot i */ + } + /* Emit the IR for the EMMS case described above: put_ftop() is given + a zero and each of the 8 FPTAGS entries is written with the value 0. + Takes no arguments, returns nothing. */ + static void do_EMMS_preamble ( void ) + { + Int i; + IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); /* 8 entries of Ity_I8 at OFFB_FPTAGS */ + IRExpr* zero = mkU32(0); + IRExpr* tag0 = mkU8(0); /* zero tag, i.e. Invalid (empty) per the table above */ + put_ftop(zero); + for (i = 0; i < 8; i++) + stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); /* one PutI per tag slot i */ + } + + + static
IRExpr* getMMXReg ( UInt archreg ) + { /* Returns an IRExpr_Get of type Ity_I64 at offset OFFB_FPREGS + 8*archreg; archreg must be < 8 (asserted). */ + vassert(archreg < 8); + return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); + } + + /* Emits an IRStmt_Put of 'e' at offset OFFB_FPREGS + 8*archreg. + Asserts archreg < 8 and that 'e' has type Ity_I64. */ + static void putMMXReg ( UInt archreg, IRExpr* e ) + { + vassert(archreg < 8); + vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); + stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); + } + + + /* Helper for non-shift MMX insns. Note this is incomplete in the + sense that it does not first call do_MMX_preamble() -- that is the + responsibility of its caller. + + 'opc' selects either an IROp or, via XXX(), a named C helper. The + G (reg field) and E (register-or-memory) operands named by the + modrm byte at 'delta' are combined and the result is written to G. + 'sorb' is passed through to disAMode() for memory operands. 'name' + and 'show_granularity' only affect the DIP() trace text. Returns + 'delta' advanced past the modrm/addressing bytes. An unrecognised + 'opc' ends in vpanic(). */ + + static + UInt dis_MMXop_regmem_to_reg ( UChar sorb, + Int delta, + UChar opc, + HChar* name, + Bool show_granularity ) + { + HChar dis_buf[50]; + UChar modrm = getIByte(delta); + Bool isReg = epartIsReg(modrm); + IRExpr* argL = NULL; + IRExpr* argR = NULL; + IRExpr* argG = NULL; + IRExpr* argE = NULL; + IRTemp res = newTemp(Ity_I64); + + Bool invG = False; /* True: G is passed through Iop_Not64 before use */ + IROp op = Iop_INVALID; /* left as Iop_INVALID when a C helper is selected instead */ + void* hAddr = NULL; /* helper address, set only by XXX() */ + HChar* hName = NULL; /* helper name, set only by XXX() */ + Bool eLeft = False; /* True: E becomes the left operand and G the right */ + /* XXX(helper): select a C helper (its address and stringified name) + rather than an IROp. */ + # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) + + switch (opc) { + /* Original MMX ones */ + case 0xFC: op = Iop_Add8x8; break; + case 0xFD: op = Iop_Add16x4; break; + case 0xFE: op = Iop_Add32x2; break; + + case 0xEC: op = Iop_QAdd8Sx8; break; + case 0xED: op = Iop_QAdd16Sx4; break; + + case 0xDC: op = Iop_QAdd8Ux8; break; + case 0xDD: op = Iop_QAdd16Ux4; break; + + case 0xF8: op = Iop_Sub8x8; break; + case 0xF9: op = Iop_Sub16x4; break; + case 0xFA: op = Iop_Sub32x2; break; + + case 0xE8: op = Iop_QSub8Sx8; break; + case 0xE9: op = Iop_QSub16Sx4; break; + + case 0xD8: op = Iop_QSub8Ux8; break; + case 0xD9: op = Iop_QSub16Ux4; break; + + case 0xE5: op = Iop_MulHi16Sx4; break; + case 0xD5: op = Iop_Mul16x4; break; + case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break; + + case 0x74: op = Iop_CmpEQ8x8; break; + case 0x75: op = Iop_CmpEQ16x4; break; + case 0x76: op = Iop_CmpEQ32x2; break; + + case 0x64: op = Iop_CmpGT8Sx8; break; + case 0x65: op = Iop_CmpGT16Sx4; break; + case 0x66: op = Iop_CmpGT32Sx2; break;
+ + case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; + case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; + case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; + + case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; + case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; + case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; + + case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; + case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; + case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; + + case 0xDB: op = Iop_And64; break; + case 0xDF: op = Iop_And64; invG = True; break; /* And64 with G inverted first (see invG below) */ + case 0xEB: op = Iop_Or64; break; + case 0xEF: /* Possibly do better here if argL and argR are the + same reg */ + op = Iop_Xor64; break; + + /* Introduced in SSE1 */ + case 0xE0: op = Iop_Avg8Ux8; break; + case 0xE3: op = Iop_Avg16Ux4; break; + case 0xEE: op = Iop_Max16Sx4; break; + case 0xDE: op = Iop_Max8Ux8; break; + case 0xEA: op = Iop_Min16Sx4; break; + case 0xDA: op = Iop_Min8Ux8; break; + case 0xE4: op = Iop_MulHi16Ux4; break; + case 0xF6: XXX(x86g_calculate_mmx_psadbw); break; + + /* Introduced in SSE2 */ + case 0xD4: op = Iop_Add64; break; + case 0xFB: op = Iop_Sub64; break; + + default: + vex_printf("\n0x%x\n", (Int)opc); + vpanic("dis_MMXop_regmem_to_reg"); + } + + # undef XXX + /* G always comes from the modrm reg field; E is either the modrm + r/m register or a 64-bit little-endian load. */ + argG = getMMXReg(gregOfRM(modrm)); + if (invG) + argG = unop(Iop_Not64, argG); + + if (isReg) { + delta++; + argE = getMMXReg(eregOfRM(modrm)); + } else { + Int len; + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + argE = loadLE(Ity_I64, mkexpr(addr)); + } + + if (eLeft) { + argL = argE; + argR = argG; + } else { + argL = argG; + argR = argE; + } + /* Exactly one of 'op' and the (hName, hAddr) helper pair is expected + to have been set by the switch above; the asserts check this. */ + if (op != Iop_INVALID) { + vassert(hName == NULL); + vassert(hAddr == NULL); + assign(res, binop(op, argL, argR)); + } else { + vassert(hName != NULL); + vassert(hAddr != NULL); + assign( res, + mkIRExprCCall( + Ity_I64, + 0/*regparms*/, hName, hAddr, + mkIRExprVec_2( argL, argR ) + ) + );
+ } + + putMMXReg( gregOfRM(modrm), mkexpr(res) ); + + DIP("%s%s %s, %s\n", + name, show_granularity ? nameMMXGran(opc & 3) : "", + ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ), + nameMMXReg(gregOfRM(modrm)) ); + + return delta; + } + + + /* Vector by scalar shift of G by the amount specified at the bottom + of E. This is a straight copy of dis_SSE_shiftG_byE. + + The count is read from the low 32 bits of E, which is an MMX + register or a memory operand as selected by the modrm byte at + 'delta' ('sorb' is passed through to disAMode()). 'op' must be + one of the shift IROps listed in the switch below. If the count + is not below the lane width, a logical shift yields zero and an + arithmetic shift is performed by (lane width - 1); otherwise 'op' + is applied to G with the count. The result is written back to G. + 'opname' is used only for the DIP() trace text. Returns 'delta' + advanced past the modrm/addressing bytes. */ + + static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta, + HChar* opname, IROp op ) + { + HChar dis_buf[50]; + Int alen, size; + IRTemp addr; + Bool shl, shr, sar; + UChar rm = getIByte(delta); + IRTemp g0 = newTemp(Ity_I64); + IRTemp g1 = newTemp(Ity_I64); + IRTemp amt = newTemp(Ity_I32); + IRTemp amt8 = newTemp(Ity_I8); + + /* NOTE(review): only the low 32 bits of E are read as the count + (Iop_64to32 / 32-bit load), so a count with bits set only above + bit 31 is not seen as out of range -- confirm this is acceptable. */ + if (epartIsReg(rm)) { + assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) ); + DIP("%s %s,%s\n", opname, + nameMMXReg(eregOfRM(rm)), + nameMMXReg(gregOfRM(rm)) ); + delta++; + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameMMXReg(gregOfRM(rm)) ); + delta += alen; + } + assign( g0, getMMXReg(gregOfRM(rm)) ); + assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); + + shl = shr = sar = False; + size = 0; /* lane width in bits; counts >= size are out of range */ + switch (op) { + /* 16-bit lanes, so the bound is 16, matching ShrN16x4/SarN16x4 + below and dis_MMX_shiftE_imm. It was 32, which let counts + 16..31 reach the lane shift instead of producing zero. */ + case Iop_ShlN16x4: shl = True; size = 16; break; + case Iop_ShlN32x2: shl = True; size = 32; break; + case Iop_Shl64: shl = True; size = 64; break; + case Iop_ShrN16x4: shr = True; size = 16; break; + case Iop_ShrN32x2: shr = True; size = 32; break; + case Iop_Shr64: shr = True; size = 64; break; + case Iop_SarN16x4: sar = True; size = 16; break; + case Iop_SarN32x2: sar = True; size = 32; break; + default: vassert(0); + } + + /* In both Mux0X uses below the second argument is the result when + the (amt < size) test yields 0, i.e. for an out-of-range count; + the third is the in-range shift. */ + if (shl || shr) { + assign( + g1, + IRExpr_Mux0X( + unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), + mkU64(0), + binop(op, mkexpr(g0), mkexpr(amt8)) + ) + ); + } else + if (sar) { + assign( + g1, + IRExpr_Mux0X( + unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), + binop(op, mkexpr(g0), mkU8(size-1)), + binop(op,
mkexpr(g0), mkexpr(amt8)) + ) + ); + } else { + /*NOTREACHED*/ + vassert(0); + } + + putMMXReg( gregOfRM(rm), mkexpr(g1) ); + return delta; + } + + + /* Vector by scalar shift of E by an immediate byte. This is a + straight copy of dis_SSE_shiftE_imm. + + E must be a register and the modrm reg field must be 2, 4 or 6 + (both asserted). The immediate is the byte following the modrm + byte at 'delta'. Because the count is known at translation time, + the out-of-range case (count >= lane width) is decided here in C: + logical shifts give zero, arithmetic shifts use (lane width - 1). + The result is written back to E. 'opname' is used only for the + DIP() trace text. Returns 'delta' + 2. */ + + static + UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op ) + { + Bool shl, shr, sar; + UChar rm = getIByte(delta); + IRTemp e0 = newTemp(Ity_I64); + IRTemp e1 = newTemp(Ity_I64); + UChar amt, size; + vassert(epartIsReg(rm)); + vassert(gregOfRM(rm) == 2 + || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); + amt = getIByte(delta+1); + delta += 2; + DIP("%s $%d,%s\n", opname, + (Int)amt, + nameMMXReg(eregOfRM(rm)) ); + + assign( e0, getMMXReg(eregOfRM(rm)) ); + + shl = shr = sar = False; + size = 0; /* lane width in bits; counts >= size are out of range */ + switch (op) { + case Iop_ShlN16x4: shl = True; size = 16; break; + case Iop_ShlN32x2: shl = True; size = 32; break; + case Iop_Shl64: shl = True; size = 64; break; + case Iop_SarN16x4: sar = True; size = 16; break; + case Iop_SarN32x2: sar = True; size = 32; break; + case Iop_ShrN16x4: shr = True; size = 16; break; + case Iop_ShrN32x2: shr = True; size = 32; break; + case Iop_Shr64: shr = True; size = 64; break; + default: vassert(0); + } + + if (shl || shr) { + assign( e1, amt >= size + ? mkU64(0) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else + if (sar) { + assign( e1, amt >= size + ? binop(op, mkexpr(e0), mkU8(size-1)) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else { + /*NOTREACHED*/ + vassert(0); + } + + putMMXReg( eregOfRM(rm), mkexpr(e1) ); + return delta; + } + + + /* Completely handle all MMX instructions except emms. */ + + static + UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) + { + Int len; + UChar modrm; + HChar dis_buf[50]; + UChar opc = getIByte(delta); + delta++; + + /* dis_MMX handles all insns except emms.
*/ + do_MMX_preamble(); + + switch (opc) { + + case 0x6E: + /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/ + if (sz != 4) + goto mmx_decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( + gregOfRM(modrm), + binop( Iop_32HLto64, + mkU32(0), + getIReg(4, eregOfRM(modrm)) ) ); + DIP("movd %s, %s\n", + nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); + } else { + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + putMMXReg( + gregOfRM(modrm), + binop( Iop_32HLto64, + mkU32(0), + loadLE(Ity_I32, mkexpr(addr)) ) ); + DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm))); + } + break; + + case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */ + if (sz != 4) + goto mmx_decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; + putIReg( 4, eregOfRM(modrm), + unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); + DIP("movd %s, %s\n", + nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); + } else { + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + storeLE( mkexpr(addr), + unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); + DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf); + } + break; + + case 0x6F: + /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) ); + DIP("movq %s, %s\n", + nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); + } else { + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movq %s, %s\n", + dis_buf, nameMMXReg(gregOfRM(modrm))); + } + break; + + case 0x7F: + /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ + if (sz != 4) + goto mmx_decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; + putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) ); + 
DIP("movq %s, %s\n", + nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm))); + } else { + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); + DIP("mov(nt)q %s, %s\n", + nameMMXReg(gregOfRM(modrm)), dis_buf); + } + break; + + case 0xFC: + case 0xFD: + case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True ); + break; + + case 0xEC: + case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True ); + break; + + case 0xDC: + case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True ); + break; + + case 0xF8: + case 0xF9: + case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True ); + break; + + case 0xE8: + case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True ); + break; + + case 0xD8: + case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True ); + break; + + case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False ); + break; + + case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False ); + break; + + case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ + vassert(sz == 4); + delta = dis_MMXop_regmem_to_reg ( sorb, 
delta, opc, "pmaddwd", False ); + break; + + case 0x74: + case 0x75: + case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True ); + break; + + case 0x64: + case 0x65: + case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True ); + break; + + case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False ); + break; + + case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False ); + break; + + case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False ); + break; + + case 0x68: + case 0x69: + case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True ); + break; + + case 0x60: + case 0x61: + case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True ); + break; + + case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False ); + break; + + case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False ); + break; + + case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False ); + break; + + 
case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ + if (sz != 4) + goto mmx_decode_failure; + delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False ); + break; + + # define SHIFT_BY_REG(_name,_op) \ + delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \ + break; + + /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); + case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); + case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); + + /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); + case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); + case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); + + /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); + case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); + + # undef SHIFT_BY_REG + + case 0x71: + case 0x72: + case 0x73: { + /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ + UChar byte2, subopc; + if (sz != 4) + goto mmx_decode_failure; + byte2 = getIByte(delta); /* amode / sub-opcode */ + subopc = toUChar( (byte2 >> 3) & 7 ); + + # define SHIFT_BY_IMM(_name,_op) \ + do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ + } while (0) + + if (subopc == 2 /*SRL*/ && opc == 0x71) + SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); + else if (subopc == 2 /*SRL*/ && opc == 0x72) + SHIFT_BY_IMM("psrld", Iop_ShrN32x2); + else if (subopc == 2 /*SRL*/ && opc == 0x73) + SHIFT_BY_IMM("psrlq", Iop_Shr64); + + else if (subopc == 4 /*SAR*/ && opc == 0x71) + SHIFT_BY_IMM("psraw", Iop_SarN16x4); + else if (subopc == 4 /*SAR*/ && opc == 0x72) + SHIFT_BY_IMM("psrad", Iop_SarN32x2); + + else if (subopc == 6 /*SHL*/ && opc == 0x71) + SHIFT_BY_IMM("psllw", Iop_ShlN16x4); + else if (subopc == 6 /*SHL*/ && opc == 0x72) + SHIFT_BY_IMM("pslld", Iop_ShlN32x2); + else if (subopc == 6 /*SHL*/ && opc == 0x73) + SHIFT_BY_IMM("psllq", Iop_Shl64); + + else goto mmx_decode_failure; + + # undef SHIFT_BY_IMM + break; + } + + case 0xF7: { + IRTemp addr = 
newTemp(Ity_I32); + IRTemp regD = newTemp(Ity_I64); + IRTemp regM = newTemp(Ity_I64); + IRTemp mask = newTemp(Ity_I64); + IRTemp olddata = newTemp(Ity_I64); + IRTemp newdata = newTemp(Ity_I64); + + modrm = getIByte(delta); + if (sz != 4 || (!epartIsReg(modrm))) + goto mmx_decode_failure; + delta++; + + assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); + assign( regM, getMMXReg( eregOfRM(modrm) )); + assign( regD, getMMXReg( gregOfRM(modrm) )); + assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); + assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); + assign( newdata, + binop(Iop_Or64, + binop(Iop_And64, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_And64, + mkexpr(olddata), + unop(Iop_Not64, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), + nameMMXReg( gregOfRM(modrm) ) ); + break; + } + + /* --- MMX decode failure --- */ + default: + mmx_decode_failure: + *decode_ok = False; + return delta; /* ignored */ + + } + + *decode_ok = True; + return delta; + } + + + /*------------------------------------------------------------*/ + /*--- More misc arithmetic and other obscure insns. ---*/ + /*------------------------------------------------------------*/ + + /* Double length left and right shifts. Apparently only required in + v-size (no b- variant). */ + static + UInt dis_SHLRD_Gv_Ev ( UChar sorb, + Int delta, UChar modrm, + Int sz, + IRExpr* shift_amt, + Bool amt_is_literal, + HChar* shift_amt_txt, + Bool left_shift ) + { + /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used + for printing it. And eip on entry points at the modrm byte. 
*/ + Int len; + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp gsrc = newTemp(ty); + IRTemp esrc = newTemp(ty); + IRTemp addr = IRTemp_INVALID; + IRTemp tmpSH = newTemp(Ity_I8); + IRTemp tmpL = IRTemp_INVALID; + IRTemp tmpRes = IRTemp_INVALID; + IRTemp tmpSubSh = IRTemp_INVALID; + IROp mkpair; + IROp getres; + IROp shift; + IRExpr* mask = NULL; + + vassert(sz == 2 || sz == 4); + + /* The E-part is the destination; this is shifted. The G-part + supplies bits to be shifted into the E-part, but is not + changed. + + If shifting left, form a double-length word with E at the top + and G at the bottom, and shift this left. The result is then in + the high part. + + If shifting right, form a double-length word with G at the top + and E at the bottom, and shift this right. The result is then + at the bottom. */ + + /* Fetch the operands. */ + + assign( gsrc, getIReg(sz, gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + delta++; + assign( esrc, getIReg(sz, eregOfRM(modrm)) ); + DIP("sh%cd%c %s, %s, %s\n", + ( left_shift ? 'l' : 'r' ), nameISize(sz), + shift_amt_txt, + nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm))); + } else { + addr = disAMode ( &len, sorb, delta, dis_buf ); + delta += len; + assign( esrc, loadLE(ty, mkexpr(addr)) ); + DIP("sh%cd%c %s, %s, %s\n", + ( left_shift ? 'l' : 'r' ), nameISize(sz), + shift_amt_txt, + nameIReg(sz, gregOfRM(modrm)), dis_buf); + } + + /* Round up the relevant primops. */ + + if (sz == 4) { + tmpL = newTemp(Ity_I64); + tmpRes = newTemp(Ity_I32); + tmpSubSh = newTemp(Ity_I32); + mkpair = Iop_32HLto64; + getres = left_shift ? Iop_64HIto32 : Iop_64to32; + shift = left_shift ? Iop_Shl64 : Iop_Shr64; + mask = mkU8(31); + } else { + /* sz == 2 */ + tmpL = newTemp(Ity_I32); + tmpRes = newTemp(Ity_I16); + tmpSubSh = newTemp(Ity_I16); + mkpair = Iop_16HLto32; + getres = left_shift ? Iop_32HIto16 : Iop_32to16; + shift = left_shift ? 
Iop_Shl32 : Iop_Shr32; + mask = mkU8(15); + } + + /* Do the shift, calculate the subshift value, and set + the flag thunk. */ + + assign( tmpSH, binop(Iop_And8, shift_amt, mask) ); + + if (left_shift) + assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) ); + else + assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) ); + + assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) ); + assign( tmpSubSh, + unop(getres, + binop(shift, + mkexpr(tmpL), + binop(Iop_And8, + binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), + mask))) ); + + setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32, + tmpRes, tmpSubSh, ty, tmpSH ); + + /* Put result back. */ + + if (epartIsReg(modrm)) { + putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes)); + } else { + storeLE( mkexpr(addr), mkexpr(tmpRes) ); + } + + if (amt_is_literal) delta++; + return delta; + } + + + /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not + required. */ + + typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; + + static HChar* nameBtOp ( BtOp op ) + { + switch (op) { + case BtOpNone: return ""; + case BtOpSet: return "s"; + case BtOpReset: return "r"; + case BtOpComp: return "c"; + default: vpanic("nameBtOp(x86)"); + } + } + + + static + UInt dis_bt_G_E ( UChar sorb, Bool locked, Int sz, Int delta, BtOp op ) + { + HChar dis_buf[50]; + UChar modrm; + Int len; + IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, + t_addr1, t_esp, t_mask, t_new; + + vassert(sz == 2 || sz == 4); + + t_fetched = t_bitno0 = t_bitno1 = t_bitno2 + = t_addr0 = t_addr1 = t_esp + = t_mask = t_new = IRTemp_INVALID; + + t_fetched = newTemp(Ity_I8); + t_new = newTemp(Ity_I8); + t_bitno0 = newTemp(Ity_I32); + t_bitno1 = newTemp(Ity_I32); + t_bitno2 = newTemp(Ity_I8); + t_addr1 = newTemp(Ity_I32); + modrm = getIByte(delta); + + assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) ); + + if (epartIsReg(modrm)) { + delta++; + /* Get it onto the client's stack. 
*/ + t_esp = newTemp(Ity_I32); + t_addr0 = newTemp(Ity_I32); + + assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(t_esp)); + + storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) ); + + /* Make t_addr0 point at it. */ + assign( t_addr0, mkexpr(t_esp) ); + + /* Mask out upper bits of the shift amount, since we're doing a + reg. */ + assign( t_bitno1, binop(Iop_And32, + mkexpr(t_bitno0), + mkU32(sz == 4 ? 31 : 15)) ); + + } else { + t_addr0 = disAMode ( &len, sorb, delta, dis_buf ); + delta += len; + assign( t_bitno1, mkexpr(t_bitno0) ); + } + + /* At this point: t_addr0 is the address being operated on. If it + was a reg, we will have pushed it onto the client's stack. + t_bitno1 is the bit number, suitably masked in the case of a + reg. */ + + /* Now the main sequence. */ + assign( t_addr1, + binop(Iop_Add32, + mkexpr(t_addr0), + binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) ); + + /* t_addr1 now holds effective address */ + + assign( t_bitno2, + unop(Iop_32to8, + binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) ); + + /* t_bitno2 contains offset of bit within byte */ + + if (op != BtOpNone) { + t_mask = newTemp(Ity_I8); + assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); + } + + /* t_mask is now a suitable byte mask */ + + assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); + + if (op != BtOpNone) { + switch (op) { + case BtOpSet: + assign( t_new, + binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); + break; + case BtOpComp: + assign( t_new, + binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); + break; + case BtOpReset: + assign( t_new, + binop(Iop_And8, mkexpr(t_fetched), + unop(Iop_Not8, mkexpr(t_mask))) ); + break; + default: + vpanic("dis_bt_G_E(x86)"); + } + if (locked && !epartIsReg(modrm)) { + casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, + mkexpr(t_new)/*new*/, + guest_EIP_curr_instr ); + } else { + storeLE( mkexpr(t_addr1), mkexpr(t_new) ); + } + } + + /* Side effect done; now get selected 
bit into Carry flag */ + /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop(Iop_And32, + binop(Iop_Shr32, + unop(Iop_8Uto32, mkexpr(t_fetched)), + mkexpr(t_bitno2)), + mkU32(1))) + ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + + /* Move reg operand from stack back to reg */ + if (epartIsReg(modrm)) { + /* t_esp still points at it. */ + putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) ); + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(sz)) ); + } + + DIP("bt%s%c %s, %s\n", + nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)), + ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) ); + + return delta; + } + + + + /* Handle BSF/BSR. Only v-size seems necessary. */ + static + UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) + { + Bool isReg; + UChar modrm; + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp src = newTemp(ty); + IRTemp dst = newTemp(ty); + + IRTemp src32 = newTemp(Ity_I32); + IRTemp dst32 = newTemp(Ity_I32); + IRTemp src8 = newTemp(Ity_I8); + + vassert(sz == 4 || sz == 2); + + modrm = getIByte(delta); + + isReg = epartIsReg(modrm); + if (isReg) { + delta++; + assign( src, getIReg(sz, eregOfRM(modrm)) ); + } else { + Int len; + IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); + delta += len; + assign( src, loadLE(ty, mkexpr(addr)) ); + } + + DIP("bs%c%c %s, %s\n", + fwds ? 'f' : 'r', nameISize(sz), + ( isReg ? 
nameIReg(sz, eregOfRM(modrm)) : dis_buf ), + nameIReg(sz, gregOfRM(modrm))); + + /* Generate an 8-bit expression which is zero iff the + original is zero, and nonzero otherwise */ + assign( src8, + unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8), + mkexpr(src), mkU(ty,0))) ); + + /* Flags: Z is 1 iff source value is zero. All others + are undefined -- we force them to zero. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + IRExpr_Mux0X( mkexpr(src8), + /* src==0 */ + mkU32(X86G_CC_MASK_Z), + /* src!=0 */ + mkU32(0) + ) + )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + + /* Result: iff source value is zero, we can't use + Iop_Clz32/Iop_Ctz32 as they have no defined result in that case. + But anyway, Intel x86 semantics say the result is undefined in + such situations. Hence handle the zero case specially. */ + + /* Bleh. What we compute: + + bsf32: if src == 0 then 0 else Ctz32(src) + bsr32: if src == 0 then 0 else 31 - Clz32(src) + + bsf16: if src == 0 then 0 else Ctz32(16Uto32(src)) + bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src)) + + First, widen src to 32 bits if it is not already. + + Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the + dst register unchanged when src == 0. Hence change accordingly. + */ + if (sz == 2) + assign( src32, unop(Iop_16Uto32, mkexpr(src)) ); + else + assign( src32, mkexpr(src) ); + + /* The main computation, guarding against zero. */ + assign( dst32, + IRExpr_Mux0X( + mkexpr(src8), + /* src == 0 -- leave dst unchanged */ + widenUto32( getIReg( sz, gregOfRM(modrm) ) ), + /* src != 0 */ + fwds ? 
unop(Iop_Ctz32, mkexpr(src32)) + : binop(Iop_Sub32, + mkU32(31), + unop(Iop_Clz32, mkexpr(src32))) + ) + ); + + if (sz == 2) + assign( dst, unop(Iop_32to16, mkexpr(dst32)) ); + else + assign( dst, mkexpr(dst32) ); + + /* dump result back */ + putIReg( sz, gregOfRM(modrm), mkexpr(dst) ); + + return delta; + } + + + static + void codegen_xchg_eAX_Reg ( Int sz, Int reg ) + { + IRType ty = szToITy(sz); + IRTemp t1 = newTemp(ty); + IRTemp t2 = newTemp(ty); + vassert(sz == 2 || sz == 4); + assign( t1, getIReg(sz, R_EAX) ); + assign( t2, getIReg(sz, reg) ); + putIReg( sz, R_EAX, mkexpr(t2) ); + putIReg( sz, reg, mkexpr(t1) ); + DIP("xchg%c %s, %s\n", + nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg)); + } + + + static + void codegen_SAHF ( void ) + { + /* Set the flags to: + (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag + | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A + |X86G_CC_MASK_P|X86G_CC_MASK_C) + */ + UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A + |X86G_CC_MASK_C|X86G_CC_MASK_P; + IRTemp oldflags = newTemp(Ity_I32); + assign( oldflags, mk_x86g_calculate_eflags_all() ); + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop(Iop_Or32, + binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)), + binop(Iop_And32, + binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)), + mkU32(mask_SZACP)) + ) + )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. 
*/ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + } + + + static + void codegen_LAHF ( void ) + { + /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ + IRExpr* eax_with_hole; + IRExpr* new_byte; + IRExpr* new_eax; + UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A + |X86G_CC_MASK_C|X86G_CC_MASK_P; + + IRTemp flags = newTemp(Ity_I32); + assign( flags, mk_x86g_calculate_eflags_all() ); + + eax_with_hole + = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF)); + new_byte + = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)), + mkU32(1<<1)); + new_eax + = binop(Iop_Or32, eax_with_hole, + binop(Iop_Shl32, new_byte, mkU8(8))); + putIReg(4, R_EAX, new_eax); + } + + + static + UInt dis_cmpxchg_G_E ( UChar sorb, + Bool locked, + Int size, + Int delta0 ) + { + HChar dis_buf[50]; + Int len; + + IRType ty = szToITy(size); + IRTemp acc = newTemp(ty); + IRTemp src = newTemp(ty); + IRTemp dest = newTemp(ty); + IRTemp dest2 = newTemp(ty); + IRTemp acc2 = newTemp(ty); + IRTemp cond8 = newTemp(Ity_I8); + IRTemp addr = IRTemp_INVALID; + UChar rm = getUChar(delta0); + + /* There are 3 cases to consider: + + reg-reg: ignore any lock prefix, generate sequence based + on Mux0X + + reg-mem, not locked: ignore any lock prefix, generate sequence + based on Mux0X + + reg-mem, locked: use IRCAS + */ + if (epartIsReg(rm)) { + /* case 1 */ + assign( dest, getIReg(size, eregOfRM(rm)) ); + delta0++; + assign( src, getIReg(size, gregOfRM(rm)) ); + assign( acc, getIReg(size, R_EAX) ); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); + assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIReg(size, R_EAX, mkexpr(acc2)); + putIReg(size, eregOfRM(rm), mkexpr(dest2)); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), + nameIReg(size,eregOfRM(rm)) ); + } + else if (!epartIsReg(rm) && 
!locked) { + /* case 2 */ + addr = disAMode ( &len, sorb, delta0, dis_buf ); + assign( dest, loadLE(ty, mkexpr(addr)) ); + delta0 += len; + assign( src, getIReg(size, gregOfRM(rm)) ); + assign( acc, getIReg(size, R_EAX) ); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); + assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIReg(size, R_EAX, mkexpr(acc2)); + storeLE( mkexpr(addr), mkexpr(dest2) ); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + } + else if (!epartIsReg(rm) && locked) { + /* case 3 */ + /* src is new value. acc is expected value. dest is old value. + Compute success from the output of the IRCAS, and steer the + new value for EAX accordingly: in case of success, EAX is + unchanged. */ + addr = disAMode ( &len, sorb, delta0, dis_buf ); + delta0 += len; + assign( src, getIReg(size, gregOfRM(rm)) ); + assign( acc, getIReg(size, R_EAX) ); + stmt( IRStmt_CAS( + mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), + NULL, mkexpr(acc), NULL, mkexpr(src) ) + )); + setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); + assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); + assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); + putIReg(size, R_EAX, mkexpr(acc2)); + DIP("cmpxchg%c %s,%s\n", nameISize(size), + nameIReg(size,gregOfRM(rm)), dis_buf); + } + else vassert(0); + + return delta0; + } + + + /* Handle conditional move instructions of the form + cmovcc E(reg-or-mem), G(reg) + + E(src) is reg-or-mem + G(dst) is reg. 
+ + If E is reg, --> GET %E, tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + + If E is mem --> (getAddr E) -> tmpa + LD (tmpa), tmps + GET %G, tmpd + CMOVcc tmps, tmpd + PUT tmpd, %G + */ + static + UInt dis_cmov_E_G ( UChar sorb, + Int sz, + X86Condcode cond, + Int delta0 ) + { + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + Int len; + + IRType ty = szToITy(sz); + IRTemp tmps = newTemp(ty); + IRTemp tmpd = newTemp(ty); + + if (epartIsReg(rm)) { + assign( tmps, getIReg(sz, eregOfRM(rm)) ); + assign( tmpd, getIReg(sz, gregOfRM(rm)) ); + + putIReg(sz, gregOfRM(rm), + IRExpr_Mux0X( unop(Iop_1Uto8, + mk_x86g_calculate_condition(cond)), + mkexpr(tmpd), + mkexpr(tmps) ) + ); + DIP("cmov%c%s %s,%s\n", nameISize(sz), + name_X86Condcode(cond), + nameIReg(sz,eregOfRM(rm)), + nameIReg(sz,gregOfRM(rm))); + return 1+delta0; + } + + /* E refers to memory */ + { + IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); + assign( tmps, loadLE(ty, mkexpr(addr)) ); + assign( tmpd, getIReg(sz, gregOfRM(rm)) ); + + putIReg(sz, gregOfRM(rm), + IRExpr_Mux0X( unop(Iop_1Uto8, + mk_x86g_calculate_condition(cond)), + mkexpr(tmpd), + mkexpr(tmps) ) + ); + + DIP("cmov%c%s %s,%s\n", nameISize(sz), + name_X86Condcode(cond), + dis_buf, + nameIReg(sz,gregOfRM(rm))); + return len+delta0; + } + } + + + static + UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0, + Bool* decodeOK ) + { + Int len; + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + + IRType ty = szToITy(sz); + IRTemp tmpd = newTemp(ty); + IRTemp tmpt0 = newTemp(ty); + IRTemp tmpt1 = newTemp(ty); + + /* There are 3 cases to consider: + + reg-reg: currently unhandled + + reg-mem, not locked: ignore any lock prefix, generate 'naive' + (non-atomic) sequence + + reg-mem, locked: use IRCAS + */ + + if (epartIsReg(rm)) { + /* case 1 */ + *decodeOK = False; + return delta0; + /* Currently we don't handle xadd_G_E with register operand. 
*/ + } + else if (!epartIsReg(rm) && !locked) { + /* case 2 */ + IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); + assign( tmpd, loadLE(ty, mkexpr(addr)) ); + assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); + assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(tmpd), mkexpr(tmpt0)) ); + storeLE( mkexpr(addr), mkexpr(tmpt1) ); + setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); + putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); + DIP("xadd%c %s, %s\n", + nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); + *decodeOK = True; + return len+delta0; + } + else if (!epartIsReg(rm) && locked) { + /* case 3 */ + IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); + assign( tmpd, loadLE(ty, mkexpr(addr)) ); + assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); + assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), + mkexpr(tmpd), mkexpr(tmpt0)) ); + casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, + mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr ); + setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); + putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); + DIP("xadd%c %s, %s\n", + nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); + *decodeOK = True; + return len+delta0; + } + /*UNREACHED*/ + vassert(0); + } + + /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ + + static + UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 ) + { + Int len; + IRTemp addr; + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + + if (epartIsReg(rm)) { + putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); + DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); + return 1+delta0; + } else { + addr = disAMode ( &len, sorb, delta0, dis_buf ); + putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); + DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); + return len+delta0; + } + } + + /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If + dst is ireg and sz==4, zero out top half of it. 
*/ + + static + UInt dis_mov_Sw_Ew ( UChar sorb, + Int sz, + Int delta0 ) + { + Int len; + IRTemp addr; + UChar rm = getIByte(delta0); + HChar dis_buf[50]; + + vassert(sz == 2 || sz == 4); + + if (epartIsReg(rm)) { + if (sz == 4) + putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); + else + putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); + + DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); + return 1+delta0; + } else { + addr = disAMode ( &len, sorb, delta0, dis_buf ); + storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); + DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); + return len+delta0; + } + } + + + static + void dis_push_segreg ( UInt sreg, Int sz ) + { + IRTemp t1 = newTemp(Ity_I16); + IRTemp ta = newTemp(Ity_I32); + vassert(sz == 2 || sz == 4); + + assign( t1, getSReg(sreg) ); + assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(ta)); + storeLE( mkexpr(ta), mkexpr(t1) ); + + DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); + } + + static + void dis_pop_segreg ( UInt sreg, Int sz ) + { + IRTemp t1 = newTemp(Ity_I16); + IRTemp ta = newTemp(Ity_I32); + vassert(sz == 2 || sz == 4); + + assign( ta, getIReg(4, R_ESP) ); + assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); + + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); + putSReg( sreg, mkexpr(t1) ); + DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); + } + + static + void dis_ret ( UInt d32 ) + { + IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32); + assign(t1, getIReg(4,R_ESP)); + assign(t2, loadLE(Ity_I32,mkexpr(t1))); + putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); + jmp_treg(Ijk_Ret,t2); + } + + /*------------------------------------------------------------*/ + /*--- SSE/SSE2/SSE3 helpers ---*/ + /*------------------------------------------------------------*/ + + /* Worker function; do not call directly. + Handles full width G = G `op` E and G = (not G) `op` E. 
+ The value returned is the delta just past the modrm/amode bytes. + */ + + static UInt dis_SSE_E_to_G_all_wrk ( + UChar sorb, Int delta, + HChar* opname, IROp op, + Bool invertG + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + /* Left operand: G itself, or its bitwise complement when invertG is set. */ IRExpr* gpart + = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) + : getXMMReg(gregOfRM(rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRM(rm), + binop(op, gpart, + getXMMReg(eregOfRM(rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + /* Memory E: all 128 bits are loaded. */ addr = disAMode ( &alen, sorb, delta, dis_buf ); + putXMMReg( gregOfRM(rm), + binop(op, gpart, + loadLE(Ity_V128, mkexpr(addr))) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* All lanes SSE binary operation, G = G `op` E. Thin wrapper over the worker with invertG == False. */ + + static + UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op ) + { + return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False ); + } + + /* All lanes SSE binary operation, G = (not G) `op` E. Thin wrapper over the worker with invertG == True. */ + + static + UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta, + HChar* opname, IROp op ) + { + return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True ); + } + + + /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. A register E is handed to op whole; a memory E is read as 32 bits and widened to 128. Returns the delta just past the modrm/amode bytes. */ + + static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta, + HChar* opname, IROp op ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + IRExpr* gpart = getXMMReg(gregOfRM(rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRM(rm), + binop(op, gpart, + getXMMReg(eregOfRM(rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + /* We can only do a 32-bit memory read, so the upper 3/4 of the + E operand needs to be made simply of zeroes. 
*/ + IRTemp epart = newTemp(Ity_V128); + addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( epart, unop( Iop_32UtoV128, + loadLE(Ity_I32, mkexpr(addr))) ); + putXMMReg( gregOfRM(rm), + binop(op, gpart, mkexpr(epart)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* Lower 64-bit lane only SSE binary operation, G = G `op` E. A register E is handed to op whole; a memory E is read as 64 bits and widened to 128. Returns the delta just past the modrm/amode bytes. */ + + static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta, + HChar* opname, IROp op ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + IRExpr* gpart = getXMMReg(gregOfRM(rm)); + if (epartIsReg(rm)) { + putXMMReg( gregOfRM(rm), + binop(op, gpart, + getXMMReg(eregOfRM(rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + /* We can only do a 64-bit memory read, so the upper half of the + E operand needs to be made simply of zeroes. */ + IRTemp epart = newTemp(Ity_V128); + addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( epart, unop( Iop_64UtoV128, + loadLE(Ity_I64, mkexpr(addr))) ); + putXMMReg( gregOfRM(rm), + binop(op, gpart, mkexpr(epart)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* All lanes unary SSE operation, G = op(E). E is a whole XMM register or a 128-bit memory operand. Returns the delta just past the modrm/amode bytes. */ + + static UInt dis_SSE_E_to_G_unary_all ( + UChar sorb, Int delta, + HChar* opname, IROp op + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + if (epartIsReg(rm)) { + putXMMReg( gregOfRM(rm), + unop(op, getXMMReg(eregOfRM(rm))) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + putXMMReg( gregOfRM(rm), + unop(op, loadLE(Ity_V128, mkexpr(addr))) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* Lowest 32-bit lane only unary SSE operation, G = op(E). 
*/ + + static UInt dis_SSE_E_to_G_unary_lo32 ( + UChar sorb, Int delta, + HChar* opname, IROp op + ) + { + /* First we need to get the old G value and patch the low 32 bits + of the E operand into it. Then apply op and write back to G. + The value returned is the delta just past the modrm/amode bytes. */ + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + /* oldG0 is G as it stands; oldG1 is G with its low 32 bits replaced by those of E. */ IRTemp oldG0 = newTemp(Ity_V128); + IRTemp oldG1 = newTemp(Ity_V128); + + assign( oldG0, getXMMReg(gregOfRM(rm)) ); + + if (epartIsReg(rm)) { + assign( oldG1, + binop( Iop_SetV128lo32, + mkexpr(oldG0), + getXMMRegLane32(eregOfRM(rm), 0)) ); + putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + /* Memory E: only 32 bits are read. */ addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( oldG1, + binop( Iop_SetV128lo32, + mkexpr(oldG0), + loadLE(Ity_I32, mkexpr(addr)) )); + putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ + + static UInt dis_SSE_E_to_G_unary_lo64 ( + UChar sorb, Int delta, + HChar* opname, IROp op + ) + { + /* First we need to get the old G value and patch the low 64 bits + of the E operand into it. Then apply op and write back to G. 
+ The value returned is the delta just past the modrm/amode bytes. */ + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + /* oldG0 is G as it stands; oldG1 is G with its low 64 bits replaced by those of E. */ IRTemp oldG0 = newTemp(Ity_V128); + IRTemp oldG1 = newTemp(Ity_V128); + + assign( oldG0, getXMMReg(gregOfRM(rm)) ); + + if (epartIsReg(rm)) { + assign( oldG1, + binop( Iop_SetV128lo64, + mkexpr(oldG0), + getXMMRegLane64(eregOfRM(rm), 0)) ); + putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + return delta+1; + } else { + /* Memory E: only 64 bits are read. */ addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( oldG1, + binop( Iop_SetV128lo64, + mkexpr(oldG0), + loadLE(Ity_I64, mkexpr(addr)) )); + putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + return delta+alen; + } + } + + + /* SSE integer binary operation: + G = G `op` E (eLeft == False) + G = E `op` G (eLeft == True) + E is a whole XMM register or a 128-bit memory operand. Returns the + delta just past the modrm/amode bytes. + */ + static UInt dis_SSEint_E_to_G( + UChar sorb, Int delta, + HChar* opname, IROp op, + Bool eLeft + ) + { + HChar dis_buf[50]; + Int alen; + IRTemp addr; + UChar rm = getIByte(delta); + IRExpr* gpart = getXMMReg(gregOfRM(rm)); + IRExpr* epart = NULL; + if (epartIsReg(rm)) { + epart = getXMMReg(eregOfRM(rm)); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + delta += 1; + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + epart = loadLE(Ity_V128, mkexpr(addr)); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + delta += alen; + } + /* eLeft only selects the operand order handed to op. */ putXMMReg( gregOfRM(rm), + eLeft ? binop(op, epart, gpart) + : binop(op, gpart, epart) ); + return delta; + } + + + /* Helper for doing SSE FP comparisons. 
*/ + + /* Map a comparison immediate to an IROp. Only the low 3 bits of imm8 are used. Predicates 4..7 are reported as predicates 0..3 with *needNot set, so the caller has to invert the result. Panics if (sz, all_lanes) is not one of the four combinations handled below. */ static void findSSECmpOp ( Bool* needNot, IROp* op, + Int imm8, Bool all_lanes, Int sz ) + { + imm8 &= 7; + *needNot = False; + *op = Iop_INVALID; + if (imm8 >= 4) { + *needNot = True; + imm8 -= 4; + } + + /* From here on imm8 is in 0..3: EQ, LT, LE, UN respectively. */ if (sz == 4 && all_lanes) { + switch (imm8) { + case 0: *op = Iop_CmpEQ32Fx4; return; + case 1: *op = Iop_CmpLT32Fx4; return; + case 2: *op = Iop_CmpLE32Fx4; return; + case 3: *op = Iop_CmpUN32Fx4; return; + default: break; + } + } + if (sz == 4 && !all_lanes) { + switch (imm8) { + case 0: *op = Iop_CmpEQ32F0x4; return; + case 1: *op = Iop_CmpLT32F0x4; return; + case 2: *op = Iop_CmpLE32F0x4; return; + case 3: *op = Iop_CmpUN32F0x4; return; + default: break; + } + } + if (sz == 8 && all_lanes) { + switch (imm8) { + case 0: *op = Iop_CmpEQ64Fx2; return; + case 1: *op = Iop_CmpLT64Fx2; return; + case 2: *op = Iop_CmpLE64Fx2; return; + case 3: *op = Iop_CmpUN64Fx2; return; + default: break; + } + } + if (sz == 8 && !all_lanes) { + switch (imm8) { + case 0: *op = Iop_CmpEQ64F0x2; return; + case 1: *op = Iop_CmpLT64F0x2; return; + case 2: *op = Iop_CmpLE64F0x2; return; + case 3: *op = Iop_CmpUN64F0x2; return; + default: break; + } + } + vpanic("findSSECmpOp(x86,guest)"); + } + + /* Handles SSE 32F/64F comparisons. 
*/ + + /* The comparison predicate is the immediate byte that follows the modrm/amode bytes. all_lanes selects the packed forms, otherwise only the lowest lane is compared; sz is the lane size in bytes (4 or 8). Returns the delta just past the immediate. */ static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta, + HChar* opname, Bool all_lanes, Int sz ) + { + HChar dis_buf[50]; + Int alen, imm8; + IRTemp addr; + Bool needNot = False; + IROp op = Iop_INVALID; + /* plain holds the un-negated comparison result. */ IRTemp plain = newTemp(Ity_V128); + UChar rm = getIByte(delta); + UShort mask = 0; + vassert(sz == 4 || sz == 8); + if (epartIsReg(rm)) { + imm8 = getIByte(delta+1); + findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); + assign( plain, binop(op, getXMMReg(gregOfRM(rm)), + getXMMReg(eregOfRM(rm))) ); + delta += 2; + DIP("%s $%d,%s,%s\n", opname, + (Int)imm8, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + imm8 = getIByte(delta+alen); + findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); + /* Memory E: load 128 bits for the packed forms, otherwise only sz bytes widened to 128 bits. */ assign( plain, + binop( + op, + getXMMReg(gregOfRM(rm)), + all_lanes ? loadLE(Ity_V128, mkexpr(addr)) + : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) + : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) + ) + ); + delta += alen+1; + DIP("%s $%d,%s,%s\n", opname, + (Int)imm8, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + } + + /* Apply the negation requested by findSSECmpOp. The packed forms invert the whole vector. */ if (needNot && all_lanes) { + putXMMReg( gregOfRM(rm), + unop(Iop_NotV128, mkexpr(plain)) ); + } + else + if (needNot && !all_lanes) { + /* Single-lane forms: XOR with a mask so that only the compared lane is inverted. NOTE(review): presumably each bit of the mkV128 immediate stands for one byte of the vector, giving 4 bytes for sz==4 and 8 for sz==8 -- confirm against mkV128. */ mask = toUShort( sz==4 ? 0x000F : 0x00FF ); + putXMMReg( gregOfRM(rm), + binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); + } + else { + putXMMReg( gregOfRM(rm), mkexpr(plain) ); + } + + return delta; + } + + + /* Vector by scalar shift of G by the amount specified at the bottom + of E. 
*/ + + static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta, + HChar* opname, IROp op ) + { + /* Translates G = G `op` amt, where amt is the low 32 bits of E (an + XMM register or memory). The value returned is the delta just + past the modrm/amode bytes. + NOTE(review): only the low 32 bits of the count are examined; + confirm whether wider counts need handling. */ + HChar dis_buf[50]; + Int alen, size; + IRTemp addr; + Bool shl, shr, sar; + UChar rm = getIByte(delta); + IRTemp g0 = newTemp(Ity_V128); + IRTemp g1 = newTemp(Ity_V128); + IRTemp amt = newTemp(Ity_I32); + IRTemp amt8 = newTemp(Ity_I8); + if (epartIsReg(rm)) { + assign( amt, getXMMRegLane32(eregOfRM(rm), 0) ); + DIP("%s %s,%s\n", opname, + nameXMMReg(eregOfRM(rm)), + nameXMMReg(gregOfRM(rm)) ); + delta++; + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); + DIP("%s %s,%s\n", opname, + dis_buf, + nameXMMReg(gregOfRM(rm)) ); + delta += alen; + } + assign( g0, getXMMReg(gregOfRM(rm)) ); + assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); + + /* size is the lane width in bits. A count >= size is out of range + for the lane and is replaced by the saturated result below, so + it must match the lane width of op exactly. The 16-bit left + shift previously used 32 here, which let counts 16..31 through + to the lane shift; it now uses 16, in line with the other + 16-bit cases and with dis_SSE_shiftE_imm. */ + shl = shr = sar = False; + size = 0; + switch (op) { + case Iop_ShlN16x8: shl = True; size = 16; break; + case Iop_ShlN32x4: shl = True; size = 32; break; + case Iop_ShlN64x2: shl = True; size = 64; break; + case Iop_SarN16x8: sar = True; size = 16; break; + case Iop_SarN32x4: sar = True; size = 32; break; + case Iop_ShrN16x8: shr = True; size = 16; break; + case Iop_ShrN32x4: shr = True; size = 32; break; + case Iop_ShrN64x2: shr = True; size = 64; break; + default: vassert(0); + } + + /* Mux0X picks its second argument when the condition byte is zero + (count out of range) and its third otherwise. Logical shifts + by an out-of-range count give all zeroes. */ + if (shl || shr) { + assign( + g1, + IRExpr_Mux0X( + unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), + mkV128(0x0000), + binop(op, mkexpr(g0), mkexpr(amt8)) + ) + ); + } else + /* Arithmetic shifts by an out-of-range count behave as a shift + by size-1, filling each lane with its sign bit. */ + if (sar) { + assign( + g1, + IRExpr_Mux0X( + unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), + binop(op, mkexpr(g0), mkU8(size-1)), + binop(op, mkexpr(g0), mkexpr(amt8)) + ) + ); + } else { + /*NOTREACHED*/ + vassert(0); + } + + putXMMReg( gregOfRM(rm), mkexpr(g1) ); + return delta; + } + + + /* Vector by scalar shift of E by an immediate byte. 
*/ + + /* E must be a register and the G field must be 2, 4 or 6 (both asserted). The immediate follows the modrm byte, so the value returned is delta+2. */ static + UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op ) + { + Bool shl, shr, sar; + UChar rm = getIByte(delta); + IRTemp e0 = newTemp(Ity_V128); + IRTemp e1 = newTemp(Ity_V128); + UChar amt, size; + vassert(epartIsReg(rm)); + vassert(gregOfRM(rm) == 2 + || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); + amt = getIByte(delta+1); + delta += 2; + DIP("%s $%d,%s\n", opname, + (Int)amt, + nameXMMReg(eregOfRM(rm)) ); + assign( e0, getXMMReg(eregOfRM(rm)) ); + + /* size is the lane width in bits for the given op. */ shl = shr = sar = False; + size = 0; + switch (op) { + case Iop_ShlN16x8: shl = True; size = 16; break; + case Iop_ShlN32x4: shl = True; size = 32; break; + case Iop_ShlN64x2: shl = True; size = 64; break; + case Iop_SarN16x8: sar = True; size = 16; break; + case Iop_SarN32x4: sar = True; size = 32; break; + case Iop_ShrN16x8: shr = True; size = 16; break; + case Iop_ShrN32x4: shr = True; size = 32; break; + case Iop_ShrN64x2: shr = True; size = 64; break; + default: vassert(0); + } + + /* The count is known at translation time, so the out-of-range case is decided here in C rather than in the generated IR. Logical shifts by amt >= size give zero. */ if (shl || shr) { + assign( e1, amt >= size + ? mkV128(0x0000) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else + /* Arithmetic shifts by amt >= size are done as a shift by size-1. */ if (sar) { + assign( e1, amt >= size + ? binop(op, mkexpr(e0), mkU8(size-1)) + : binop(op, mkexpr(e0), mkU8(amt)) + ); + } else { + /*NOTREACHED*/ + vassert(0); + } + + putXMMReg( eregOfRM(rm), mkexpr(e1) ); + return delta; + } + + + /* Get the current SSE rounding mode. Only the low 2 bits of the guest state field at OFFB_SSEROUND are returned. */ + + static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) + { + return binop( Iop_And32, + IRExpr_Get( OFFB_SSEROUND, Ity_I32 ), + mkU32(3) ); + } + + /* Write sseround, which must be of type Ity_I32, to the guest state field at OFFB_SSEROUND. */ static void put_sse_roundingmode ( IRExpr* sseround ) + { + vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); + stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) ); + } + + /* Break a 128-bit value up into four 32-bit ints. 
*/ + + /* t3 receives the most significant 32 bits of t128 and t0 the least significant. Each OUT temp must hold IRTemp_INVALID on entry (asserted); fresh Ity_I32 temps are allocated for them here. */ static void breakup128to32s ( IRTemp t128, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi64 = newTemp(Ity_I64); + IRTemp lo64 = newTemp(Ity_I64); + assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); + + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + + *t0 = newTemp(Ity_I32); + *t1 = newTemp(Ity_I32); + *t2 = newTemp(Ity_I32); + *t3 = newTemp(Ity_I32); + assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); + assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); + assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); + assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); + } + + /* Construct a 128-bit value from four 32-bit ints. t3 supplies the most significant 32 bits and t0 the least significant. */ + + static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, + IRTemp t1, IRTemp t0 ) + { + return + binop( Iop_64HLtoV128, + binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), + binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) + ); + } + + /* Break a 64-bit value up into four 16-bit ints. t3 receives the most significant 16 bits of t64 and t0 the least significant. Each OUT temp must hold IRTemp_INVALID on entry (asserted). */ + + static void breakup64to16s ( IRTemp t64, + /*OUTs*/ + IRTemp* t3, IRTemp* t2, + IRTemp* t1, IRTemp* t0 ) + { + IRTemp hi32 = newTemp(Ity_I32); + IRTemp lo32 = newTemp(Ity_I32); + assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); + assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); + + vassert(t0 && *t0 == IRTemp_INVALID); + vassert(t1 && *t1 == IRTemp_INVALID); + vassert(t2 && *t2 == IRTemp_INVALID); + vassert(t3 && *t3 == IRTemp_INVALID); + + *t0 = newTemp(Ity_I16); + *t1 = newTemp(Ity_I16); + *t2 = newTemp(Ity_I16); + *t3 = newTemp(Ity_I16); + assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); + assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); + assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); + assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); + } + + /* Construct a 64-bit value from four 16-bit ints. 
*/ + + /* t3 supplies the most significant 16 bits and t0 the least significant. */ static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, + IRTemp t1, IRTemp t0 ) + { + return + binop( Iop_32HLto64, + binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), + binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) + ); + } + + /* Generate IR to set the guest %EFLAGS from the pushfl-format image + in the given 32-bit temporary. The flags that are set are: O S Z A + C P D ID AC. + + In all cases, code to set AC is generated. However, VEX actually + ignores the AC value and so can optionally emit an emulation + warning when it is enabled. In this routine, an emulation warning + is only emitted if emit_AC_emwarn is True, in which case + next_insn_EIP must be correct (this allows for correct code + generation for popfl/popfw). If emit_AC_emwarn is False, + next_insn_EIP is unimportant (this allows for easy if kludgey code + generation for IRET.) */ + + static + void set_EFLAGS_from_value ( IRTemp t1, + Bool emit_AC_emwarn, + Addr32 next_insn_EIP ) + { + vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32); + + /* t1 is the flag word. Mask out everything except OSZACP and set + the flags thunk to X86G_CC_OP_COPY. */ + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop(Iop_And32, + mkexpr(t1), + mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P + | X86G_CC_MASK_A | X86G_CC_MASK_Z + | X86G_CC_MASK_S| X86G_CC_MASK_O ) + ) + ) + ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + + /* Also need to set the D flag, which is held in bit 10 of t1. + If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. 
*/ + stmt( IRStmt_Put( + OFFB_DFLAG, + IRExpr_Mux0X( + unop(Iop_32to8, + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(t1), mkU8(10)), + mkU32(1))), + mkU32(1), + mkU32(0xFFFFFFFF))) + ); + + /* Set the ID flag, which is held in bit 21 of t1. It is stored as 0 or 1. */ + stmt( IRStmt_Put( + OFFB_IDFLAG, + IRExpr_Mux0X( + unop(Iop_32to8, + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(t1), mkU8(21)), + mkU32(1))), + mkU32(0), + mkU32(1))) + ); + + /* And set the AC flag, which is held in bit 18 of t1. If setting + it to 1, possibly emit an emulation warning. */ + stmt( IRStmt_Put( + OFFB_ACFLAG, + IRExpr_Mux0X( + unop(Iop_32to8, + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(t1), mkU8(18)), + mkU32(1))), + mkU32(0), + mkU32(1))) + ); + + /* The warning is a side exit to next_insn_EIP with jump kind Ijk_EmWarn, taken only when bit 18 of t1 is set. */ if (emit_AC_emwarn) { + put_emwarn( mkU32(EmWarn_X86_acFlag) ); + stmt( + IRStmt_Exit( + binop( Iop_CmpNE32, + binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), + mkU32(0) ), + Ijk_EmWarn, + IRConst_U32( next_insn_EIP ) + ) + ); + } + } + + + /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit + values (aa,bb), computes, for each of the 4 16-bit lanes: + + (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 + */ + static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) + { + IRTemp aa = newTemp(Ity_I64); + IRTemp bb = newTemp(Ity_I64); + IRTemp aahi32s = newTemp(Ity_I64); + IRTemp aalo32s = newTemp(Ity_I64); + IRTemp bbhi32s = newTemp(Ity_I64); + IRTemp bblo32s = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp one32x2 = newTemp(Ity_I64); + assign(aa, aax); + assign(bb, bbx); + /* Widen the 16-bit lanes to signed 32-bit lanes, two at a time: interleaving a value with itself puts each 16-bit lane in both halves of a 32-bit lane, and the arithmetic right shift by 16 then leaves it sign-extended. */ assign( aahi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( aalo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( bbhi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + assign( bblo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + /* The constant 1 in each of the two 32-bit lanes. */ assign(one32x2, mkU64( (1ULL << 32) + 1 )); + assign( + rHi, 
+ binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + assign( + rLo, + binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + /* Narrow the four 32-bit results back to four 16-bit lanes. */ return + binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); + } + + /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit + values (aa,bb), computes, for each lane: + + if aa_lane < 0 then - bb_lane + else if aa_lane > 0 then bb_lane + else 0 + + laneszB is the lane size in bytes and must be 1, 2 or 4. + */ + static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) + { + IRTemp aa = newTemp(Ity_I64); + IRTemp bb = newTemp(Ity_I64); + IRTemp zero = newTemp(Ity_I64); + IRTemp bbNeg = newTemp(Ity_I64); + IRTemp negMask = newTemp(Ity_I64); + IRTemp posMask = newTemp(Ity_I64); + IROp opSub = Iop_INVALID; + IROp opCmpGTS = Iop_INVALID; + + switch (laneszB) { + case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; + case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; + case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; + default: vassert(0); + } + + assign( aa, aax ); + assign( bb, bbx ); + assign( zero, mkU64(0) ); + /* bbNeg is 0 - bb per lane. negMask and posMask come from the signed compares 0 > aa and aa > 0 and are used below as per-lane select masks. */ assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); + assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); + assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); + + return + binop(Iop_Or64, + binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), + binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); + + } + + /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit + value aa, computes, for each lane + + if aa < 0 then -aa else aa + + Note that the result is interpreted as unsigned, so that the + absolute value of the most negative signed input can be + represented. 
+ laneszB is the lane size in bytes and must be 1, 2 or 4. + */ + static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) + { + IRTemp aa = newTemp(Ity_I64); + IRTemp zero = newTemp(Ity_I64); + IRTemp aaNeg = newTemp(Ity_I64); + IRTemp negMask = newTemp(Ity_I64); + IRTemp posMask = newTemp(Ity_I64); + IROp opSub = Iop_INVALID; + IROp opSarN = Iop_INVALID; + + switch (laneszB) { + case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; + case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; + case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; + default: vassert(0); + } + + assign( aa, aax ); + /* An arithmetic right shift by (lane bits - 1) fills each lane with copies of its sign bit; posMask is the complement. aaNeg is 0 - aa per lane. */ assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); + assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); + assign( zero, mkU64(0) ); + assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); + return + binop(Iop_Or64, + binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), + binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); + } + + /* Returns (hi64 << 8*(8-byteShift)) | (lo64 >> 8*byteShift), that is, the 64 bits found byteShift bytes above the bottom of the pair hi64:lo64. byteShift must be in 1..7 (asserted). */ static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, + IRTemp lo64, Int byteShift ) + { + vassert(byteShift >= 1 && byteShift <= 7); + return + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), + binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) + ); + } + + /* Generate a SIGSEGV followed by a restart of the current instruction + if effective_addr is not 16-aligned. This is required behaviour + for some SSE3 instructions and all 128-bit SSSE3 instructions. + This assumes that guest_EIP_curr_instr is set correctly! */ + static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) + { + /* Side exit, taken when any of the low 4 address bits is set. */ stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, + binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)), + mkU32(0)), + Ijk_SigSEGV, + IRConst_U32(guest_EIP_curr_instr) + ) + ); + } + + + /* Helper for deciding whether a given insn (starting at the opcode + byte) may validly be used with a LOCK prefix. The following insns + may be used with LOCK when their destination operand is in memory. + AFAICS this is exactly the same for both 32-bit and 64-bit mode. 
+ + ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 + OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 + ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 + SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19 + AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 + SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 + XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 + + DEC FE /1, FF /1 + INC FE /0, FF /0 + + NEG F6 /3, F7 /3 + NOT F6 /2, F7 /2 + + XCHG 86, 87 + + BTC 0F BB, 0F BA /7 + BTR 0F B3, 0F BA /6 + BTS 0F AB, 0F BA /5 + + CMPXCHG 0F B0, 0F B1 + CMPXCHG8B 0F C7 /1 + + XADD 0F C0, 0F C1 + + ------------------------------ + + 80 /0 = addb $imm8, rm8 + 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 + 82 /0 = addb $imm8, rm8 + 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 + + 00 = addb r8, rm8 + 01 = addl r32, rm32 and addw r16, rm16 + + Same for ADD OR ADC SBB AND SUB XOR + + FE /1 = dec rm8 + FF /1 = dec rm32 and dec rm16 + + FE /0 = inc rm8 + FF /0 = inc rm32 and inc rm16 + + F6 /3 = neg rm8 + F7 /3 = neg rm32 and neg rm16 + + F6 /2 = not rm8 + F7 /2 = not rm32 and not rm16 + + 0F BB = btcw r16, rm16 and btcl r32, rm32 + 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32 + + Same for BTS, BTR + + opc points at the first opcode byte, after any prefixes. In every + case below the E operand must be in memory; register forms are + rejected. + */ + static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) + { + switch (opc[0]) { + /* ADD OR ADC SBB AND SUB XOR, register-source forms. */ case 0x00: case 0x01: case 0x08: case 0x09: + case 0x10: case 0x11: case 0x18: case 0x19: + case 0x20: case 0x21: case 0x28: case 0x29: + case 0x30: case 0x31: + if (!epartIsReg(opc[1])) + return True; + break; + + /* Immediate group: sub-opcodes /0 .. /6 are the seven operations in the table above; /7 is not in the table and is rejected. */ case 0x80: case 0x81: case 0x82: case 0x83: + if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6 + && !epartIsReg(opc[1])) + return True; + break; + + /* INC (/0) and DEC (/1). */ case 0xFE: case 0xFF: + if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1 + && !epartIsReg(opc[1])) + return True; + break; + + /* NOT (/2) and NEG (/3). */ case 0xF6: case 0xF7: + if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3 + && !epartIsReg(opc[1])) + return True; + break; + + /* XCHG. */ case 0x86: case 0x87: + if (!epartIsReg(opc[1])) + return True; + break; + + /* Two-byte opcodes: the modrm byte is now opc[2]. */ case 0x0F: { + switch (opc[1]) { + /* BTC, BTR, BTS with a register bit index. */ case 0xBB: case 0xB3: case 0xAB: + 
if (!epartIsReg(opc[2])) + return True; + break; + /* BTS (/5), BTR (/6), BTC (/7) with an immediate bit index. */ case 0xBA: + if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7 + && !epartIsReg(opc[2])) + return True; + break; + /* CMPXCHG. */ case 0xB0: case 0xB1: + if (!epartIsReg(opc[2])) + return True; + break; + /* CMPXCHG8B (/1 only). */ case 0xC7: + if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) + return True; + break; + /* XADD. */ case 0xC0: case 0xC1: + if (!epartIsReg(opc[2])) + return True; + break; + default: + break; + } /* switch (opc[1]) */ + break; + } + + default: + break; + } /* switch (opc[0]) */ + + return False; + } + + + /*------------------------------------------------------------*/ + /*--- Disassemble a single instruction ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction is + located in host memory at &guest_code[delta]. *expect_CAS is set + to True if the resulting IR is expected to contain an IRCAS + statement, and False if it's not expected to. This makes it + possible for the caller of disInstr_X86_WRK to check that + LOCK-prefixed instructions are at least plausibly translated, in + that it becomes possible to check that a (validly) LOCK-prefixed + instruction generates a translation containing an IRCAS, and + instructions without LOCK prefixes don't generate translations + containing an IRCAS. + */ + static + DisResult disInstr_X86_WRK ( + /*OUT*/Bool* expect_CAS, + Bool put_IP, + Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), + void* callback_opaque, + Long delta64, + VexArchInfo* archinfo + ) + { + IRType ty; + IRTemp addr, t0, t1, t2, t3, t4, t5, t6; + Int alen; + UChar opc, modrm, abyte, pre; + UInt d32; + HChar dis_buf[50]; + Int am_sz, d_sz, n_prefixes; + DisResult dres; + UChar* insn; /* used in SSE decoders */ + + /* The running delta */ + Int delta = (Int)delta64; + + /* Holds eip at the start of the insn, so that we can print + consistent error messages for unimplemented insns. 
*/ + Int delta_start = delta; + + /* sz denotes the nominal data-op size of the insn; we change it to + 2 if an 0x66 prefix is seen */ + Int sz = 4; + + /* sorb holds the segment-override-prefix byte, if any. Zero if no + prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65} + indicating the prefix. */ + UChar sorb = 0; + + /* Gets set to True if a LOCK prefix is seen. */ + Bool pfx_lock = False; + + /* Set result defaults. */ + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + + *expect_CAS = False; + + addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; + + vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); + DIP("\t0x%x: ", guest_EIP_bbstart+delta); + + /* We may be asked to update the guest EIP before going further. */ + if (put_IP) + stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) ); + + /* Spot "Special" instructions (see comment at top of file). */ + { + UChar* code = (UChar*)(guest_code + delta); + /* Spot the 12-byte preamble: + C1C703 roll $3, %edi + C1C70D roll $13, %edi + C1C71D roll $29, %edi + C1C713 roll $19, %edi + */ + if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 && + code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D && + code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D && + code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) { + /* Got a "Special" instruction preamble. Which one is it? 
*/ + if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) { + /* %EDX = client_request ( %EAX ) */ + DIP("%%edx = client_request ( %%eax )\n"); + delta += 14; + jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta); + dres.whatNext = Dis_StopHere; + goto decode_success; + } + else + if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) { + /* %EAX = guest_NRADDR */ + DIP("%%eax = guest_NRADDR\n"); + delta += 14; + putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 )); + goto decode_success; + } + else + if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) { + /* call-noredir *%EAX */ + DIP("call-noredir *%%eax\n"); + delta += 14; + t1 = newTemp(Ity_I32); + assign(t1, getIReg(4,R_EAX)); + t2 = newTemp(Ity_I32); + assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); + putIReg(4, R_ESP, mkexpr(t2)); + storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); + jmp_treg(Ijk_NoRedir,t1); + dres.whatNext = Dis_StopHere; + goto decode_success; + } + /* We don't know what it is. */ + goto decode_failure; + /*NOTREACHED*/ + } + } + + /* Handle a couple of weird-ass NOPs that have been observed in the + wild. */ + { + UChar* code = (UChar*)(guest_code + delta); + /* Sun's JVM 1.5.0 uses the following as a NOP: + 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ + if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 + && code[3] == 0x65 && code[4] == 0x90) { + DIP("%%es:%%cs:%%fs:%%gs:nop\n"); + delta += 5; + goto decode_success; + } + /* don't barf on recent binutils padding + 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1) */ + if (code[0] == 0x66 + && code[1] == 0x2E && code[2] == 0x0F && code[3] == 0x1F + && code[4] == 0x84 && code[5] == 0x00 && code[6] == 0x00 + && code[7] == 0x00 && code[8] == 0x00 && code[9] == 0x00 ) { + DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); + delta += 10; + goto decode_success; + } + } + + /* Normal instruction handling starts here. 
*/ + + /* Deal with some but not all prefixes: + 66(oso) + F0(lock) + 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) + Not dealt with (left in place): + F2 F3 + */ + n_prefixes = 0; + while (True) { + if (n_prefixes > 7) goto decode_failure; + pre = getUChar(delta); + switch (pre) { + case 0x66: + sz = 2; + break; + case 0xF0: + pfx_lock = True; + *expect_CAS = True; + break; + case 0x3E: /* %DS: */ + case 0x26: /* %ES: */ + case 0x64: /* %FS: */ + case 0x65: /* %GS: */ + if (sorb != 0) + goto decode_failure; /* only one seg override allowed */ + sorb = pre; + break; + case 0x2E: { /* %CS: */ + /* 2E prefix on a conditional branch instruction is a + branch-prediction hint, which can safely be ignored. */ + UChar op1 = getIByte(delta+1); + UChar op2 = getIByte(delta+2); + if ((op1 >= 0x70 && op1 <= 0x7F) + || (op1 == 0xE3) + || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { + if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); + } else { + /* All other CS override cases are not handled */ + goto decode_failure; + } + break; + } + case 0x36: /* %SS: */ + /* SS override cases are not handled */ + goto decode_failure; + default: + goto not_a_prefix; + } + n_prefixes++; + delta++; + } + + not_a_prefix: + + /* Now we should be looking at the primary opcode byte or the + leading F2 or F3. Check that any LOCK prefix is actually + allowed. */ + + if (pfx_lock) { + if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { + DIP("lock "); + } else { + *expect_CAS = False; + goto decode_failure; + } + } + + + /* ---------------------------------------------------- */ + /* --- The SSE decoder. --- */ + /* ---------------------------------------------------- */ + + /* What did I do to deserve SSE ? Perhaps I was really bad in a + previous life? */ + + /* Note, this doesn't handle SSE2 or SSE3. That is handled in a + later section, further on. */ + + insn = (UChar*)&guest_code[delta]; + + /* Treat fxsave specially. 
It should be doable even on an SSE0 + (Pentium-II class) CPU. Hence be prepared to handle it on + any subarchitecture variant. + */ + + /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { + IRDirty* d; + modrm = getIByte(delta+2); + vassert(sz == 4); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + DIP("fxsave %s\n", dis_buf); + + /* Uses dirty helper: + void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FXSAVE", + &x86g_dirtyhelper_FXSAVE, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 512; + + /* declare we're reading guest state */ + d->nFxState = 7; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(UInt); + + d->fxState[4].fx = Ifx_Read; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(UInt); + + d->fxState[5].fx = Ifx_Read; + d->fxState[5].offset = OFFB_XMM0; + d->fxState[5].size = 8 * sizeof(U128); + + d->fxState[6].fx = Ifx_Read; + d->fxState[6].offset = OFFB_SSEROUND; + d->fxState[6].size = sizeof(UInt); + + /* Be paranoid ... this assertion tries to ensure the 8 %xmm + images are packed back-to-back. If not, the value of + d->fxState[5].size is wrong. 
*/ + vassert(16 == sizeof(U128)); + vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); + + stmt( IRStmt_Dirty(d) ); + + goto decode_success; + } + + /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) { + IRDirty* d; + modrm = getIByte(delta+2); + vassert(sz == 4); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + DIP("fxrstor %s\n", dis_buf); + + /* Uses dirty helper: + void x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) */ + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "x86g_dirtyhelper_FXRSTOR", + &x86g_dirtyhelper_FXRSTOR, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + + /* declare we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 512; + + /* declare we're writing guest state */ + d->nFxState = 7; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(UInt); + + d->fxState[4].fx = Ifx_Write; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(UInt); + + d->fxState[5].fx = Ifx_Write; + d->fxState[5].offset = OFFB_XMM0; + d->fxState[5].size = 8 * sizeof(U128); + + d->fxState[6].fx = Ifx_Write; + d->fxState[6].offset = OFFB_SSEROUND; + d->fxState[6].size = sizeof(UInt); + + /* Be paranoid ... this assertion tries to ensure the 8 %xmm + images are packed back-to-back. If not, the value of + d->fxState[5].size is wrong. 
*/ + vassert(16 == sizeof(U128)); + vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); + + stmt( IRStmt_Dirty(d) ); + + goto decode_success; + } + + /* ------ SSE decoder main ------ */ + + /* Skip parts of the decoder which don't apply given the stated + guest subarchitecture. */ + if (archinfo->hwcaps == 0/*baseline, no sse at all*/) + goto after_sse_decoders; + + /* Otherwise we must be doing sse1 or sse2, so we can at least try + for SSE1 here. */ + + /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 ); + goto decode_success; + } + + /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 ); + goto decode_success; + } + + /* 0F 55 = ANDNPS -- G = (not G) and E */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) { + delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 ); + goto decode_success; + } + + /* 0F 54 = ANDPS -- G = G and E */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 ); + goto decode_success; + } + + /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 ); + goto decode_success; + } + + /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) { + vassert(sz == 4); + delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 ); + goto decode_success; + } + + /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ + /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ + if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { + IRTemp argL = newTemp(Ity_F32); + IRTemp argR = newTemp(Ity_F32); + modrm 
= getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) ); + delta += 2+1; + DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm)) ); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); + delta += 2+alen; + DIP("[u]comiss %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) ); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop( Iop_And32, + binop(Iop_CmpF64, + unop(Iop_F32toF64,mkexpr(argL)), + unop(Iop_F32toF64,mkexpr(argR))), + mkU32(0x45) + ))); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + goto decode_success; + } + + /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low + half xmm */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) { + IRTemp arg64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + vassert(sz == 4); + + modrm = getIByte(delta+2); + do_MMX_preamble(); + if (epartIsReg(modrm)) { + assign( arg64, getMMXReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpi2ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + + putXMMRegLane32F( + gregOfRM(modrm), 0, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, + unop(Iop_64to32, mkexpr(arg64)) )) ); + + putXMMRegLane32F( + gregOfRM(modrm), 1, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, + unop(Iop_64HIto32, mkexpr(arg64)) )) ); + + goto 
decode_success; + } + + /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low + quarter xmm */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) { + IRTemp arg32 = newTemp(Ity_I32); + IRTemp rmode = newTemp(Ity_I32); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + assign( arg32, getIReg(4, eregOfRM(modrm)) ); + delta += 3+1; + DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); + delta += 3+alen; + DIP("cvtsi2ss %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + + putXMMRegLane32F( + gregOfRM(modrm), 0, + binop(Iop_F64toF32, + mkexpr(rmode), + unop(Iop_I32toF64, mkexpr(arg32)) ) ); + + goto decode_success; + } + + /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x + I32 in mmx, according to prevailing SSE rounding mode */ + /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x + I32 in mmx, rounding towards zero */ + if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp dst64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + IRTemp f32lo = newTemp(Ity_F32); + IRTemp f32hi = newTemp(Ity_F32); + Bool r2zero = toBool(insn[1] == 0x2C); + + do_MMX_preamble(); + modrm = getIByte(delta+2); + + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); + assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1)); + DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32, + mkexpr(addr), + mkU32(4) ))); + delta += 2+alen; + DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + if (r2zero) { + assign(rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + assign( + dst64, + binop( Iop_32HLto64, + binop( Iop_F64toI32, + mkexpr(rmode), + unop( Iop_F32toF64, mkexpr(f32hi) ) ), + binop( Iop_F64toI32, + mkexpr(rmode), + unop( Iop_F32toF64, mkexpr(f32lo) ) ) + ) + ); + + putMMXReg(gregOfRM(modrm), mkexpr(dst64)); + goto decode_success; + } + + /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to + I32 in ireg, according to prevailing SSE rounding mode */ + /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to + I32 in ireg, rounding towards zero */ + if (insn[0] == 0xF3 && insn[1] == 0x0F + && (insn[2] == 0x2D || insn[2] == 0x2C)) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f32lo = newTemp(Ity_F32); + Bool r2zero = toBool(insn[2] == 0x2C); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + delta += 3+1; + assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); + DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", + nameXMMReg(eregOfRM(modrm)), + nameIReg(4, gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + delta += 3+alen; + DIP("cvt%sss2si %s,%s\n", r2zero ? 
"t" : "", + dis_buf, + nameIReg(4, gregOfRM(modrm))); + } + + if (r2zero) { + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + putIReg(4, gregOfRM(modrm), + binop( Iop_F64toI32, + mkexpr(rmode), + unop( Iop_F32toF64, mkexpr(f32lo) ) ) + ); + + goto decode_success; + } + + /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 ); + goto decode_success; + } + + /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 ); + goto decode_success; + } + + /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) { + + IRTemp t64 = newTemp(Ity_I64); + IRTemp ew = newTemp(Ity_I32); + + modrm = getIByte(delta+2); + vassert(!epartIsReg(modrm)); + vassert(sz == 4); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + DIP("ldmxcsr %s\n", dis_buf); + + /* The only thing we observe in %mxcsr is the rounding mode. + Therefore, pass the 32-bit value (SSE native-format control + word) to a clean helper, getting back a 64-bit value, the + lower half of which is the SSEROUND value to store, and the + upper half of which is the emulation-warning token which may + be generated. + */ + /* ULong x86h_check_ldmxcsr ( UInt ); */ + assign( t64, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86g_check_ldmxcsr", + &x86g_check_ldmxcsr, + mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) ) + ) + ); + + put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); + assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); + put_emwarn( mkexpr(ew) ); + /* Finally, if an emulation warning was reported, side-exit to + the next insn, reporting the warning, so that Valgrind's + dispatcher sees the warning. 
*/ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), + Ijk_EmWarn, + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + ) + ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F7 = MASKMOVQ -- 8x8 masked store */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { + Bool ok = False; + delta = dis_MMX( &ok, sorb, sz, delta+1 ); + if (!ok) + goto decode_failure; + goto decode_success; + } + + /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); + goto decode_success; + } + + /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); + goto decode_success; + } + + /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); + goto decode_success; + } + + /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); + goto decode_success; + } + + /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ + /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). 
*/ + if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRM(modrm), + getXMMReg( eregOfRM(modrm) )); + DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + putXMMReg( gregOfRM(modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("mov[ua]ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ + /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ + if (sz == 4 && insn[0] == 0x0F + && (insn[1] == 0x29 || insn[1] == 0x11)) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + /* fall through; awaiting test case */ + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)), + dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ + /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, + getXMMRegLane64( eregOfRM(modrm), 0 ) ); + DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movhps %s,%s\n", dis_buf, + nameXMMReg( gregOfRM(modrm) )); + } + goto decode_success; + } + + /* 0F 17 = MOVHPS -- move from high half of XMM to mem. 
*/ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRM(insn[2]), + 1/*upper lane*/ ) ); + DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ + /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMRegLane64( gregOfRM(modrm), + 0/*lower lane*/, + getXMMRegLane64( eregOfRM(modrm), 1 )); + DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movlps %s, %s\n", + dis_buf, nameXMMReg( gregOfRM(modrm) )); + } + goto decode_success; + } + + /* 0F 13 = MOVLPS -- move from low half of XMM to mem. 
*/ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRM(insn[2]), + 0/*lower lane*/ ) ); + DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) + to 4 lowest bits of ireg(G) */ + if (insn[0] == 0x0F && insn[1] == 0x50) { + modrm = getIByte(delta+2); + if (sz == 4 && epartIsReg(modrm)) { + Int src; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I32); + delta += 2+1; + src = eregOfRM(modrm); + assign( t0, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), + mkU32(1) )); + assign( t1, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), + mkU32(2) )); + assign( t2, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), + mkU32(4) )); + assign( t3, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), + mkU32(8) )); + putIReg(4, gregOfRM(modrm), + binop(Iop_Or32, + binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), + binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) + ) + ); + DIP("movmskps %s,%s\n", nameXMMReg(src), + nameIReg(4, gregOfRM(modrm))); + goto decode_success; + } + /* else fall through */ + } + + /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ + /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ + if (insn[0] == 0x0F && insn[1] == 0x2B) { + modrm = getIByte(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("movntp%s %s,%s\n", sz==2 ? 
"d" : "s", + dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the + Intel manual does not say anything about the usual business of + the FP reg tags getting trashed whenever an MMX insn happens. + So we just leave them alone. + */ + if (insn[0] == 0x0F && insn[1] == 0xE7) { + modrm = getIByte(delta+2); + if (sz == 4 && !epartIsReg(modrm)) { + /* do_MMX_preamble(); Intel docs don't specify this */ + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); + DIP("movntq %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G + (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + putXMMRegLane32( gregOfRM(modrm), 0, + getXMMRegLane32( eregOfRM(modrm), 0 )); + DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + /* zero bits 127:64 */ + putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); + /* zero bits 63:32 */ + putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) ); + /* write bits 31:0 */ + putXMMRegLane32( gregOfRM(modrm), 0, + loadLE(Ity_I32, mkexpr(addr)) ); + DIP("movss %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + goto decode_success; + } + + /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem + or lo 1/4 xmm). 
*/ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + /* fall through, we don't yet have a test case */ + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + storeLE( mkexpr(addr), + getXMMRegLane32(gregOfRM(modrm), 0) ); + DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)), + dis_buf); + delta += 3+alen; + goto decode_success; + } + } + + /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 ); + goto decode_success; + } + + /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 ); + goto decode_success; + } + + /* 0F 56 = ORPS -- G = G and E */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pavgb", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pavgw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put + zero-extend of it in ireg(G). 
*/ + if (insn[0] == 0x0F && insn[1] == 0xC5) { + modrm = insn[2]; + if (sz == 4 && epartIsReg(modrm)) { + IRTemp sV = newTemp(Ity_I64); + t5 = newTemp(Ity_I16); + do_MMX_preamble(); + assign(sV, getMMXReg(eregOfRM(modrm))); + breakup64to16s( sV, &t3, &t2, &t1, &t0 ); + switch (insn[3] & 3) { + case 0: assign(t5, mkexpr(t0)); break; + case 1: assign(t5, mkexpr(t1)); break; + case 2: assign(t5, mkexpr(t2)); break; + case 3: assign(t5, mkexpr(t3)); break; + default: vassert(0); /*NOTREACHED*/ + } + putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5))); + DIP("pextrw $%d,%s,%s\n", + (Int)insn[3], nameMMXReg(eregOfRM(modrm)), + nameIReg(4,gregOfRM(modrm))); + delta += 4; + goto decode_success; + } + /* else fall through */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and + put it into the specified lane of mmx(G). */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) { + /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the + mmx reg. t4 is the new lane value. t5 is the original + mmx value. t6 is the new mmx value. 
*/ + Int lane; + t4 = newTemp(Ity_I16); + t5 = newTemp(Ity_I64); + t6 = newTemp(Ity_I64); + modrm = insn[2]; + do_MMX_preamble(); + + assign(t5, getMMXReg(gregOfRM(modrm))); + breakup64to16s( t5, &t3, &t2, &t1, &t0 ); + + if (epartIsReg(modrm)) { + assign(t4, getIReg(2, eregOfRM(modrm))); + delta += 3+1; + lane = insn[3+1-1]; + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + nameIReg(2,eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 3+alen; + lane = insn[3+alen-1]; + assign(t4, loadLE(Ity_I16, mkexpr(addr))); + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + switch (lane & 3) { + case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; + case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; + case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; + case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; + default: vassert(0); /*NOTREACHED*/ + } + putMMXReg(gregOfRM(modrm), mkexpr(t6)); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F EE = PMAXSW -- 16x4 signed max */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pmaxsw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F DE = PMAXUB -- 8x8 unsigned max */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pmaxub", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F EA = PMINSW -- 16x4 signed min */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pminsw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F DA = PMINUB 
-- 8x8 unsigned min */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pminub", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in + mmx(G), turn them into a byte, and put zero-extend of it in + ireg(G). */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + do_MMX_preamble(); + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I32); + assign(t0, getMMXReg(eregOfRM(modrm))); + assign(t1, mkIRExprCCall( + Ity_I32, 0/*regparms*/, + "x86g_calculate_mmx_pmovmskb", + &x86g_calculate_mmx_pmovmskb, + mkIRExprVec_1(mkexpr(t0)))); + putIReg(4, gregOfRM(modrm), mkexpr(t1)); + DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameIReg(4,gregOfRM(modrm))); + delta += 3; + goto decode_success; + } + /* else fall through */ + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "pmuluh", False ); + goto decode_success; + } + + /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ + /* 0F 18 /1 = PREFETCH0 -- with various different hints */ + /* 0F 18 /2 = PREFETCH1 */ + /* 0F 18 /3 = PREFETCH2 */ + if (insn[0] == 0x0F && insn[1] == 0x18 + && !epartIsReg(insn[2]) + && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) { + HChar* hintstr = "??"; + + modrm = getIByte(delta+2); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + switch (gregOfRM(modrm)) { + case 0: hintstr = "nta"; break; + case 1: hintstr = "t0"; break; + case 2: hintstr = "t1"; break; + case 3: hintstr = "t2"; break; + default: vassert(0); /*NOTREACHED*/ + } + + DIP("prefetch%s 
%s\n", hintstr, dis_buf); + goto decode_success; + } + + /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */ + /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */ + if (insn[0] == 0x0F && insn[1] == 0x0D + && !epartIsReg(insn[2]) + && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) { + HChar* hintstr = "??"; + + modrm = getIByte(delta+2); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + switch (gregOfRM(modrm)) { + case 0: hintstr = ""; break; + case 1: hintstr = "w"; break; + default: vassert(0); /*NOTREACHED*/ + } + + DIP("prefetch%s %s\n", hintstr, dis_buf); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "psadbw", False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_I64); + dV = newTemp(Ity_I64); + do_MMX_preamble(); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + order = (Int)insn[3]; + delta += 2+2; + DIP("pshufw $%d,%s,%s\n", order, + nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 3+alen; + DIP("pshufw $%d,%s,%s\n", order, + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + breakup64to16s( sV, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + assign(dV, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + putMMXReg(gregOfRM(modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ + if (insn[0] == 0x0F && insn[1] == 0x53) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, + "rcpps", Iop_Recip32Fx4 ); + goto decode_success; + } + + /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, + "rcpss", Iop_Recip32F0x4 ); + goto decode_success; + } + + /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ + if (insn[0] == 0x0F && insn[1] == 0x52) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, + "rsqrtps", Iop_RSqrt32Fx4 ); + goto decode_success; + } + + /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, + "rsqrtss", Iop_RSqrt32F0x4 ); + goto decode_success; + } + + /* 0F AE /7 = SFENCE -- flush pending operations to memory */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { + vassert(sz == 4); + delta += 3; + /* Insert a memory fence. It's sometimes important that these + are carried through to the generated code. 
*/ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("sfence\n"); + goto decode_success; + } + + /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) { + Int select; + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + select = (Int)insn[3]; + delta += 2+2; + DIP("shufps $%d,%s,%s\n", select, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + select = (Int)insn[2+alen]; + delta += 3+alen; + DIP("shufps $%d,%s,%s\n", select, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) + # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + + putXMMReg( + gregOfRM(modrm), + mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), + SELD((select>>2)&3), SELD((select>>0)&3) ) + ); + + # undef SELD + # undef SELS + + goto decode_success; + } + + /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) { + delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, + "sqrtps", Iop_Sqrt32Fx4 ); + goto decode_success; + } + + /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, + "sqrtss", Iop_Sqrt32F0x4 ); + goto decode_success; + } + + /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) { + modrm = getIByte(delta+2); + vassert(sz == 4); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + /* Fake up a native SSE mxcsr word. The only thing it depends + on is SSEROUND[1:0], so call a clean helper to cook it up. 
+ */ + /* UInt x86h_create_mxcsr ( UInt sseround ) */ + DIP("stmxcsr %s\n", dis_buf); + storeLE( mkexpr(addr), + mkIRExprCCall( + Ity_I32, 0/*regp*/, + "x86g_create_mxcsr", &x86g_create_mxcsr, + mkIRExprVec_1( get_sse_roundingmode() ) + ) + ); + goto decode_success; + } + + /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 ); + goto decode_success; + } + + /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 ); + goto decode_success; + } + + /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ + /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ + /* These just appear to be special cases of SHUFPS */ + if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + Bool hi = toBool(insn[1] == 0x15); + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("unpck%sps %s,%s\n", hi ? "h" : "l", + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + if (hi) { + putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) ); + } else { + putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) ); + } + + goto decode_success; + } + + /* 0F 57 = XORPS -- G = G and E */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 ); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSE2 decoder. --- */ + /* ---------------------------------------------------- */ + + /* Skip parts of the decoder which don't apply given the stated + guest subarchitecture. */ + if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) + goto after_sse_decoders; /* no SSE2 capabilities */ + + insn = (UChar*)&guest_code[delta]; + + /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 ); + goto decode_success; + } + + /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 ); + goto decode_success; + } + + /* 66 0F 55 = ANDNPD -- G = (not G) and E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) { + delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F 54 = ANDPD -- G = G and E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) { + delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 ); + goto decode_success; + } + + /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) { + vassert(sz == 4); + delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 ); + goto decode_success; + } + + /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ + /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ + if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { + IRTemp argL = newTemp(Ity_F64); + IRTemp argR = newTemp(Ity_F64); + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) ); + delta += 2+1; + DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm)) ); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); + delta += 2+alen; + DIP("[u]comisd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) ); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( + OFFB_CC_DEP1, + binop( Iop_And32, + binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)), + mkU32(0x45) + ))); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. 
*/ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + goto decode_success; + } + + /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x + F64 in xmm(G) */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) { + IRTemp arg64 = newTemp(Ity_I64); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) ); + delta += 3+1; + DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("cvtdq2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + putXMMRegLane64F( + gregOfRM(modrm), 0, + unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64))) + ); + + putXMMRegLane64F( + gregOfRM(modrm), 1, + unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64))) + ); + + goto decode_success; + } + + /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in + xmm(G) */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtdq2ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + breakup128to32s( argV, &t3, &t2, &t1, &t0 ); + + # define CVT(_t) binop( Iop_F64toF32, \ + mkexpr(rmode), \ + unop(Iop_I32toF64,mkexpr(_t))) + + putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) ); + putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) ); + putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + 
} + + /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in + lo half xmm(G), and zero upper half */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("cvtpd2dq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + t0 = newTemp(Ity_F64); + t1 = newTemp(Ity_F64); + assign( t0, unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, mkexpr(argV))) ); + assign( t1, unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, mkexpr(argV))) ); + + # define CVT(_t) binop( Iop_F64toI32, \ + mkexpr(rmode), \ + mkexpr(_t) ) + + putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); + putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); + putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x + I32 in mmx, according to prevailing SSE rounding mode */ + /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x + I32 in mmx, rounding towards zero */ + if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { + IRTemp dst64 = newTemp(Ity_I64); + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + IRTemp f64hi = newTemp(Ity_F64); + Bool r2zero = toBool(insn[1] == 0x2C); + + do_MMX_preamble(); + modrm = getIByte(delta+2); + + if (epartIsReg(modrm)) { + delta += 2+1; + assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); + assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1)); + DIP("cvt%spd2pi %s,%s\n", r2zero ? 
"t" : "", + nameXMMReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + /* E is memory: the low F64 is at addr, the high F64 at addr+8. */ + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32, + mkexpr(addr), + mkU32(8) ))); + delta += 2+alen; + /* Trace mnemonic must match the register case: cvt[t]pd2pi. */ + DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + if (r2zero) { + assign(rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + assign( + dst64, + binop( Iop_32HLto64, + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64hi) ), + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo) ) + ) + ); + + putMMXReg(gregOfRM(modrm), mkexpr(dst64)); + goto decode_success; + } + + /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in + lo half xmm(G), and zero upper half */ + /* Note, this is practically identical to CVTPD2DQ. It would have + been nicer to merge them together, but the insn[] offsets differ + by one. */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpd2ps %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + t0 = newTemp(Ity_F64); + t1 = newTemp(Ity_F64); + assign( t0, unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, mkexpr(argV))) ); + assign( t1, unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, mkexpr(argV))) ); + + # define CVT(_t) binop( Iop_F64toF32, \ + mkexpr(rmode), \ + mkexpr(_t) ) + + putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); + putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); + putXMMRegLane32F( 
gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in + xmm(G) */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) { + IRTemp arg64 = newTemp(Ity_I64); + + modrm = getIByte(delta+2); + do_MMX_preamble(); + if (epartIsReg(modrm)) { + assign( arg64, getMMXReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtpi2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + putXMMRegLane64F( + gregOfRM(modrm), 0, + unop(Iop_I32toF64, unop(Iop_64to32, mkexpr(arg64)) ) + ); + + putXMMRegLane64F( + gregOfRM(modrm), 1, + unop(Iop_I32toF64, unop(Iop_64HIto32, mkexpr(arg64)) ) + ); + + goto decode_success; + } + + /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in + xmm(G) */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvtps2dq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, get_sse_roundingmode() ); + breakup128to32s( argV, &t3, &t2, &t1, &t0 ); + + /* This is less than ideal. If it turns out to be a performance + bottleneck it can be improved. 
*/ + # define CVT(_t) \ + binop( Iop_F64toI32, \ + mkexpr(rmode), \ + unop( Iop_F32toF64, \ + unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) + + putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); + putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); + putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x + F64 in xmm(G). */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) { + IRTemp f32lo = newTemp(Ity_F32); + IRTemp f32hi = newTemp(Ity_F32); + + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) ); + assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) ); + delta += 2+1; + DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); + assign( f32hi, loadLE(Ity_F32, + binop(Iop_Add32,mkexpr(addr),mkU32(4))) ); + delta += 2+alen; + DIP("cvtps2pd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + putXMMRegLane64F( gregOfRM(modrm), 1, + unop(Iop_F32toF64, mkexpr(f32hi)) ); + putXMMRegLane64F( gregOfRM(modrm), 0, + unop(Iop_F32toF64, mkexpr(f32lo)) ); + + goto decode_success; + } + + /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to + I32 in ireg, according to prevailing SSE rounding mode */ + /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to + I32 in ireg, rounding towards zero */ + if (insn[0] == 0xF2 && insn[1] == 0x0F + && (insn[2] == 0x2D || insn[2] == 0x2C)) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + Bool r2zero = toBool(insn[2] == 0x2C); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + delta += 3+1; + assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); + DIP("cvt%ssd2si %s,%s\n", r2zero ? 
"t" : "", + nameXMMReg(eregOfRM(modrm)), + nameIReg(4, gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + delta += 3+alen; + DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", + dis_buf, + nameIReg(4, gregOfRM(modrm))); + } + + if (r2zero) { + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + } else { + assign( rmode, get_sse_roundingmode() ); + } + + putIReg(4, gregOfRM(modrm), + binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo)) ); + + goto decode_success; + } + + /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in + low 1/4 xmm(G), according to prevailing SSE rounding mode */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) { + IRTemp rmode = newTemp(Ity_I32); + IRTemp f64lo = newTemp(Ity_F64); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + delta += 3+1; + assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); + DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); + delta += 3+alen; + DIP("cvtsd2ss %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( rmode, get_sse_roundingmode() ); + putXMMRegLane32F( + gregOfRM(modrm), 0, + binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) + ); + + goto decode_success; + } + + /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low + half xmm */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) { + IRTemp arg32 = newTemp(Ity_I32); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + assign( arg32, getIReg(4, eregOfRM(modrm)) ); + delta += 3+1; + DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); + delta += 3+alen; + DIP("cvtsi2sd %s,%s\n", dis_buf, + 
nameXMMReg(gregOfRM(modrm)) ); + } + + putXMMRegLane64F( + gregOfRM(modrm), 0, + unop(Iop_I32toF64, mkexpr(arg32)) ); + + goto decode_success; + } + + /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in + low half xmm(G) */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) { + IRTemp f32lo = newTemp(Ity_F32); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + delta += 3+1; + assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); + DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); + delta += 3+alen; + DIP("cvtss2sd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + putXMMRegLane64F( gregOfRM(modrm), 0, + unop( Iop_F32toF64, mkexpr(f32lo) ) ); + + goto decode_success; + } + + /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in + lo half xmm(G), and zero upper half, rounding towards zero */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("cvttpd2dq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + + t0 = newTemp(Ity_F64); + t1 = newTemp(Ity_F64); + assign( t0, unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, mkexpr(argV))) ); + assign( t1, unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, mkexpr(argV))) ); + + # define CVT(_t) binop( Iop_F64toI32, \ + mkexpr(rmode), \ + mkexpr(_t) ) + + putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); + putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) 
); + putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in + xmm(G), rounding towards zero */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) { + IRTemp argV = newTemp(Ity_V128); + IRTemp rmode = newTemp(Ity_I32); + vassert(sz == 4); + + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + assign( argV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("cvttps2dq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm)) ); + } + + assign( rmode, mkU32((UInt)Irrm_ZERO) ); + breakup128to32s( argV, &t3, &t2, &t1, &t0 ); + + /* This is less than ideal. If it turns out to be a performance + bottleneck it can be improved. 
*/ + # define CVT(_t) \ + binop( Iop_F64toI32, \ + mkexpr(rmode), \ + unop( Iop_F32toF64, \ + unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) + + putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); + putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); + putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); + putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); + + # undef CVT + + goto decode_success; + } + + /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 ); + goto decode_success; + } + + /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 ); + goto decode_success; + } + + /* 0F AE /5 = LFENCE -- flush pending operations to memory */ + /* 0F AE /6 = MFENCE -- flush pending operations to memory */ + if (insn[0] == 0x0F && insn[1] == 0xAE + && epartIsReg(insn[2]) + && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) { + vassert(sz == 4); + delta += 3; + /* Insert a memory fence. It's sometimes important that these + are carried through to the generated code. */ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("%sfence\n", gregOfRM(insn[2])==5 ? 
"l" : "m"); + goto decode_success; + } + + /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 ); + goto decode_success; + } + + /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 ); + goto decode_success; + } + + /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 ); + goto decode_success; + } + + /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 ); + goto decode_success; + } + + /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ + /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ + /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ + if (sz == 2 && insn[0] == 0x0F + && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { + HChar* wot = insn[1]==0x28 ? "apd" : + insn[1]==0x10 ? "upd" : "dqa"; + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRM(modrm), + getXMMReg( eregOfRM(modrm) )); + DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + putXMMReg( gregOfRM(modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("mov%s %s,%s\n", wot, dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + } + goto decode_success; + } + + /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ + /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). 
*/ + if (sz == 2 && insn[0] == 0x0F + && (insn[1] == 0x29 || insn[1] == 0x11)) { + HChar* wot = insn[1]==0x29 ? "apd" : "upd"; + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + /* fall through; awaiting test case */ + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)), + dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMReg( + gregOfRM(modrm), + unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) + ); + DIP("movd %s, %s\n", + nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + putXMMReg( + gregOfRM(modrm), + unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) + ); + DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm))); + } + goto decode_success; + } + + /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putIReg( 4, eregOfRM(modrm), + getXMMRegLane32(gregOfRM(modrm), 0) ); + DIP("movd %s, %s\n", + nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); + } else { + addr = disAMode( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + storeLE( mkexpr(addr), + getXMMRegLane32(gregOfRM(modrm), 0) ); + DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); + } + goto decode_success; + } + + /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + delta += 2+1; + putXMMReg( eregOfRM(modrm), + getXMMReg(gregOfRM(modrm)) ); + DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), + nameXMMReg(eregOfRM(modrm))); + } else { + addr = disAMode( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); + } + goto decode_success; + } + + /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ + /* Unfortunately can't simply use the MOVDQA case since the + prefix lengths are different (66 vs F3) */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + putXMMReg( gregOfRM(modrm), + getXMMReg( eregOfRM(modrm) )); + DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + putXMMReg( gregOfRM(modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("movdqu %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + goto decode_success; + } + + /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). 
*/ + /* Unfortunately can't simply use the MOVDQA case since the + prefix lengths are different (66 vs F3) */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + delta += 3+1; + putXMMReg( eregOfRM(modrm), + getXMMReg(gregOfRM(modrm)) ); + DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), + nameXMMReg(eregOfRM(modrm))); + } else { + addr = disAMode( &alen, sorb, delta+3, dis_buf ); + delta += 3+alen; + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); + } + goto decode_success; + } + + /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + do_MMX_preamble(); + putMMXReg( gregOfRM(modrm), + getXMMRegLane64( eregOfRM(modrm), 0 )); + DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + delta += 3+1; + goto decode_success; + } else { + /* fall through, apparently no mem case for this insn */ + } + } + + /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ + /* These seems identical to MOVHPS. This instruction encoding is + completely crazy. */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + /* fall through; apparently reg-reg is not possible */ + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movhpd %s,%s\n", dis_buf, + nameXMMReg( gregOfRM(modrm) )); + goto decode_success; + } + } + + /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ + /* Again, this seems identical to MOVHPS. 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRM(insn[2]), + 1/*upper lane*/ ) ); + DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ + /* Identical to MOVLPS ? */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + /* fall through; apparently reg-reg is not possible */ + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, + loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movlpd %s, %s\n", + dis_buf, nameXMMReg( gregOfRM(modrm) )); + goto decode_success; + } + } + + /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ + /* Identical to MOVLPS ? 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) { + if (!epartIsReg(insn[2])) { + delta += 2; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRM(insn[2]), + 0/*lower lane*/ ) ); + DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), + dis_buf); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to + 2 lowest bits of ireg(G) */ + if (insn[0] == 0x0F && insn[1] == 0x50) { + modrm = getIByte(delta+2); + if (sz == 2 && epartIsReg(modrm)) { + Int src; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + delta += 2+1; + src = eregOfRM(modrm); + assign( t0, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), + mkU32(1) )); + assign( t1, binop( Iop_And32, + binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), + mkU32(2) )); + putIReg(4, gregOfRM(modrm), + binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) + ); + DIP("movmskpd %s,%s\n", nameXMMReg(src), + nameIReg(4, gregOfRM(modrm))); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ + if (insn[0] == 0x0F && insn[1] == 0xF7) { + modrm = getIByte(delta+2); + if (sz == 2 && epartIsReg(modrm)) { + IRTemp regD = newTemp(Ity_V128); + IRTemp mask = newTemp(Ity_V128); + IRTemp olddata = newTemp(Ity_V128); + IRTemp newdata = newTemp(Ity_V128); + addr = newTemp(Ity_I32); + + assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); + assign( regD, getXMMReg( gregOfRM(modrm) )); + + /* Unfortunately can't do the obvious thing with SarN8x16 + here since that can't be re-emitted as SSE2 code - no such + insn. 
*/ + assign( + mask, + binop(Iop_64HLtoV128, + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRM(modrm), 1 ), + mkU8(7) ), + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRM(modrm), 0 ), + mkU8(7) ) )); + assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); + assign( newdata, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_AndV128, + mkexpr(olddata), + unop(Iop_NotV128, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + + delta += 2+1; + DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ), + nameXMMReg( gregOfRM(modrm) ) ); + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ + if (insn[0] == 0x0F && insn[1] == 0xE7) { + modrm = getIByte(delta+2); + if (sz == 2 && !epartIsReg(modrm)) { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); + DIP("movntdq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ + if (insn[0] == 0x0F && insn[1] == 0xC3) { + vassert(sz == 4); + modrm = getIByte(delta+2); + if (!epartIsReg(modrm)) { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) ); + DIP("movnti %s,%s\n", dis_buf, + nameIReg(4, gregOfRM(modrm))); + delta += 2+alen; + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem + or lo half xmm). 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) { + modrm = getIByte(delta+2); + if (epartIsReg(modrm)) { + /* fall through, awaiting test case */ + /* dst: lo half copied, hi half zeroed */ + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + storeLE( mkexpr(addr), + getXMMRegLane64( gregOfRM(modrm), 0 )); + DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf ); + delta += 2+alen; + goto decode_success; + } + } + + /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero + hi half). */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + do_MMX_preamble(); + putXMMReg( gregOfRM(modrm), + unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) ); + DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + goto decode_success; + } else { + /* fall through, apparently no mem case for this insn */ + } + } + + /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to + G (lo half xmm). Upper half of G is zeroed out. */ + /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to + G (lo half xmm). If E is mem, upper half of G is zeroed out. + If E is reg, upper half of G is unchanged. 
*/ + if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10) + || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + putXMMRegLane64( gregOfRM(modrm), 0, + getXMMRegLane64( eregOfRM(modrm), 0 )); + if (insn[0] == 0xF3/*MOVQ*/) { + /* zero bits 127:64 */ + putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); + } + /* Two encodings share this case; trace the one actually decoded. */ + DIP("%s %s,%s\n", insn[0] == 0xF3 ? "movq" : "movsd", + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + /* zero bits 127:64 */ + putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); + /* write bits 63:0 */ + putXMMRegLane64( gregOfRM(modrm), 0, + loadLE(Ity_I64, mkexpr(addr)) ); + /* Two encodings share this case; trace the one actually decoded. */ + DIP("%s %s,%s\n", insn[0] == 0xF3 ? "movq" : "movsd", + dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + goto decode_success; + } + + /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem + or lo half xmm). */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) { + vassert(sz == 4); + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + putXMMRegLane64( eregOfRM(modrm), 0, + getXMMRegLane64( gregOfRM(modrm), 0 )); + DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), + nameXMMReg(eregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + storeLE( mkexpr(addr), + getXMMRegLane64(gregOfRM(modrm), 0) ); + DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), + dis_buf); + delta += 3+alen; + } + goto decode_success; + } + + /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 ); + goto decode_success; + } + + /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 ); + goto decode_success; + } + + /* 66 0F 56 = ORPD -- G = G or E */ + if 
(sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 ); + goto decode_success; + } + + /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) { + Int select; + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + IRTemp d1 = newTemp(Ity_I64); + IRTemp d0 = newTemp(Ity_I64); + + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + select = (Int)insn[3]; + delta += 2+2; + DIP("shufpd $%d,%s,%s\n", select, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + select = (Int)insn[2+alen]; + delta += 3+alen; + DIP("shufpd $%d,%s,%s\n", select, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); + assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); + + # define SELD(n) mkexpr((n)==0 ? d0 : d1) + # define SELS(n) mkexpr((n)==0 ? 
s0 : s1) + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) ) + ); + + # undef SELD + # undef SELS + + goto decode_success; + } + + /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) { + delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, + "sqrtpd", Iop_Sqrt64Fx2 ); + goto decode_success; + } + + /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3, + "sqrtsd", Iop_Sqrt64F0x2 ); + goto decode_success; + } + + /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 ); + goto decode_success; + } + + /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) { + vassert(sz == 4); + delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 ); + goto decode_success; + } + + /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ + /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ + /* These just appear to be special cases of SHUFPS */ + if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { + IRTemp s1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + IRTemp d1 = newTemp(Ity_I64); + IRTemp d0 = newTemp(Ity_I64); + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + Bool hi = toBool(insn[1] == 0x15); + + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("unpck%sps %s,%s\n", hi ? "h" : "l", + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); + assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); + + if (hi) { + putXMMReg( gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); + } else { + putXMMReg( gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); + } + + goto decode_success; + } + + /* 66 0F 57 = XORPD -- G = G and E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 ); + goto decode_success; + } + + /* 66 0F 6B = PACKSSDW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "packssdw", Iop_QNarrow32Sx4, True ); + goto decode_success; + } + + /* 66 0F 63 = PACKSSWB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "packsswb", Iop_QNarrow16Sx8, True ); + goto decode_success; + } + + /* 66 0F 67 = PACKUSWB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "packuswb", Iop_QNarrow16Ux8, True ); + goto decode_success; + } + + /* 66 0F FC = PADDB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddb", Iop_Add8x16, False ); + goto decode_success; + } + + /* 66 0F FE = PADDD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddd", Iop_Add32x4, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F D4 = PADDQ -- add 64x1 */ + if (sz == 4 && insn[0] == 
0x0F && insn[1] == 0xD4) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "paddq", False ); + goto decode_success; + } + + /* 66 0F D4 = PADDQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddq", Iop_Add64x2, False ); + goto decode_success; + } + + /* 66 0F FD = PADDW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddw", Iop_Add16x8, False ); + goto decode_success; + } + + /* 66 0F EC = PADDSB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddsb", Iop_QAdd8Sx16, False ); + goto decode_success; + } + + /* 66 0F ED = PADDSW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddsw", Iop_QAdd16Sx8, False ); + goto decode_success; + } + + /* 66 0F DC = PADDUSB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddusb", Iop_QAdd8Ux16, False ); + goto decode_success; + } + + /* 66 0F DD = PADDUSW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "paddusw", Iop_QAdd16Ux8, False ); + goto decode_success; + } + + /* 66 0F DB = PAND */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F DF = PANDN */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) { + delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 ); + goto decode_success; + } + + /* 66 0F E0 = PAVGB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pavgb", Iop_Avg8Ux16, False ); + goto decode_success; + } + + /* 66 0F E3 = PAVGW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pavgw", Iop_Avg16Ux8, False ); + 
goto decode_success; + } + + /* 66 0F 74 = PCMPEQB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpeqb", Iop_CmpEQ8x16, False ); + goto decode_success; + } + + /* 66 0F 76 = PCMPEQD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpeqd", Iop_CmpEQ32x4, False ); + goto decode_success; + } + + /* 66 0F 75 = PCMPEQW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpeqw", Iop_CmpEQ16x8, False ); + goto decode_success; + } + + /* 66 0F 64 = PCMPGTB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpgtb", Iop_CmpGT8Sx16, False ); + goto decode_success; + } + + /* 66 0F 66 = PCMPGTD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpgtd", Iop_CmpGT32Sx4, False ); + goto decode_success; + } + + /* 66 0F 65 = PCMPGTW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pcmpgtw", Iop_CmpGT16Sx8, False ); + goto decode_success; + } + + /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put + zero-extend of it in ireg(G). 
*/ + if (insn[0] == 0x0F && insn[1] == 0xC5) { + modrm = insn[2]; + if (sz == 2 && epartIsReg(modrm)) { + t5 = newTemp(Ity_V128); + t4 = newTemp(Ity_I16); + assign(t5, getXMMReg(eregOfRM(modrm))); + breakup128to32s( t5, &t3, &t2, &t1, &t0 ); + switch (insn[3] & 7) { + case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; + case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; + case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; + case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; + case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; + case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; + case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; + case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; + default: vassert(0); /*NOTREACHED*/ + } + putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4))); + DIP("pextrw $%d,%s,%s\n", + (Int)insn[3], nameXMMReg(eregOfRM(modrm)), + nameIReg(4,gregOfRM(modrm))); + delta += 4; + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and + put it into the specified lane of xmm(G). 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) { + Int lane; + t4 = newTemp(Ity_I16); + modrm = insn[2]; + + if (epartIsReg(modrm)) { + assign(t4, getIReg(2, eregOfRM(modrm))); + delta += 3+1; + lane = insn[3+1-1]; + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + nameIReg(2,eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 3+alen; + lane = insn[3+alen-1]; + assign(t4, loadLE(Ity_I16, mkexpr(addr))); + DIP("pinsrw $%d,%s,%s\n", (Int)lane, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) ); + goto decode_success; + } + + /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from + E(xmm or mem) to G(xmm) */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) { + IRTemp s1V = newTemp(Ity_V128); + IRTemp s2V = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1Hi = newTemp(Ity_I64); + IRTemp s1Lo = newTemp(Ity_I64); + IRTemp s2Hi = newTemp(Ity_I64); + IRTemp s2Lo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( s1V, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmaddwd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + assign( s2V, getXMMReg(gregOfRM(modrm)) ); + assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); + assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); + assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); + assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); + assign( dHi, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86g_calculate_mmx_pmaddwd", + &x86g_calculate_mmx_pmaddwd, + mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) + )); + assign( dLo, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + 
"x86g_calculate_mmx_pmaddwd", + &x86g_calculate_mmx_pmaddwd, + mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) + )); + assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; + putXMMReg(gregOfRM(modrm), mkexpr(dV)); + goto decode_success; + } + + /* 66 0F EE = PMAXSW -- 16x8 signed max */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pmaxsw", Iop_Max16Sx8, False ); + goto decode_success; + } + + /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pmaxub", Iop_Max8Ux16, False ); + goto decode_success; + } + + /* 66 0F EA = PMINSW -- 16x8 signed min */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pminsw", Iop_Min16Sx8, False ); + goto decode_success; + } + + /* 66 0F DA = PMINUB -- 8x16 unsigned min */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pminub", Iop_Min8Ux16, False ); + goto decode_success; + } + + /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in + xmm(G), turn them into a byte, and put zero-extend of it in + ireg(G). Doing this directly is just too cumbersome; give up + therefore and call a helper. 
*/ + /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) { + modrm = insn[2]; + if (epartIsReg(modrm)) { + t0 = newTemp(Ity_I64); + t1 = newTemp(Ity_I64); + assign(t0, getXMMRegLane64(eregOfRM(modrm), 0)); + assign(t1, getXMMRegLane64(eregOfRM(modrm), 1)); + t5 = newTemp(Ity_I32); + assign(t5, mkIRExprCCall( + Ity_I32, 0/*regparms*/, + "x86g_calculate_sse_pmovmskb", + &x86g_calculate_sse_pmovmskb, + mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); + putIReg(4, gregOfRM(modrm), mkexpr(t5)); + DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameIReg(4,gregOfRM(modrm))); + delta += 3; + goto decode_success; + } + /* else fall through */ + } + + /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pmulhuw", Iop_MulHi16Ux8, False ); + goto decode_success; + } + + /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pmulhw", Iop_MulHi16Sx8, False ); + goto decode_success; + } + + /* 66 0F D5 = PMULHL -- 16x8 multiply */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "pmullw", Iop_Mul16x8, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x + 0 to form 64-bit result */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + t1 = newTemp(Ity_I32); + t0 = newTemp(Ity_I32); + modrm = insn[2]; + + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else 
{ + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmuludq %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + assign( t0, unop(Iop_64to32, mkexpr(dV)) ); + assign( t1, unop(Iop_64to32, mkexpr(sV)) ); + putMMXReg( gregOfRM(modrm), + binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); + goto decode_success; + } + + /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x + 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit + half */ + /* This is a really poor translation -- could be improved if + performance critical */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) { + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + t1 = newTemp(Ity_I64); + t0 = newTemp(Ity_I64); + modrm = insn[2]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("pmuludq %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); + putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) ); + assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); + putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) ); + goto decode_success; + } + + /* 66 0F EB = POR */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 ); + goto decode_success; + } + + /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs + from E(xmm or mem) to G(xmm) */ + if (sz == 2 && insn[0] == 
0x0F && insn[1] == 0xF6) { + IRTemp s1V = newTemp(Ity_V128); + IRTemp s2V = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp s1Hi = newTemp(Ity_I64); + IRTemp s1Lo = newTemp(Ity_I64); + IRTemp s2Hi = newTemp(Ity_I64); + IRTemp s2Lo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( s1V, getXMMReg(eregOfRM(modrm)) ); + delta += 2+1; + DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 2+alen; + DIP("psadbw %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + assign( s2V, getXMMReg(gregOfRM(modrm)) ); + assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); + assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); + assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); + assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); + assign( dHi, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86g_calculate_mmx_psadbw", + &x86g_calculate_mmx_psadbw, + mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) + )); + assign( dLo, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "x86g_calculate_mmx_psadbw", + &x86g_calculate_mmx_psadbw, + mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) + )); + assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; + putXMMReg(gregOfRM(modrm), mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) { + Int order; + IRTemp sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + order = (Int)insn[3]; + delta += 2+2; + DIP("pshufd $%d,%s,%s\n", order, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+2, 
dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[2+alen]; + delta += 3+alen; + DIP("pshufd $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) + assign(dV, + mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + putXMMReg(gregOfRM(modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or + mem) to G(xmm), and copy lower half */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) { + Int order; + IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + sVhi = newTemp(Ity_I64); + dVhi = newTemp(Ity_I64); + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + order = (Int)insn[4]; + delta += 4+1; + DIP("pshufhw $%d,%s,%s\n", order, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[3+alen]; + delta += 4+alen; + DIP("pshufhw $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); + breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + assign(dVhi, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + assign(dV, binop( Iop_64HLtoV128, + mkexpr(dVhi), + unop(Iop_V128to64, mkexpr(sV))) ); + putXMMReg(gregOfRM(modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or + mem) to G(xmm), and copy upper half */ + if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) { + Int order; + IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + sVlo = newTemp(Ity_I64); + dVlo = newTemp(Ity_I64); + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + order = (Int)insn[4]; + delta += 4+1; + DIP("pshuflw $%d,%s,%s\n", order, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + order = (Int)insn[3+alen]; + delta += 4+alen; + DIP("pshuflw $%d,%s,%s\n", order, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); + breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); + + # define SEL(n) \ + ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) + assign(dVlo, + mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), + SEL((order>>2)&3), SEL((order>>0)&3) ) + ); + assign(dV, binop( Iop_64HLtoV128, + unop(Iop_V128HIto64, mkexpr(sV)), + mkexpr(dVlo) ) ); + putXMMReg(gregOfRM(modrm), mkexpr(dV)); + # undef SEL + goto decode_success; + } + + /* 66 0F 72 /6 ib = PSLLD by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 ); + goto decode_success; + } + + /* 66 0F F2 = PSLLD by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 ); + goto decode_success; + } + + /* 66 0F 73 /7 ib = PSLLDQ by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 7) { + IRTemp sV, dV, hi64, lo64, hi64r, lo64r; + Int imm = (Int)insn[3]; + Int reg = eregOfRM(insn[2]); + DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); + vassert(imm >= 0 && imm <= 255); + delta += 4; + + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + hi64 = newTemp(Ity_I64); + lo64 = newTemp(Ity_I64); + hi64r = newTemp(Ity_I64); + lo64r = newTemp(Ity_I64); + + if (imm >= 16) { + putXMMReg(reg, mkV128(0x0000)); + goto decode_success; + } + + assign( sV, getXMMReg(reg) ); + assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); + + if (imm == 0) { + assign( lo64r, mkexpr(lo64) ); + assign( hi64r, mkexpr(hi64) ); + } + else + if (imm == 8) { + assign( lo64r, mkU64(0) ); + assign( hi64r, mkexpr(lo64) ); + } + else + if (imm > 8) { + assign( lo64r, mkU64(0) ); + assign( hi64r, binop( Iop_Shl64, + mkexpr(lo64), + mkU8( 8*(imm-8) ) )); + } else { + assign( lo64r, binop( Iop_Shl64, + mkexpr(lo64), + mkU8(8 * imm) )); + assign( hi64r, + binop( Iop_Or64, + binop(Iop_Shl64, mkexpr(hi64), + mkU8(8 * imm)), + binop(Iop_Shr64, mkexpr(lo64), + mkU8(8 * (8 - imm)) ) + 
) + ); + } + assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); + putXMMReg(reg, mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 73 /6 ib = PSLLQ by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 ); + goto decode_success; + } + + /* 66 0F F3 = PSLLQ by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 ); + goto decode_success; + } + + /* 66 0F 71 /6 ib = PSLLW by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 6) { + delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 ); + goto decode_success; + } + + /* 66 0F F1 = PSLLW by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 ); + goto decode_success; + } + + /* 66 0F 72 /4 ib = PSRAD by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 4) { + delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 ); + goto decode_success; + } + + /* 66 0F E2 = PSRAD by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 ); + goto decode_success; + } + + /* 66 0F 71 /4 ib = PSRAW by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 4) { + delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 ); + goto decode_success; + } + + /* 66 0F E1 = PSRAW by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 ); + goto decode_success; + } + + /* 66 0F 72 /2 ib = PSRLD by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) 
== 2) { + delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 ); + goto decode_success; + } + + /* 66 0F D2 = PSRLD by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 ); + goto decode_success; + } + + /* 66 0F 73 /3 ib = PSRLDQ by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 3) { + IRTemp sV, dV, hi64, lo64, hi64r, lo64r; + Int imm = (Int)insn[3]; + Int reg = eregOfRM(insn[2]); + DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); + vassert(imm >= 0 && imm <= 255); + delta += 4; + + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + hi64 = newTemp(Ity_I64); + lo64 = newTemp(Ity_I64); + hi64r = newTemp(Ity_I64); + lo64r = newTemp(Ity_I64); + + if (imm >= 16) { + putXMMReg(reg, mkV128(0x0000)); + goto decode_success; + } + + assign( sV, getXMMReg(reg) ); + assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); + + if (imm == 0) { + assign( lo64r, mkexpr(lo64) ); + assign( hi64r, mkexpr(hi64) ); + } + else + if (imm == 8) { + assign( hi64r, mkU64(0) ); + assign( lo64r, mkexpr(hi64) ); + } + else + if (imm > 8) { + assign( hi64r, mkU64(0) ); + assign( lo64r, binop( Iop_Shr64, + mkexpr(hi64), + mkU8( 8*(imm-8) ) )); + } else { + assign( hi64r, binop( Iop_Shr64, + mkexpr(hi64), + mkU8(8 * imm) )); + assign( lo64r, + binop( Iop_Or64, + binop(Iop_Shr64, mkexpr(lo64), + mkU8(8 * imm)), + binop(Iop_Shl64, mkexpr(hi64), + mkU8(8 * (8 - imm)) ) + ) + ); + } + + assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); + putXMMReg(reg, mkexpr(dV)); + goto decode_success; + } + + /* 66 0F 73 /2 ib = PSRLQ by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 2) { + delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 ); + goto decode_success; + } + + /* 66 0F D3 = PSRLQ by E */ + if (sz == 2 && insn[0] == 
0x0F && insn[1] == 0xD3) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 ); + goto decode_success; + } + + /* 66 0F 71 /2 ib = PSRLW by immediate */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 + && epartIsReg(insn[2]) + && gregOfRM(insn[2]) == 2) { + delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 ); + goto decode_success; + } + + /* 66 0F D1 = PSRLW by E */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) { + delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 ); + goto decode_success; + } + + /* 66 0F F8 = PSUBB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubb", Iop_Sub8x16, False ); + goto decode_success; + } + + /* 66 0F FA = PSUBD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubd", Iop_Sub32x4, False ); + goto decode_success; + } + + /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ + /* 0F FB = PSUBQ -- sub 64x1 */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) { + do_MMX_preamble(); + delta = dis_MMXop_regmem_to_reg ( + sorb, delta+2, insn[1], "psubq", False ); + goto decode_success; + } + + /* 66 0F FB = PSUBQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubq", Iop_Sub64x2, False ); + goto decode_success; + } + + /* 66 0F F9 = PSUBW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubw", Iop_Sub16x8, False ); + goto decode_success; + } + + /* 66 0F E8 = PSUBSB */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubsb", Iop_QSub8Sx16, False ); + goto decode_success; + } + + /* 66 0F E9 = PSUBSW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubsw", Iop_QSub16Sx8, False ); + goto decode_success; + } + + /* 66 0F D8 = PSUBSB */ + if (sz 
== 2 && insn[0] == 0x0F && insn[1] == 0xD8) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubusb", Iop_QSub8Ux16, False ); + goto decode_success; + } + + /* 66 0F D9 = PSUBSW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "psubusw", Iop_QSub16Ux8, False ); + goto decode_success; + } + + /* 66 0F 68 = PUNPCKHBW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpckhbw", + Iop_InterleaveHI8x16, True ); + goto decode_success; + } + + /* 66 0F 6A = PUNPCKHDQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpckhdq", + Iop_InterleaveHI32x4, True ); + goto decode_success; + } + + /* 66 0F 6D = PUNPCKHQDQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpckhqdq", + Iop_InterleaveHI64x2, True ); + goto decode_success; + } + + /* 66 0F 69 = PUNPCKHWD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpckhwd", + Iop_InterleaveHI16x8, True ); + goto decode_success; + } + + /* 66 0F 60 = PUNPCKLBW */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpcklbw", + Iop_InterleaveLO8x16, True ); + goto decode_success; + } + + /* 66 0F 62 = PUNPCKLDQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpckldq", + Iop_InterleaveLO32x4, True ); + goto decode_success; + } + + /* 66 0F 6C = PUNPCKLQDQ */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpcklqdq", + Iop_InterleaveLO64x2, True ); + goto decode_success; + } + + /* 66 0F 61 = PUNPCKLWD */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) { + delta = dis_SSEint_E_to_G( sorb, delta+2, + "punpcklwd", + Iop_InterleaveLO16x8, True ); + goto decode_success; + } + + /* 66 0F EF = 
PXOR */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) { + delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 ); + goto decode_success; + } + + //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ + //-- if (insn[0] == 0x0F && insn[1] == 0xAE + //-- && (!epartIsReg(insn[2])) + //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { + //-- Bool store = gregOfRM(insn[2]) == 0; + //-- vg_assert(sz == 4); + //-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); + //-- t1 = LOW24(pair); + //-- eip += 2+HI8(pair); + //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, + //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], + //-- Lit16, (UShort)insn[2], + //-- TempReg, t1 ); + //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); + //-- goto decode_success; + //-- } + + /* 0F AE /7 = CLFLUSH -- flush cache line */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { + + /* This is something of a hack. We need to know the size of the + cache line containing addr. Since we don't (easily), assume + 256 on the basis that no real cache would have a line that + big. It's safe to invalidate more stuff than we need, just + inefficient. */ + UInt lineszB = 256; + + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + delta += 2+alen; + + /* Round addr down to the start of the containing block. */ + stmt( IRStmt_Put( + OFFB_TISTART, + binop( Iop_And32, + mkexpr(addr), + mkU32( ~(lineszB-1) ))) ); + + stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) ); + + irsb->jumpkind = Ijk_TInval; + irsb->next = mkU32(guest_EIP_bbstart+delta); + dres.whatNext = Dis_StopHere; + + DIP("clflush %s\n", dis_buf); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE2 decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSE3 decoder. 
--- */ + /* ---------------------------------------------------- */ + + /* Skip parts of the decoder which don't apply given the stated + guest subarchitecture. */ + /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */ + /* In fact this is highly bogus; we accept SSE3 insns even on a + SSE2-only guest since they turn into IR which can be re-emitted + successfully on an SSE2 host. */ + if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) + goto after_sse_decoders; /* no SSE3 capabilities */ + + insn = (UChar*)&guest_code[delta]; + + /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (2:2:0:0). */ + /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (3:3:1:1). */ + if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F + && (insn[2] == 0x12 || insn[2] == 0x16)) { + IRTemp s3, s2, s1, s0; + IRTemp sV = newTemp(Ity_V128); + Bool isH = insn[2] == 0x16; + s3 = s2 = s1 = s0 = IRTemp_INVALID; + + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRM(modrm)) ); + DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', + dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + putXMMReg( gregOfRM(modrm), + isH ? mk128from32s( s3, s3, s1, s1 ) + : mk128from32s( s2, s2, s0, s0 ) ); + goto decode_success; + } + + /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), + duplicating some lanes (0:1:0:1). 
*/ + if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) { + IRTemp sV = newTemp(Ity_V128); + IRTemp d0 = newTemp(Ity_I64); + + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRM(modrm)) ); + DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); + DIP("movddup %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); + goto decode_success; + } + + /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ + if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) { + IRTemp a3, a2, a1, a0, s3, s2, s1, s0; + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp addV = newTemp(Ity_V128); + IRTemp subV = newTemp(Ity_V128); + a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; + + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRM(modrm)) ); + DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("addsubps %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + assign( gV, getXMMReg(gregOfRM(modrm)) ); + + assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); + assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); + + breakup128to32s( addV, &a3, &a2, &a1, &a0 ); + breakup128to32s( subV, &s3, &s2, &s1, &s0 ); + + putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 )); + goto decode_success; + } + + /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). 
*/ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) { + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp addV = newTemp(Ity_V128); + IRTemp subV = newTemp(Ity_V128); + IRTemp a1 = newTemp(Ity_I64); + IRTemp s0 = newTemp(Ity_I64); + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRM(modrm)) ); + DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("addsubpd %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRM(modrm)) ); + + assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); + assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); + + assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); + assign( s0, unop(Iop_V128to64, mkexpr(subV) )); + + putXMMReg( gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); + goto decode_success; + } + + /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ + /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ + if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F + && (insn[2] == 0x7C || insn[2] == 0x7D)) { + IRTemp e3, e2, e1, e0, g3, g2, g1, g0; + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp leftV = newTemp(Ity_V128); + IRTemp rightV = newTemp(Ity_V128); + Bool isAdd = insn[2] == 0x7C; + HChar* str = isAdd ? 
"add" : "sub"; + e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; + + modrm = insn[3]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRM(modrm)) ); + DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("h%sps %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + assign( gV, getXMMReg(gregOfRM(modrm)) ); + + breakup128to32s( eV, &e3, &e2, &e1, &e0 ); + breakup128to32s( gV, &g3, &g2, &g1, &g0 ); + + assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); + assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); + + putXMMReg( gregOfRM(modrm), + binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, + mkexpr(leftV), mkexpr(rightV) ) ); + goto decode_success; + } + + /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ + /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ + if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { + IRTemp e1 = newTemp(Ity_I64); + IRTemp e0 = newTemp(Ity_I64); + IRTemp g1 = newTemp(Ity_I64); + IRTemp g0 = newTemp(Ity_I64); + IRTemp eV = newTemp(Ity_V128); + IRTemp gV = newTemp(Ity_V128); + IRTemp leftV = newTemp(Ity_V128); + IRTemp rightV = newTemp(Ity_V128); + Bool isAdd = insn[1] == 0x7C; + HChar* str = isAdd ? 
"add" : "sub"; + + modrm = insn[2]; + if (epartIsReg(modrm)) { + assign( eV, getXMMReg( eregOfRM(modrm)) ); + DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 2+1; + } else { + addr = disAMode ( &alen, sorb, delta+2, dis_buf ); + assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("h%spd %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 2+alen; + } + + assign( gV, getXMMReg(gregOfRM(modrm)) ); + + assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); + assign( e0, unop(Iop_V128to64, mkexpr(eV) )); + assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); + assign( g0, unop(Iop_V128to64, mkexpr(gV) )); + + assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); + assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); + + putXMMReg( gregOfRM(modrm), + binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, + mkexpr(leftV), mkexpr(rightV) ) ); + goto decode_success; + } + + /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ + if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) { + modrm = getIByte(delta+3); + if (epartIsReg(modrm)) { + goto decode_failure; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + putXMMReg( gregOfRM(modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("lddqu %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE3 decoder. --- */ + /* ---------------------------------------------------- */ + + /* ---------------------------------------------------- */ + /* --- start of the SSSE3 decoder. 
--- */ + /* ---------------------------------------------------- */ + + /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp sVoddsSX = newTemp(Ity_I64); + IRTemp sVevensSX = newTemp(Ity_I64); + IRTemp dVoddsZX = newTemp(Ity_I64); + IRTemp dVevensZX = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x4, + binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x4, + binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putMMXReg( + gregOfRM(modrm), + binop(Iop_QAdd16Sx4, + binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sVoddsSX = newTemp(Ity_V128); + IRTemp sVevensSX = newTemp(Ity_V128); + IRTemp dVoddsZX = newTemp(Ity_V128); + IRTemp dVevensZX = newTemp(Ity_V128); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + 
assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x8, + binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_QAdd16Sx8, + binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ + /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G + to G (mmx). 
*/ + + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("ph%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + binop(opV64, + binop(opCatE,mkexpr(sV),mkexpr(dV)), + binop(opCatO,mkexpr(sV),mkexpr(dV)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and + G to G (xmm). 
*/ + /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and + G to G (xmm). */ + + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRM(modrm)) ); + DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("ph%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + /* This isn't a particularly efficient way to compute the + result, but at least it avoids a proliferation of IROps, + hence avoids complicating all the backends. 
*/ + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + binop(opV64, + binop(opCatE,mkexpr(sHi),mkexpr(sLo)), + binop(opCatO,mkexpr(sHi),mkexpr(sLo)) + ), + binop(opV64, + binop(opCatE,mkexpr(dHi),mkexpr(dLo)), + binop(opCatO,mkexpr(dHi),mkexpr(dLo)) + ) + ) + ); + goto decode_success; + } + + /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale + (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) + ); + goto decode_success; + } + + /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and + Scale (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( 
dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), + dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) + ) + ); + goto decode_success; + } + + /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ + /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ + /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ + /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ + /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + HChar* str = "???"; + Int 
laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), + dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ + /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ + /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 
3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PABS_helper( mkexpr(sV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ + /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ + /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PABS_helper( mkexpr(sHi), laneszB ), + dis_PABS_helper( mkexpr(sLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + d32 = (UInt)insn[3+1]; + delta += 3+1+1; + DIP("palignr 
$%d,%s,%s\n", (Int)d32, + nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + d32 = (UInt)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + /* res = low 64 bits of (dV:sV) >> (8*d32): sV supplies the low half, dV the high half. */ + if (d32 == 0) { + assign( res, mkexpr(sV) ); + } + else if (d32 >= 1 && d32 <= 7) { + assign(res, + binop(Iop_Or64, + binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)), + binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32)) + ))); + } + else if (d32 == 8) { + assign( res, mkexpr(dV) ); + } + else if (d32 >= 9 && d32 <= 15) { + assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) ); + } + else if (d32 >= 16 && d32 <= 255) { + assign( res, mkU64(0) ); + } + else + vassert(0); + + putMMXReg( gregOfRM(modrm), mkexpr(res) ); + goto decode_success; + } + + /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + d32 = (UInt)insn[3+1]; + delta += 3+1+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + d32 = (UInt)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + if (d32 == 0) { + assign( rHi, mkexpr(sHi) ); + assign( rLo, mkexpr(sLo) ); + } + else if (d32 >= 1 && d32 <= 7) { + assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) ); + assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) ); + } + else if (d32 == 8) { + assign( rHi, mkexpr(dLo) ); + assign( rLo, mkexpr(sHi) ); + } + else if (d32 >= 9 && d32 <= 15) { + assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) ); + assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) ); + } + else if (d32 == 16) { + assign( rHi, mkexpr(dHi) ); + assign( rLo, mkexpr(dLo) ); + } + else if (d32 >= 17 && d32 <= 23) { + assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) ); + assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) ); + } + else if (d32 == 24) { + assign( rHi, mkU64(0) ); + assign( rLo, mkexpr(dHi) ); + } + else if (d32 >= 25 && d32 <= 31) { + assign( rHi, mkU64(0) ); + assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) ); + } + else if (d32 >= 32 && d32 <= 255) { + assign( rHi, mkU64(0) ); + assign( rLo, mkU64(0) ); + } + else + vassert(0); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + goto decode_success; + } + + /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + binop( + Iop_And64, + /* permute the lanes */ + binop( + Iop_Perm8x8, 
+ mkexpr(dV), + binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) + ), + /* mask off lanes which have (index & 0x80) == 0x80 */ + unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) + ) + ); + goto decode_success; + } + + /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp sevens = newTemp(Ity_I64); + IRTemp mask0x80hi = newTemp(Ity_I64); + IRTemp mask0x80lo = newTemp(Ity_I64); + IRTemp maskBit3hi = newTemp(Ity_I64); + IRTemp maskBit3lo = newTemp(Ity_I64); + IRTemp sAnd7hi = newTemp(Ity_I64); + IRTemp sAnd7lo = newTemp(Ity_I64); + IRTemp permdHi = newTemp(Ity_I64); + IRTemp permdLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + assign( sevens, mkU64(0x0707070707070707ULL) ); + + /* + mask0x80hi = Not(SarN8x8(sHi,7)) + maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) + sAnd7hi = And(sHi,sevens) + permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), + And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) + rHi = And(permdHi,mask0x80hi) + */ + assign( + mask0x80hi, + 
unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); + + assign( + maskBit3hi, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), + mkU8(7))); + + assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); + + assign( + permdHi, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), + mkexpr(maskBit3hi)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), + unop(Iop_Not64,mkexpr(maskBit3hi))) )); + + assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); + + /* And the same for the lower half of the result. What fun. */ + + assign( + mask0x80lo, + unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); + + assign( + maskBit3lo, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), + mkU8(7))); + + assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); + + assign( + permdLo, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), + mkexpr(maskBit3lo)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), + unop(Iop_Not64,mkexpr(maskBit3lo))) )); + + assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSSE3 decoder. 
--- */ + /* ---------------------------------------------------- */ + + after_sse_decoders: + + /* ---------------------------------------------------- */ + /* --- deal with misc 0x67 pfxs (addr size override) -- */ + /* ---------------------------------------------------- */ + + /* 67 E3 = JCXZ (for JECXZ see below) */ + if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) { + delta += 2; + d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); + delta ++; + stmt( IRStmt_Exit( + binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)), + Ijk_Boring, + IRConst_U32(d32) + )); + DIP("jcxz 0x%x\n", d32); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- start of the baseline insn decoder -- */ + /* ---------------------------------------------------- */ + + /* Get the primary opcode. */ + opc = getIByte(delta); delta++; + + /* We get here if the current insn isn't SSE, or this CPU doesn't + support SSE. */ + + switch (opc) { + + /* ------------------------ Control flow --------------- */ + + case 0xC2: /* RET imm16 */ + d32 = getUDisp16(delta); + delta += 2; + dis_ret(d32); + dres.whatNext = Dis_StopHere; + DIP("ret %d\n", (Int)d32); + break; + case 0xC3: /* RET */ + dis_ret(0); + dres.whatNext = Dis_StopHere; + DIP("ret\n"); + break; + + case 0xCF: /* IRET */ + /* Note, this is an extremely kludgey and limited implementation + of iret. All it really does is: + popl %EIP; popl %CS; popl %EFLAGS. + %CS is set but ignored (as it is in (eg) popw %cs)". 
*/ + t1 = newTemp(Ity_I32); /* ESP */ + t2 = newTemp(Ity_I32); /* new EIP */ + t3 = newTemp(Ity_I32); /* new CS */ + t4 = newTemp(Ity_I32); /* new EFLAGS */ + assign(t1, getIReg(4,R_ESP)); + assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) ))); + assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) ))); + assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) ))); + /* Get stuff off stack */ + putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12))); + /* set %CS (which is ignored anyway) */ + putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) ); + /* set %EFLAGS */ + set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ ); + /* goto new EIP value */ + jmp_treg(Ijk_Ret,t2); + dres.whatNext = Dis_StopHere; + DIP("iret (very kludgey)\n"); + break; + + case 0xE8: /* CALL J4 */ + d32 = getUDisp32(delta); delta += 4; + d32 += (guest_EIP_bbstart+delta); + /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */ + if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58 + && getIByte(delta) <= 0x5F) { + /* Specially treat the position-independent-code idiom + call X + X: popl %reg + as + movl %eip, %reg. + since this generates better code, but for no other reason. */ + Int archReg = getIByte(delta) - 0x58; + /* vex_printf("-- fPIC thingy\n"); */ + putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta)); + delta++; /* Step over the POP */ + DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); + } else { + /* The normal sequence for a call. */ + t1 = newTemp(Ity_I32); + assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); + putIReg(4, R_ESP, mkexpr(t1)); + storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta)); + if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) { + /* follow into the call target. 
*/ + dres.whatNext = Dis_Resteer; + dres.continueAt = (Addr64)(Addr32)d32; + } else { + jmp_lit(Ijk_Call,d32); + dres.whatNext = Dis_StopHere; + } + DIP("call 0x%x\n",d32); + } + break; + + //-- case 0xC8: /* ENTER */ + //-- d32 = getUDisp16(eip); eip += 2; + //-- abyte = getIByte(delta); delta++; + //-- + //-- vg_assert(sz == 4); + //-- vg_assert(abyte == 0); + //-- + //-- t1 = newTemp(cb); t2 = newTemp(cb); + //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); + //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); + //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + //-- uLiteral(cb, sz); + //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); + //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); + //-- if (d32) { + //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); + //-- uLiteral(cb, d32); + //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); + //-- } + //-- DIP("enter 0x%x, 0x%x", d32, abyte); + //-- break; + + case 0xC9: /* LEAVE */ + vassert(sz == 4); + t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); + assign(t1, getIReg(4,R_EBP)); + /* First PUT ESP looks redundant, but need it because ESP must + always be up-to-date for Memcheck to work... */ + putIReg(4, R_ESP, mkexpr(t1)); + assign(t2, loadLE(Ity_I32,mkexpr(t1))); + putIReg(4, R_EBP, mkexpr(t2)); + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) ); + DIP("leave\n"); + break; + + /* ---------------- Misc weird-ass insns --------------- */ + + case 0x27: /* DAA */ + case 0x2F: /* DAS */ + case 0x37: /* AAA */ + case 0x3F: /* AAS */ + /* An ugly implementation for some ugly instructions. Oh + well. */ + if (sz != 4) goto decode_failure; + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + /* Make up a 32-bit value (t1), with the old value of AX in the + bottom 16 bits, and the old OSZACP bitmask in the upper 16 + bits. 
*/ + assign(t1, + binop(Iop_16HLto32, + unop(Iop_32to16, + mk_x86g_calculate_eflags_all()), + getIReg(2, R_EAX) + )); + /* Call the helper fn, to get a new AX and OSZACP value, and + poke both back into the guest state. Also pass the helper + the actual opcode so it knows which of the 4 instructions it + is doing the computation for. */ + vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F); + assign(t2, + mkIRExprCCall( + Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas", + &x86g_calculate_daa_das_aaa_aas, + mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) ) + )); + putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) )); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, + binop(Iop_And32, + binop(Iop_Shr32, mkexpr(t2), mkU8(16)), + mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P + | X86G_CC_MASK_A | X86G_CC_MASK_Z + | X86G_CC_MASK_S| X86G_CC_MASK_O ) + ) + ) + ); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + switch (opc) { + case 0x27: DIP("daa\n"); break; + case 0x2F: DIP("das\n"); break; + case 0x37: DIP("aaa\n"); break; + case 0x3F: DIP("aas\n"); break; + default: vassert(0); + } + break; + + //-- case 0xD4: /* AAM */ + //-- case 0xD5: /* AAD */ + //-- d32 = getIByte(delta); delta++; + //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !"); + //-- t1 = newTemp(cb); + //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); + //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */ + //-- uInstr1(cb, WIDEN, 4, TempReg, t1); + //-- uWiden(cb, 2, False); + //-- uInstr0(cb, CALLM_S, 0); + //-- uInstr1(cb, PUSH, 4, TempReg, t1); + //-- uInstr1(cb, CALLM, 0, Lit16, + //-- opc == 0xD4 ? 
VGOFF_(helper_AAM) : VGOFF_(helper_AAD) ); + //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty); + //-- uInstr1(cb, POP, 4, TempReg, t1); + //-- uInstr0(cb, CALLM_E, 0); + //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); + //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n"); + //-- break; + + /* ------------------------ CWD/CDQ -------------------- */ + + case 0x98: /* CBW */ + if (sz == 4) { + putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX))); + DIP("cwde\n"); + } else { + vassert(sz == 2); + putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX))); + DIP("cbw\n"); + } + break; + + case 0x99: /* CWD/CDQ */ + ty = szToITy(sz); + putIReg(sz, R_EDX, + binop(mkSizedOp(ty,Iop_Sar8), + getIReg(sz, R_EAX), + mkU8(sz == 2 ? 15 : 31)) ); + DIP(sz == 2 ? "cwdq\n" : "cdqq\n"); + break; + + /* ------------------------ FPU ops -------------------- */ + + case 0x9E: /* SAHF */ + codegen_SAHF(); + DIP("sahf\n"); + break; + + case 0x9F: /* LAHF */ + codegen_LAHF(); + DIP("lahf\n"); + break; + + case 0x9B: /* FWAIT */ + /* ignore? 
*/ + DIP("fwait\n"); + break; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: { + Int delta0 = delta; + Bool decode_OK = False; + delta = dis_FPU ( &decode_OK, sorb, delta ); + if (!decode_OK) { + delta = delta0; + goto decode_failure; + } + break; + } + + /* ------------------------ INC & DEC ------------------ */ + + case 0x40: /* INC eAX */ + case 0x41: /* INC eCX */ + case 0x42: /* INC eDX */ + case 0x43: /* INC eBX */ + case 0x44: /* INC eSP */ + case 0x45: /* INC eBP */ + case 0x46: /* INC eSI */ + case 0x47: /* INC eDI */ + vassert(sz == 2 || sz == 4); + ty = szToITy(sz); + t1 = newTemp(ty); + assign( t1, binop(mkSizedOp(ty,Iop_Add8), + getIReg(sz, (UInt)(opc - 0x40)), + mkU(ty,1)) ); + setFlags_INC_DEC( True, t1, ty ); + putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1)); + DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40)); + break; + + case 0x48: /* DEC eAX */ + case 0x49: /* DEC eCX */ + case 0x4A: /* DEC eDX */ + case 0x4B: /* DEC eBX */ + case 0x4C: /* DEC eSP */ + case 0x4D: /* DEC eBP */ + case 0x4E: /* DEC eSI */ + case 0x4F: /* DEC eDI */ + vassert(sz == 2 || sz == 4); + ty = szToITy(sz); + t1 = newTemp(ty); + assign( t1, binop(mkSizedOp(ty,Iop_Sub8), + getIReg(sz, (UInt)(opc - 0x48)), + mkU(ty,1)) ); + setFlags_INC_DEC( False, t1, ty ); + putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1)); + DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48)); + break; + + /* ------------------------ INT ------------------------ */ + + case 0xCC: /* INT 3 */ + jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta); + dres.whatNext = Dis_StopHere; + DIP("int $0x3\n"); + break; + + case 0xCD: /* INT imm8 */ + d32 = getIByte(delta); delta++; + + /* For any of the cases where we emit a jump (that is, for all + currently handled cases), it's important that all ArchRegs + carry their up-to-date value at this point. So we declare an + end-of-block here, which forces any TempRegs caching ArchRegs + to be flushed. 
*/ + + /* Handle int $0x40 .. $0x43 by synthesising a segfault and a + restart of this instruction (hence the "-2" two lines below, + to get the restart EIP to be this instruction). This is + probably Linux-specific and it would be more correct to only + do this if the VexAbiInfo says that is what we should do. */ + if (d32 >= 0x40 && d32 <= 0x43) { + jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2); + dres.whatNext = Dis_StopHere; + DIP("int $0x%x\n", (Int)d32); + break; + } + + /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82 + (darwin syscalls). As part of this, note where we are, so we + can back up the guest to this point if the syscall needs to + be restarted. */ + if (d32 == 0x80) { + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, + mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta); + dres.whatNext = Dis_StopHere; + DIP("int $0x80\n"); + break; + } + if (d32 == 0x81) { + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, + mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta); + dres.whatNext = Dis_StopHere; + DIP("int $0x81\n"); + break; + } + if (d32 == 0x82) { + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, + mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta); + dres.whatNext = Dis_StopHere; + DIP("int $0x82\n"); + break; + } + + /* none of the above */ + goto decode_failure; + + /* ------------------------ Jcond, byte offset --------- */ + + case 0xEB: /* Jb (jump, byte offset) */ + d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); + delta++; + if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { + dres.whatNext = Dis_Resteer; + dres.continueAt = (Addr64)(Addr32)d32; + } else { + jmp_lit(Ijk_Boring,d32); + dres.whatNext = Dis_StopHere; + } + DIP("jmp-8 0x%x\n", d32); + break; + + case 0xE9: /* Jv (jump, 16/32 offset) */ + vassert(sz == 4); /* JRS added 2004 July 11 */ + d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) +
getSDisp(sz,delta); + delta += sz; + if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { + dres.whatNext = Dis_Resteer; + dres.continueAt = (Addr64)(Addr32)d32; + } else { + jmp_lit(Ijk_Boring,d32); + dres.whatNext = Dis_StopHere; + } + DIP("jmp 0x%x\n", d32); + break; + + case 0x70: /* JOb (jump overflow) */ + case 0x71: /* JNOb (jump no overflow) */ + case 0x72: /* JBb/JNAEb (jump below) */ + case 0x73: /* JNBb/JAEb (jump not below) */ + case 0x74: /* JZb/JEb (jump zero) */ + case 0x75: /* JNZb/JNEb (jump not zero) */ + case 0x76: /* JBEb/JNAb (jump below or equal) */ + case 0x77: /* JNBEb/JAb (jump not below or equal) */ + case 0x78: /* JSb (jump negative) */ + case 0x79: /* JNSb (jump not negative) */ + case 0x7A: /* JP (jump parity even) */ + case 0x7B: /* JNP/JPO (jump parity odd) */ + case 0x7C: /* JLb/JNGEb (jump less) */ + case 0x7D: /* JGEb/JNLb (jump greater or equal) */ + case 0x7E: /* JLEb/JNGb (jump less or equal) */ + case 0x7F: /* JGb/JNLEb (jump greater) */ + d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); + delta++; + if (0 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { + /* Unused experimental hack: speculatively follow one arm + of a conditional branch. */ + /* Assume the branch is taken. So we need to emit a + side-exit to the insn following this one, on the negation + of the condition, and continue at the branch target + address (d32).
*/ + if (0) vex_printf("resteer\n"); + stmt( IRStmt_Exit( + mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))), + Ijk_Boring, + IRConst_U32(guest_EIP_bbstart+delta) ) ); + dres.whatNext = Dis_Resteer; + dres.continueAt = (Addr64)(Addr32)d32; + } else { + jcc_01((X86Condcode)(opc - 0x70), (Addr32)(guest_EIP_bbstart+delta), d32); + dres.whatNext = Dis_StopHere; + } + DIP("j%s-8 0x%x\n", name_X86Condcode(opc - 0x70), d32); + break; + + case 0xE3: /* JECXZ (for JCXZ see above) */ + if (sz != 4) goto decode_failure; + d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); + delta ++; + stmt( IRStmt_Exit( + binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)), + Ijk_Boring, + IRConst_U32(d32) + )); + DIP("jecxz 0x%x\n", d32); + break; + + case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ + case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ + case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ + { /* Again, the docs say this uses ECX/CX as a count depending on + the address size override, not the operand one. Since we + don't handle address size overrides, I guess that means + ECX. 
*/ + IRExpr* zbit = NULL; + IRExpr* count = NULL; + IRExpr* cond = NULL; + HChar* xtra = NULL; + + if (sz != 4) goto decode_failure; + d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); + delta++; + putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1))); + + count = getIReg(4,R_ECX); + cond = binop(Iop_CmpNE32, count, mkU32(0)); + switch (opc) { + case 0xE2: + xtra = ""; + break; + case 0xE1: + xtra = "e"; + zbit = mk_x86g_calculate_condition( X86CondZ ); + cond = mkAnd1(cond, zbit); + break; + case 0xE0: + xtra = "ne"; + zbit = mk_x86g_calculate_condition( X86CondNZ ); + cond = mkAnd1(cond, zbit); + break; + default: + vassert(0); + } + stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) ); + + DIP("loop%s 0x%x\n", xtra, d32); + break; + } + + /* ------------------------ IMUL ----------------------- */ + + case 0x69: /* IMUL Iv, Ev, Gv */ + delta = dis_imul_I_E_G ( sorb, sz, delta, sz ); + break; + case 0x6B: /* IMUL Ib, Ev, Gv */ + delta = dis_imul_I_E_G ( sorb, sz, delta, 1 ); + break; + + /* ------------------------ MOV ------------------------ */ + + case 0x88: /* MOV Gb,Eb */ + delta = dis_mov_G_E(sorb, 1, delta); + break; + + case 0x89: /* MOV Gv,Ev */ + delta = dis_mov_G_E(sorb, sz, delta); + break; + + case 0x8A: /* MOV Eb,Gb */ + delta = dis_mov_E_G(sorb, 1, delta); + break; + + case 0x8B: /* MOV Ev,Gv */ + delta = dis_mov_E_G(sorb, sz, delta); + break; + + case 0x8D: /* LEA M,Gv */ + if (sz != 4) + goto decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) + goto decode_failure; + /* NOTE! this is the one place where a segment override prefix + has no effect on the address calculation. Therefore we pass + zero instead of sorb here. 
*/ + addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf ); + delta += alen; + putIReg(sz, gregOfRM(modrm), mkexpr(addr)); + DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, + nameIReg(sz,gregOfRM(modrm))); + break; + + case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ + delta = dis_mov_Sw_Ew(sorb, sz, delta); + break; + + case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ + delta = dis_mov_Ew_Sw(sorb, delta); + break; + + case 0xA0: /* MOV Ob,AL */ + sz = 1; + /* Fall through ... */ + case 0xA1: /* MOV Ov,eAX */ + d32 = getUDisp32(delta); delta += 4; + ty = szToITy(sz); + addr = newTemp(Ity_I32); + assign( addr, handleSegOverride(sorb, mkU32(d32)) ); + putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr))); + DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb), + d32, nameIReg(sz,R_EAX)); + break; + + case 0xA2: /* MOV AL,Ob */ + sz = 1; + /* Fall through ... */ + case 0xA3: /* MOV eAX,Ov */ + d32 = getUDisp32(delta); delta += 4; + ty = szToITy(sz); + addr = newTemp(Ity_I32); + assign( addr, handleSegOverride(sorb, mkU32(d32)) ); + storeLE( mkexpr(addr), getIReg(sz,R_EAX) ); + DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX), + sorbTxt(sorb), d32); + break; + + case 0xB0: /* MOV imm,AL */ + case 0xB1: /* MOV imm,CL */ + case 0xB2: /* MOV imm,DL */ + case 0xB3: /* MOV imm,BL */ + case 0xB4: /* MOV imm,AH */ + case 0xB5: /* MOV imm,CH */ + case 0xB6: /* MOV imm,DH */ + case 0xB7: /* MOV imm,BH */ + d32 = getIByte(delta); delta += 1; + putIReg(1, opc-0xB0, mkU8(d32)); + DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0)); + break; + + case 0xB8: /* MOV imm,eAX */ + case 0xB9: /* MOV imm,eCX */ + case 0xBA: /* MOV imm,eDX */ + case 0xBB: /* MOV imm,eBX */ + case 0xBC: /* MOV imm,eSP */ + case 0xBD: /* MOV imm,eBP */ + case 0xBE: /* MOV imm,eSI */ + case 0xBF: /* MOV imm,eDI */ + d32 = getUDisp(sz,delta); delta += sz; + putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32)); + DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8)); + break; + + case 0xC6:
/* MOV Ib,Eb */ + sz = 1; + goto do_Mov_I_E; + case 0xC7: /* MOV Iv,Ev */ + goto do_Mov_I_E; + + do_Mov_I_E: + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; /* mod/rm byte */ + d32 = getUDisp(sz,delta); delta += sz; + putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32)); + DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, + nameIReg(sz,eregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + d32 = getUDisp(sz,delta); delta += sz; + storeLE(mkexpr(addr), mkU(szToITy(sz), d32)); + DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); + } + break; + + /* ------------------------ opl imm, A ----------------- */ + + case 0x04: /* ADD Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); + break; + case 0x05: /* ADD Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" ); + break; + + case 0x0C: /* OR Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); + break; + case 0x0D: /* OR Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); + break; + + case 0x14: /* ADC Ib, AL */ + delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); + break; + case 0x15: /* ADC Iv, eAX */ + delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); + break; + + case 0x1C: /* SBB Ib, AL */ + delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); + break; + case 0x1D: /* SBB Iv, eAX */ + delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); + break; + + case 0x24: /* AND Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); + break; + case 0x25: /* AND Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); + break; + + case 0x2C: /* SUB Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" ); + break; + case 0x2D: /* SUB Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); + break; + + case 0x34: /* XOR Ib, AL */ + delta = dis_op_imm_A( 
1, False, Iop_Xor8, True, delta, "xor" ); + break; + case 0x35: /* XOR Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); + break; + + case 0x3C: /* CMP Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); + break; + case 0x3D: /* CMP Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); + break; + + case 0xA8: /* TEST Ib, AL */ + delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); + break; + case 0xA9: /* TEST Iv, eAX */ + delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); + break; + + /* ------------------------ opl Ev, Gv ----------------- */ + + case 0x02: /* ADD Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" ); + break; + case 0x03: /* ADD Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" ); + break; + + case 0x0A: /* OR Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" ); + break; + case 0x0B: /* OR Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" ); + break; + + case 0x12: /* ADC Eb,Gb */ + delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" ); + break; + case 0x13: /* ADC Ev,Gv */ + delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" ); + break; + + case 0x1A: /* SBB Eb,Gb */ + delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" ); + break; + case 0x1B: /* SBB Ev,Gv */ + delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" ); + break; + + case 0x22: /* AND Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" ); + break; + case 0x23: /* AND Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" ); + break; + + case 0x2A: /* SUB Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" ); + break; + case 0x2B: /* SUB Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" ); + break; + + case 
0x32: /* XOR Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" ); + break; + case 0x33: /* XOR Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" ); + break; + + case 0x3A: /* CMP Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" ); + break; + case 0x3B: /* CMP Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" ); + break; + + case 0x84: /* TEST Eb,Gb */ + delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" ); + break; + case 0x85: /* TEST Ev,Gv */ + delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" ); + break; + + /* ------------------------ opl Gv, Ev ----------------- */ + + case 0x00: /* ADD Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Add8, True, 1, delta, "add" ); + break; + case 0x01: /* ADD Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Add8, True, sz, delta, "add" ); + break; + + case 0x08: /* OR Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Or8, True, 1, delta, "or" ); + break; + case 0x09: /* OR Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Or8, True, sz, delta, "or" ); + break; + + case 0x10: /* ADC Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, True, + Iop_Add8, True, 1, delta, "adc" ); + break; + case 0x11: /* ADC Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, True, + Iop_Add8, True, sz, delta, "adc" ); + break; + + case 0x18: /* SBB Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, True, + Iop_Sub8, True, 1, delta, "sbb" ); + break; + case 0x19: /* SBB Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, True, + Iop_Sub8, True, sz, delta, "sbb" ); + break; + + case 0x20: /* AND Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_And8, True, 1, delta, "and" ); + break; + case 0x21: /* AND Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_And8, True, sz, delta, "and" ); + break; + + case 0x28: /* SUB Gb,Eb */ + delta = 
dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Sub8, True, 1, delta, "sub" ); + break; + case 0x29: /* SUB Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Sub8, True, sz, delta, "sub" ); + break; + + case 0x30: /* XOR Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Xor8, True, 1, delta, "xor" ); + break; + case 0x31: /* XOR Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Xor8, True, sz, delta, "xor" ); + break; + + case 0x38: /* CMP Gb,Eb */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Sub8, False, 1, delta, "cmp" ); + break; + case 0x39: /* CMP Gv,Ev */ + delta = dis_op2_G_E ( sorb, pfx_lock, False, + Iop_Sub8, False, sz, delta, "cmp" ); + break; + + /* ------------------------ POP ------------------------ */ + + case 0x58: /* POP eAX */ + case 0x59: /* POP eCX */ + case 0x5A: /* POP eDX */ + case 0x5B: /* POP eBX */ + case 0x5D: /* POP eBP */ + case 0x5E: /* POP eSI */ + case 0x5F: /* POP eDI */ + case 0x5C: /* POP eSP */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32); + assign(t2, getIReg(4, R_ESP)); + assign(t1, loadLE(szToITy(sz),mkexpr(t2))); + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); + putIReg(sz, opc-0x58, mkexpr(t1)); + DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); + break; + + case 0x9D: /* POPF */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); + assign(t2, getIReg(4, R_ESP)); + assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2)))); + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); + + /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the + value in t1. */ + set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/, + ((Addr32)guest_EIP_bbstart)+delta ); + + DIP("popf%c\n", nameISize(sz)); + break; + + case 0x61: /* POPA */ + /* This is almost certainly wrong for sz==2. So ... */ + if (sz != 4) goto decode_failure; + + /* t5 is the old %ESP value. 
*/ + t5 = newTemp(Ity_I32); + assign( t5, getIReg(4, R_ESP) ); + + /* Reload all the registers, except %esp. */ + putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); + putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); + putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); + putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); + /* ignore saved %ESP */ + putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); + putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); + putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); + + /* and move %ESP back up */ + putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); + + DIP("popa%c\n", nameISize(sz)); + break; + + case 0x8F: /* POPL/POPW m32 */ + { Int len; + UChar rm = getIByte(delta); + + /* make sure this instruction is correct POP */ + if (epartIsReg(rm) || gregOfRM(rm) != 0) + goto decode_failure; + /* and has correct size */ + if (sz != 4 && sz != 2) + goto decode_failure; + ty = szToITy(sz); + + t1 = newTemp(Ity_I32); /* stack address */ + t3 = newTemp(ty); /* data */ + /* set t1 to ESP: t1 = ESP */ + assign( t1, getIReg(4, R_ESP) ); + /* load M[ESP] to virtual register t3: t3 = M[t1] */ + assign( t3, loadLE(ty, mkexpr(t1)) ); + + /* increase ESP; must be done before the STORE. Intel manual says: + If the ESP register is used as a base register for addressing + a destination operand in memory, the POP instruction computes + the effective address of the operand after it increments the + ESP register. + */ + putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) ); + + /* resolve MODR/M */ + addr = disAMode ( &len, sorb, delta, dis_buf); + storeLE( mkexpr(addr), mkexpr(t3) ); + + DIP("pop%c %s\n", sz==2 ? 
'w' : 'l', dis_buf); + + delta += len; + break; + } + + case 0x1F: /* POP %DS */ + dis_pop_segreg( R_DS, sz ); break; + case 0x07: /* POP %ES */ + dis_pop_segreg( R_ES, sz ); break; + case 0x17: /* POP %SS */ + dis_pop_segreg( R_SS, sz ); break; + + /* ------------------------ PUSH ----------------------- */ + + case 0x50: /* PUSH eAX */ + case 0x51: /* PUSH eCX */ + case 0x52: /* PUSH eDX */ + case 0x53: /* PUSH eBX */ + case 0x55: /* PUSH eBP */ + case 0x56: /* PUSH eSI */ + case 0x57: /* PUSH eDI */ + case 0x54: /* PUSH eSP */ + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pushl %esp + correctly pushes the old value. */ + vassert(sz == 2 || sz == 4); + ty = sz==2 ? Ity_I16 : Ity_I32; + t1 = newTemp(ty); t2 = newTemp(Ity_I32); + assign(t1, getIReg(sz, opc-0x50)); + assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz))); + putIReg(4, R_ESP, mkexpr(t2) ); + storeLE(mkexpr(t2),mkexpr(t1)); + DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); + break; + + + case 0x68: /* PUSH Iv */ + d32 = getUDisp(sz,delta); delta += sz; + goto do_push_I; + case 0x6A: /* PUSH Ib, sign-extended to sz */ + d32 = getSDisp8(delta); delta += 1; + goto do_push_I; + do_push_I: + ty = szToITy(sz); + t1 = newTemp(Ity_I32); t2 = newTemp(ty); + assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(t1) ); + /* stop mkU16 asserting if d32 is a negative 16-bit number + (bug #132813) */ + if (ty == Ity_I16) + d32 &= 0xFFFF; + storeLE( mkexpr(t1), mkU(ty,d32) ); + DIP("push%c $0x%x\n", nameISize(sz), d32); + break; + + case 0x9C: /* PUSHF */ { + vassert(sz == 2 || sz == 4); + + t1 = newTemp(Ity_I32); + assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); + putIReg(4, R_ESP, mkexpr(t1) ); + + /* Calculate OSZACP, and patch in fixed fields as per + Intel docs. + - bit 1 is always 1 + - bit 9 is Interrupt Enable (should always be 1 in user mode?) 
+ */ + t2 = newTemp(Ity_I32); + assign( t2, binop(Iop_Or32, + mk_x86g_calculate_eflags_all(), + mkU32( (1<<1)|(1<<9) ) )); + + /* Patch in the D flag. This can simply be a copy of bit 10 of + baseBlock[OFFB_DFLAG]. */ + t3 = newTemp(Ity_I32); + assign( t3, binop(Iop_Or32, + mkexpr(t2), + binop(Iop_And32, + IRExpr_Get(OFFB_DFLAG,Ity_I32), + mkU32(1<<10))) + ); + + /* And patch in the ID flag. */ + t4 = newTemp(Ity_I32); + assign( t4, binop(Iop_Or32, + mkexpr(t3), + binop(Iop_And32, + binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32), + mkU8(21)), + mkU32(1<<21))) + ); + + /* And patch in the AC flag. */ + t5 = newTemp(Ity_I32); + assign( t5, binop(Iop_Or32, + mkexpr(t4), + binop(Iop_And32, + binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32), + mkU8(18)), + mkU32(1<<18))) + ); + + /* if sz==2, the stored value needs to be narrowed. */ + if (sz == 2) + storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) ); + else + storeLE( mkexpr(t1), mkexpr(t5) ); + + DIP("pushf%c\n", nameISize(sz)); + break; + } + + case 0x60: /* PUSHA */ + /* This is almost certainly wrong for sz==2. So ... */ + if (sz != 4) goto decode_failure; + + /* This is the Right Way, in that the value to be pushed is + established before %esp is changed, so that pusha + correctly pushes the old %esp value. New value of %esp is + pushed at start. */ + /* t0 is the %ESP value we're going to push. */ + t0 = newTemp(Ity_I32); + assign( t0, getIReg(4, R_ESP) ); + + /* t5 will be the new %ESP value. */ + t5 = newTemp(Ity_I32); + assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); + + /* Update guest state before prodding memory. */ + putIReg(4, R_ESP, mkexpr(t5)); + + /* Dump all the registers. 
*/ + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); + storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); + + DIP("pusha%c\n", nameISize(sz)); + break; + + case 0x0E: /* PUSH %CS */ + dis_push_segreg( R_CS, sz ); break; + case 0x1E: /* PUSH %DS */ + dis_push_segreg( R_DS, sz ); break; + case 0x06: /* PUSH %ES */ + dis_push_segreg( R_ES, sz ); break; + case 0x16: /* PUSH %SS */ + dis_push_segreg( R_SS, sz ); break; + + /* ------------------------ SCAS et al ----------------- */ + + case 0xA4: /* MOVS, no REP prefix */ + case 0xA5: + if (sorb != 0) + goto decode_failure; /* else dis_string_op asserts */ + dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); + break; + + case 0xA6: /* CMPSb, no REP prefix */ + case 0xA7: + if (sorb != 0) + goto decode_failure; /* else dis_string_op asserts */ + dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); + break; + + case 0xAA: /* STOS, no REP prefix */ + case 0xAB: + if (sorb != 0) + goto decode_failure; /* else dis_string_op asserts */ + dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb ); + break; + + case 0xAC: /* LODS, no REP prefix */ + case 0xAD: + if (sorb != 0) + goto decode_failure; /* else dis_string_op asserts */ + dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb ); + break; + + case 0xAE: /* SCAS, no REP prefix */ + case 0xAF: + if (sorb != 0) + goto decode_failure; /* else dis_string_op asserts */ + dis_string_op( dis_SCAS, ( opc == 0xAE ? 
1 : sz ), "scas", sorb ); + break; + + + case 0xFC: /* CLD */ + stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) ); + DIP("cld\n"); + break; + + case 0xFD: /* STD */ + stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) ); + DIP("std\n"); + break; + + case 0xF8: /* CLC */ + case 0xF9: /* STC */ + case 0xF5: /* CMC */ + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + assign( t0, mk_x86g_calculate_eflags_all() ); + switch (opc) { + case 0xF8: + assign( t1, binop(Iop_And32, mkexpr(t0), + mkU32(~X86G_CC_MASK_C))); + DIP("clc\n"); + break; + case 0xF9: + assign( t1, binop(Iop_Or32, mkexpr(t0), + mkU32(X86G_CC_MASK_C))); + DIP("stc\n"); + break; + case 0xF5: + assign( t1, binop(Iop_Xor32, mkexpr(t0), + mkU32(X86G_CC_MASK_C))); + DIP("cmc\n"); + break; + default: + vpanic("disInstr(x86)(clc/stc/cmc)"); + } + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); + /* Set NDEP even though it isn't used. This makes redundant-PUT + elimination of previous stores to this field work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + break; + + case 0xD6: /* SALC */ + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_I32); + assign( t0, binop(Iop_And32, + mk_x86g_calculate_eflags_c(), + mkU32(1)) ); + assign( t1, binop(Iop_Sar32, + binop(Iop_Shl32, mkexpr(t0), mkU8(31)), + mkU8(31)) ); + putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) ); + DIP("salc\n"); + break; + + /* REPNE prefix insn */ + case 0xF2: { + Addr32 eip_orig = guest_EIP_bbstart + delta_start; + if (sorb != 0) goto decode_failure; + abyte = getIByte(delta); delta++; + + if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } + dres.whatNext = Dis_StopHere; + + switch (abyte) { + /* According to the Intel manual, "repne movs" should never occur, but + * in practice it has happened, so allow for it here... 
*/ + case 0xA4: sz = 1; /* REPNE MOVS */ + case 0xA5: + dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne movs" ); + break; + + case 0xA6: sz = 1; /* REPNE CMP */ + case 0xA7: + dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne cmps" ); + break; + + case 0xAA: sz = 1; /* REPNE STOS */ + case 0xAB: + dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne stos" ); + break; + + case 0xAE: sz = 1; /* REPNE SCAS */ + case 0xAF: + dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne scas" ); + break; + + default: + goto decode_failure; + } + break; + } + + /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE, + for the rest, it means REP) */ + case 0xF3: { + Addr32 eip_orig = guest_EIP_bbstart + delta_start; + if (sorb != 0) goto decode_failure; + abyte = getIByte(delta); delta++; + + if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } + dres.whatNext = Dis_StopHere; + + switch (abyte) { + case 0xA4: sz = 1; /* REP MOVS */ + case 0xA5: + dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep movs" ); + break; + + case 0xA6: sz = 1; /* REPE CMP */ + case 0xA7: + dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig, + guest_EIP_bbstart+delta, "repe cmps" ); + break; + + case 0xAA: sz = 1; /* REP STOS */ + case 0xAB: + dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep stos" ); + break; + + case 0xAC: sz = 1; /* REP LODS */ + case 0xAD: + dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep lods" ); + break; + + case 0xAE: sz = 1; /* REPE SCAS */ + case 0xAF: + dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig, + guest_EIP_bbstart+delta, "repe scas" ); + break; + + case 0x90: /* REP NOP (PAUSE) */ + /* a hint to the P4 re spin-wait loop */ + DIP("rep nop (P4 pause)\n"); + /* "observe" the hint. 
The Vex client needs to be careful not + to cause very long delays as a result, though. */ + jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); + dres.whatNext = Dis_StopHere; + break; + + case 0xC3: /* REP RET -- same as normal ret? */ + dis_ret(0); + dres.whatNext = Dis_StopHere; + DIP("rep ret\n"); + break; + + default: + goto decode_failure; + } + break; + } + + /* ------------------------ XCHG ----------------------- */ + + /* XCHG reg,mem automatically asserts LOCK# even without a LOCK + prefix; hence it must be translated with an IRCAS (at least, the + memory variant). */ + case 0x86: /* XCHG Gb,Eb */ + sz = 1; + /* Fall through ... */ + case 0x87: /* XCHG Gv,Ev */ + modrm = getIByte(delta); + ty = szToITy(sz); + t1 = newTemp(ty); t2 = newTemp(ty); + if (epartIsReg(modrm)) { + assign(t1, getIReg(sz, eregOfRM(modrm))); + assign(t2, getIReg(sz, gregOfRM(modrm))); + putIReg(sz, gregOfRM(modrm), mkexpr(t1)); + putIReg(sz, eregOfRM(modrm), mkexpr(t2)); + delta++; + DIP("xchg%c %s, %s\n", + nameISize(sz), nameIReg(sz,gregOfRM(modrm)), + nameIReg(sz,eregOfRM(modrm))); + } else { + *expect_CAS = True; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + assign( t1, loadLE(ty,mkexpr(addr)) ); + assign( t2, getIReg(sz,gregOfRM(modrm)) ); + casLE( mkexpr(addr), + mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); + putIReg( sz, gregOfRM(modrm), mkexpr(t1) ); + delta += alen; + DIP("xchg%c %s, %s\n", nameISize(sz), + nameIReg(sz,gregOfRM(modrm)), dis_buf); + } + break; + + case 0x90: /* XCHG eAX,eAX */ + DIP("nop\n"); + break; + case 0x91: /* XCHG eAX,eCX */ + case 0x92: /* XCHG eAX,eDX */ + case 0x93: /* XCHG eAX,eBX */ + case 0x94: /* XCHG eAX,eSP */ + case 0x95: /* XCHG eAX,eBP */ + case 0x96: /* XCHG eAX,eSI */ + case 0x97: /* XCHG eAX,eDI */ + codegen_xchg_eAX_Reg ( sz, opc - 0x90 ); + break; + + /* ------------------------ XLAT ----------------------- */ + + case 0xD7: /* XLAT */ + if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */ + putIReg( 
+ 1, + R_EAX/*AL*/, + loadLE(Ity_I8, + handleSegOverride( + sorb, + binop(Iop_Add32, + getIReg(4, R_EBX), + unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/)))))); + + DIP("xlat%c [ebx]\n", nameISize(sz)); + break; + + /* ------------------------ IN / OUT ----------------------- */ + + case 0xE4: /* IN imm8, AL */ + sz = 1; + t1 = newTemp(Ity_I32); + abyte = getIByte(delta); delta++; + assign(t1, mkU32( abyte & 0xFF )); + DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); + goto do_IN; + case 0xE5: /* IN imm8, eAX */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(Ity_I32); + abyte = getIByte(delta); delta++; + assign(t1, mkU32( abyte & 0xFF )); + DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); + goto do_IN; + case 0xEC: /* IN %DX, AL */ + sz = 1; + t1 = newTemp(Ity_I32); + assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); + DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), + nameIReg(sz,R_EAX)); + goto do_IN; + case 0xED: /* IN %DX, eAX */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(Ity_I32); + assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); + DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), + nameIReg(sz,R_EAX)); + goto do_IN; + do_IN: { + /* At this point, sz indicates the width, and t1 is a 32-bit + value giving port number. */ + IRDirty* d; + vassert(sz == 1 || sz == 2 || sz == 4); + ty = szToITy(sz); + t2 = newTemp(Ity_I32); + d = unsafeIRDirty_1_N( + t2, + 0/*regparms*/, + "x86g_dirtyhelper_IN", + &x86g_dirtyhelper_IN, + mkIRExprVec_2( mkexpr(t1), mkU32(sz) ) + ); + /* do the call, dumping the result in t2. 
*/ + stmt( IRStmt_Dirty(d) ); + putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) ); + break; + } + + case 0xE6: /* OUT AL, imm8 */ + sz = 1; + t1 = newTemp(Ity_I32); + abyte = getIByte(delta); delta++; + assign( t1, mkU32( abyte & 0xFF ) ); + DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); + goto do_OUT; + case 0xE7: /* OUT eAX, imm8 */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(Ity_I32); + abyte = getIByte(delta); delta++; + assign( t1, mkU32( abyte & 0xFF ) ); + DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); + goto do_OUT; + case 0xEE: /* OUT AL, %DX */ + sz = 1; + t1 = newTemp(Ity_I32); + assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); + DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), + nameIReg(2,R_EDX)); + goto do_OUT; + case 0xEF: /* OUT eAX, %DX */ + vassert(sz == 2 || sz == 4); + t1 = newTemp(Ity_I32); + assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); + DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), + nameIReg(2,R_EDX)); + goto do_OUT; + do_OUT: { + /* At this point, sz indicates the width, and t1 is a 32-bit + value giving port number. */ + IRDirty* d; + vassert(sz == 1 || sz == 2 || sz == 4); + ty = szToITy(sz); + d = unsafeIRDirty_0_N( + 0/*regparms*/, + "x86g_dirtyhelper_OUT", + &x86g_dirtyhelper_OUT, + mkIRExprVec_3( mkexpr(t1), + widenUto32( getIReg(sz, R_EAX) ), + mkU32(sz) ) + ); + stmt( IRStmt_Dirty(d) ); + break; + } + + /* ------------------------ (Grp1 extensions) ---------- */ + + case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as + case 0x80, but only in 32-bit mode. 
*/ + /* fallthru */ + case 0x80: /* Grp1 Ib,Eb */ + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + sz = 1; + d_sz = 1; + d32 = getUChar(delta + am_sz); + delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x81: /* Grp1 Iv,Ev */ + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = sz; + d32 = getUDisp(d_sz, delta + am_sz); + delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); + break; + + case 0x83: /* Grp1 Ib,Ev */ + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = 1; + d32 = getSDisp8(delta + am_sz); + delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); + break; + + /* ------------------------ (Grp2 extensions) ---------- */ + + case 0xC0: { /* Grp2 Ib,Eb */ + Bool decode_OK = True; + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = 1; + d32 = getUChar(delta + am_sz); + sz = 1; + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + mkU8(d32 & 0xFF), NULL, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xC1: { /* Grp2 Ib,Ev */ + Bool decode_OK = True; + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = 1; + d32 = getUChar(delta + am_sz); + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + mkU8(d32 & 0xFF), NULL, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xD0: { /* Grp2 1,Eb */ + Bool decode_OK = True; + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = 0; + d32 = 1; + sz = 1; + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + mkU8(d32), NULL, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xD1: { /* Grp2 1,Ev */ + Bool decode_OK = True; + modrm = getUChar(delta); + am_sz = lengthAMode(delta); + d_sz = 0; + d32 = 1; + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + mkU8(d32), NULL, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xD2: { /* Grp2 CL,Eb */ + Bool 
decode_OK = True; + modrm = getUChar(delta); + am_sz = lengthAMode(delta); + d_sz = 0; + sz = 1; + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + getIReg(1,R_ECX), "%cl", &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xD3: { /* Grp2 CL,Ev */ + Bool decode_OK = True; + modrm = getIByte(delta); + am_sz = lengthAMode(delta); + d_sz = 0; + delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, + getIReg(1,R_ECX), "%cl", &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* ------------------------ (Grp3 extensions) ---------- */ + + case 0xF6: { /* Grp3 Eb */ + Bool decode_OK = True; + delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + case 0xF7: { /* Grp3 Ev */ + Bool decode_OK = True; + delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* ------------------------ (Grp4 extensions) ---------- */ + + case 0xFE: { /* Grp4 Eb */ + Bool decode_OK = True; + delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* ------------------------ (Grp5 extensions) ---------- */ + + case 0xFF: { /* Grp5 Ev */ + Bool decode_OK = True; + delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* ------------------------ Escapes to 2-byte opcodes -- */ + + case 0x0F: { + opc = getIByte(delta); delta++; + switch (opc) { + + /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0xBA: { /* Grp8 Ib,Ev */ + Bool decode_OK = False; + modrm = getUChar(delta); + am_sz = lengthAMode(delta); + d32 = getSDisp8(delta + am_sz); + delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm, + am_sz, sz, d32, &decode_OK ); + if (!decode_OK) + goto decode_failure; + break; + } + + /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ + + case 0xBC: /* BSF Gv,Ev */ + delta = 
dis_bs_E_G ( sorb, sz, delta, True ); + break; + case 0xBD: /* BSR Gv,Ev */ + delta = dis_bs_E_G ( sorb, sz, delta, False ); + break; + + /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xC8: /* BSWAP %eax */ + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: /* BSWAP %edi */ + /* AFAICS from the Intel docs, this only exists at size 4. */ + vassert(sz == 4); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + assign( t1, getIReg(4, opc-0xC8) ); + + assign( t2, + binop(Iop_Or32, + binop(Iop_Shl32, mkexpr(t1), mkU8(24)), + binop(Iop_Or32, + binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), + mkU32(0x00FF0000)), + binop(Iop_Or32, + binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), + mkU32(0x0000FF00)), + binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), + mkU32(0x000000FF) ) + ))) + ); + + putIReg(4, opc-0xC8, mkexpr(t2)); + DIP("bswapl %s\n", nameIReg(4, opc-0xC8)); + break; + + /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ + + case 0xA3: /* BT Gv,Ev */ + delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpNone ); + break; + case 0xB3: /* BTR Gv,Ev */ + delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpReset ); + break; + case 0xAB: /* BTS Gv,Ev */ + delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpSet ); + break; + case 0xBB: /* BTC Gv,Ev */ + delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpComp ); + break; + + /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x40: + case 0x41: + case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ + case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ + case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ + case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ + case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ + case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ + case 0x48: /* CMOVSb (cmov negative) */ + case 0x49: /* CMOVSb (cmov not negative) */ + case 0x4A: /* CMOVP (cmov parity even) */ + case 0x4B: /* CMOVNP (cmov parity odd) */ + 
case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ + case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ + case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ + case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ + delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta); + break; + + /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ + + case 0xB0: /* CMPXCHG Gb,Eb */ + delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta ); + break; + case 0xB1: /* CMPXCHG Gv,Ev */ + delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta ); + break; + + case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */ + IRTemp expdHi = newTemp(Ity_I32); + IRTemp expdLo = newTemp(Ity_I32); + IRTemp dataHi = newTemp(Ity_I32); + IRTemp dataLo = newTemp(Ity_I32); + IRTemp oldHi = newTemp(Ity_I32); + IRTemp oldLo = newTemp(Ity_I32); + IRTemp flags_old = newTemp(Ity_I32); + IRTemp flags_new = newTemp(Ity_I32); + IRTemp success = newTemp(Ity_I1); + + /* Translate this using a DCAS, even if there is no LOCK + prefix. Life is too short to bother with generating two + different translations for the with/without-LOCK-prefix + cases. */ + *expect_CAS = True; + + /* Decode, and generate address. */ + if (sz != 4) goto decode_failure; + modrm = getIByte(delta); + if (epartIsReg(modrm)) goto decode_failure; + if (gregOfRM(modrm) != 1) goto decode_failure; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + + /* Get the expected and new values. 
*/ + assign( expdHi, getIReg(4,R_EDX) ); + assign( expdLo, getIReg(4,R_EAX) ); + assign( dataHi, getIReg(4,R_ECX) ); + assign( dataLo, getIReg(4,R_EBX) ); + + /* Do the DCAS */ + stmt( IRStmt_CAS( + mkIRCAS( oldHi, oldLo, + Iend_LE, mkexpr(addr), + mkexpr(expdHi), mkexpr(expdLo), + mkexpr(dataHi), mkexpr(dataLo) + ))); + + /* success when oldHi:oldLo == expdHi:expdLo */ + assign( success, + binop(Iop_CasCmpEQ32, + binop(Iop_Or32, + binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)), + binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo)) + ), + mkU32(0) + )); + + /* If the DCAS is successful, that is to say oldHi:oldLo == + expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX, + which is where they came from originally. Both the actual + contents of these two regs, and any shadow values, are + unchanged. If the DCAS fails then we're putting into + EDX:EAX the value seen in memory. */ + putIReg(4, R_EDX, + IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), + mkexpr(oldHi), + mkexpr(expdHi) + )); + putIReg(4, R_EAX, + IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), + mkexpr(oldLo), + mkexpr(expdLo) + )); + + /* Copy the success bit into the Z flag and leave the others + unchanged */ + assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all())); + assign( + flags_new, + binop(Iop_Or32, + binop(Iop_And32, mkexpr(flags_old), + mkU32(~X86G_CC_MASK_Z)), + binop(Iop_Shl32, + binop(Iop_And32, + unop(Iop_1Uto32, mkexpr(success)), mkU32(1)), + mkU8(X86G_CC_SHIFT_Z)) )); + + stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); + stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); + stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); + /* Set NDEP even though it isn't used. This makes + redundant-PUT elimination of previous stores to this field + work better. */ + stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); + + /* Sheesh. Aren't you glad it was me and not you that had to + write and validate all this grunge? 
*/ + + DIP("cmpxchg8b %s\n", dis_buf); + break; + } + + /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ + + case 0xA2: { /* CPUID */ + /* Uses dirty helper: + void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* ) + declared to mod eax, wr ebx, ecx, edx + */ + IRDirty* d = NULL; + HChar* fName = NULL; + void* fAddr = NULL; + if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) { + fName = "x86g_dirtyhelper_CPUID_sse2"; + fAddr = &x86g_dirtyhelper_CPUID_sse2; + } + else + if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) { + fName = "x86g_dirtyhelper_CPUID_sse1"; + fAddr = &x86g_dirtyhelper_CPUID_sse1; + } + else + if (archinfo->hwcaps == 0/*no SSE*/) { + fName = "x86g_dirtyhelper_CPUID_sse0"; + fAddr = &x86g_dirtyhelper_CPUID_sse0; + } else + vpanic("disInstr(x86)(cpuid)"); + + vassert(fName); vassert(fAddr); + d = unsafeIRDirty_0_N ( 0/*regparms*/, + fName, fAddr, mkIRExprVec_0() ); + /* declare guest state effects */ + d->needsBBP = True; + d->nFxState = 4; + d->fxState[0].fx = Ifx_Modify; + d->fxState[0].offset = OFFB_EAX; + d->fxState[0].size = 4; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_EBX; + d->fxState[1].size = 4; + d->fxState[2].fx = Ifx_Modify; + d->fxState[2].offset = OFFB_ECX; + d->fxState[2].size = 4; + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_EDX; + d->fxState[3].size = 4; + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + /* CPUID is a serialising insn. So, just in case someone is + using it as a memory fence ... 
*/ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("cpuid\n"); + break; + } + + //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID)) + //-- goto decode_failure; + //-- + //-- t1 = newTemp(cb); + //-- t2 = newTemp(cb); + //-- t3 = newTemp(cb); + //-- t4 = newTemp(cb); + //-- uInstr0(cb, CALLM_S, 0); + //-- + //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); + //-- uInstr1(cb, PUSH, 4, TempReg, t1); + //-- + //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); + //-- uLiteral(cb, 0); + //-- uInstr1(cb, PUSH, 4, TempReg, t2); + //-- + //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); + //-- uLiteral(cb, 0); + //-- uInstr1(cb, PUSH, 4, TempReg, t3); + //-- + //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); + //-- uLiteral(cb, 0); + //-- uInstr1(cb, PUSH, 4, TempReg, t4); + //-- + //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); + //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); + //-- + //-- uInstr1(cb, POP, 4, TempReg, t4); + //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); + //-- + //-- uInstr1(cb, POP, 4, TempReg, t3); + //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); + //-- + //-- uInstr1(cb, POP, 4, TempReg, t2); + //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); + //-- + //-- uInstr1(cb, POP, 4, TempReg, t1); + //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); + //-- + //-- uInstr0(cb, CALLM_E, 0); + //-- DIP("cpuid\n"); + //-- break; + //-- + /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ + + case 0xB6: /* MOVZXb Eb,Gv */ + if (sz != 2 && sz != 4) + goto decode_failure; + delta = dis_movx_E_G ( sorb, delta, 1, sz, False ); + break; + + case 0xB7: /* MOVZXw Ew,Gv */ + if (sz != 4) + goto decode_failure; + delta = dis_movx_E_G ( sorb, delta, 2, 4, False ); + break; + + case 0xBE: /* MOVSXb Eb,Gv */ + if (sz != 2 && sz != 4) + goto decode_failure; + delta = dis_movx_E_G ( sorb, delta, 1, sz, True ); + break; + + case 0xBF: /* MOVSXw Ew,Gv */ + if (sz != 4) + goto decode_failure; + delta = dis_movx_E_G ( sorb, delta, 2, 4, 
True ); + break; + + //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */ + //-- + //-- case 0xC3: /* MOVNTI Gv,Ev */ + //-- vg_assert(sz == 4); + //-- modrm = getUChar(eip); + //-- vg_assert(!epartIsReg(modrm)); + //-- t1 = newTemp(cb); + //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1); + //-- pair = disAMode ( cb, sorb, eip, dis_buf ); + //-- t2 = LOW24(pair); + //-- eip += HI8(pair); + //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); + //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf); + //-- break; + + /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ + + case 0xAF: /* IMUL Ev, Gv */ + delta = dis_mul_E_G ( sorb, sz, delta ); + break; + + /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ + + case 0x1F: + modrm = getUChar(delta); + if (epartIsReg(modrm)) goto decode_failure; + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + DIP("nop%c %s\n", nameISize(sz), dis_buf); + break; + + /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ + case 0x80: + case 0x81: + case 0x82: /* JBb/JNAEb (jump below) */ + case 0x83: /* JNBb/JAEb (jump not below) */ + case 0x84: /* JZb/JEb (jump zero) */ + case 0x85: /* JNZb/JNEb (jump not zero) */ + case 0x86: /* JBEb/JNAb (jump below or equal) */ + case 0x87: /* JNBEb/JAb (jump not below or equal) */ + case 0x88: /* JSb (jump negative) */ + case 0x89: /* JSb (jump not negative) */ + case 0x8A: /* JP (jump parity even) */ + case 0x8B: /* JNP/JPO (jump parity odd) */ + case 0x8C: /* JLb/JNGEb (jump less) */ + case 0x8D: /* JGEb/JNLb (jump greater or equal) */ + case 0x8E: /* JLEb/JNGb (jump less or equal) */ + case 0x8F: /* JGb/JNLEb (jump greater) */ + d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + getUDisp32(delta); + delta += 4; + jcc_01( (X86Condcode)(opc - 0x80), + (Addr32)(guest_EIP_bbstart+delta), + d32 ); + dres.whatNext = Dis_StopHere; + DIP("j%s-32 0x%x\n", name_X86Condcode(opc - 0x80), d32); + break; + + /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= 
*/ + case 0x31: { /* RDTSC */ + IRTemp val = newTemp(Ity_I64); + IRExpr** args = mkIRExprVec_0(); + IRDirty* d = unsafeIRDirty_1_N ( + val, + 0/*regparms*/, + "x86g_dirtyhelper_RDTSC", + &x86g_dirtyhelper_RDTSC, + args + ); + /* execute the dirty call, dumping the result in val. */ + stmt( IRStmt_Dirty(d) ); + putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val))); + putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val))); + DIP("rdtsc\n"); + break; + } + + /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */ + + case 0xA1: /* POP %FS */ + dis_pop_segreg( R_FS, sz ); break; + case 0xA9: /* POP %GS */ + dis_pop_segreg( R_GS, sz ); break; + + case 0xA0: /* PUSH %FS */ + dis_push_segreg( R_FS, sz ); break; + case 0xA8: /* PUSH %GS */ + dis_push_segreg( R_GS, sz ); break; + + /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ + case 0x90: + case 0x91: + case 0x92: /* set-Bb/set-NAEb (jump below) */ + case 0x93: /* set-NBb/set-AEb (jump not below) */ + case 0x94: /* set-Zb/set-Eb (jump zero) */ + case 0x95: /* set-NZb/set-NEb (jump not zero) */ + case 0x96: /* set-BEb/set-NAb (jump below or equal) */ + case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */ + case 0x98: /* set-Sb (jump negative) */ + case 0x99: /* set-Sb (jump not negative) */ + case 0x9A: /* set-P (jump parity even) */ + case 0x9B: /* set-NP (jump parity odd) */ + case 0x9C: /* set-Lb/set-NGEb (jump less) */ + case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */ + case 0x9E: /* set-LEb/set-NGb (jump less or equal) */ + case 0x9F: /* set-Gb/set-NLEb (jump greater) */ + t1 = newTemp(Ity_I8); + assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) ); + modrm = getIByte(delta); + if (epartIsReg(modrm)) { + delta++; + putIReg(1, eregOfRM(modrm), mkexpr(t1)); + DIP("set%s %s\n", name_X86Condcode(opc-0x90), + nameIReg(1,eregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta, dis_buf ); + delta += alen; + storeLE( mkexpr(addr), mkexpr(t1) ); + DIP("set%s %s\n", 
name_X86Condcode(opc-0x90), dis_buf); + } + break; + + /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ + + case 0xA4: /* SHLDv imm8,Gv,Ev */ + modrm = getIByte(delta); + d32 = delta + lengthAMode(delta); + vex_sprintf(dis_buf, "$%d", getIByte(d32)); + delta = dis_SHLRD_Gv_Ev ( + sorb, delta, modrm, sz, + mkU8(getIByte(d32)), True, /* literal */ + dis_buf, True ); + break; + case 0xA5: /* SHLDv %cl,Gv,Ev */ + modrm = getIByte(delta); + delta = dis_SHLRD_Gv_Ev ( + sorb, delta, modrm, sz, + getIReg(1,R_ECX), False, /* not literal */ + "%cl", True ); + break; + + case 0xAC: /* SHRDv imm8,Gv,Ev */ + modrm = getIByte(delta); + d32 = delta + lengthAMode(delta); + vex_sprintf(dis_buf, "$%d", getIByte(d32)); + delta = dis_SHLRD_Gv_Ev ( + sorb, delta, modrm, sz, + mkU8(getIByte(d32)), True, /* literal */ + dis_buf, False ); + break; + case 0xAD: /* SHRDv %cl,Gv,Ev */ + modrm = getIByte(delta); + delta = dis_SHLRD_Gv_Ev ( + sorb, delta, modrm, sz, + getIReg(1,R_ECX), False, /* not literal */ + "%cl", False ); + break; + + /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */ + + case 0x34: + /* Simple implementation needing a long explaination. + + sysenter is a kind of syscall entry. The key thing here + is that the return address is not known -- that is + something that is beyond Vex's knowledge. So this IR + forces a return to the scheduler, which can do what it + likes to simulate the systenter, but it MUST set this + thread's guest_EIP field with the continuation address + before resuming execution. If that doesn't happen, the + thread will jump to address zero, which is probably + fatal. + */ + + /* Note where we are, so we can back up the guest to this + point if the syscall needs to be restarted. 
*/ + stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, + mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/); + dres.whatNext = Dis_StopHere; + DIP("sysenter"); + break; + + /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ + + case 0xC0: { /* XADD Gb,Eb */ + Bool decodeOK; + delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK ); + if (!decodeOK) goto decode_failure; + break; + } + case 0xC1: { /* XADD Gv,Ev */ + Bool decodeOK; + delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK ); + if (!decodeOK) goto decode_failure; + break; + } + + /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */ + + case 0x71: + case 0x72: + case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ + + case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ + case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ + case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ + case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xFC: + case 0xFD: + case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xEC: + case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xDC: + case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF8: + case 0xF9: + case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xE8: + case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xD8: + case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x74: + case 0x75: + case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x64: + case 0x65: + case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ + case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ + case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x68: + case 0x69: + case 0x6A: /* 
PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0x60: + case 0x61: + case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ + + case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xF2: + case 0xF3: + + case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xD2: + case 0xD3: + + case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ + case 0xE2: + { + Int delta0 = delta-1; + Bool decode_OK = False; + + /* If sz==2 this is SSE, and we assume sse idec has + already spotted those cases by now. */ + if (sz != 4) + goto decode_failure; + + delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 ); + if (!decode_OK) { + delta = delta0; + goto decode_failure; + } + break; + } + + case 0x77: /* EMMS */ + if (sz != 4) + goto decode_failure; + do_EMMS_preamble(); + DIP("emms\n"); + break; + + /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ + + default: + goto decode_failure; + } /* switch (opc) for the 2-byte opcodes */ + goto decode_success; + } /* case 0x0F: of primary opcode */ + + /* ------------------------ ??? ------------------------ */ + + default: + decode_failure: + /* All decode failures end up here. */ + vex_printf("vex x86->IR: unhandled instruction bytes: " + "0x%x 0x%x 0x%x 0x%x\n", + (Int)getIByte(delta_start+0), + (Int)getIByte(delta_start+1), + (Int)getIByte(delta_start+2), + (Int)getIByte(delta_start+3) ); + + /* Tell the dispatcher that this insn cannot be decoded, and so has + not been executed, and (is currently) the next to be executed. + EIP should be up-to-date since it made so at the start of each + insn, but nevertheless be paranoid and update it again right + now. 
*/ + stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) ); + jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr); + dres.whatNext = Dis_StopHere; + dres.len = 0; + /* We also need to say that a CAS is not expected now, regardless + of what it might have been set to at the start of the function, + since the IR that we've emitted just above (to synthesis a + SIGILL) does not involve any CAS, and presumably no other IR has + been emitted for this (non-decoded) insn. */ + *expect_CAS = False; + return dres; + + } /* switch (opc) for the main (primary) opcode switch. */ + + decode_success: + /* All decode successes end up here. */ + DIP("\n"); + dres.len = delta - delta_start; + return dres; + } + + #undef DIP + #undef DIS + + + /*------------------------------------------------------------*/ + /*--- Top-level fn ---*/ + /*------------------------------------------------------------*/ + + /* Disassemble a single instruction into IR. The instruction + is located in host memory at &guest_code[delta]. */ + + DisResult disInstr_X86 ( IRSB* irsb_IN, + Bool put_IP, + Bool (*resteerOkFn) ( void*, Addr64 ), + void* callback_opaque, + UChar* guest_code_IN, + Long delta, + Addr64 guest_IP, + VexArch guest_arch, + VexArchInfo* archinfo, + VexAbiInfo* abiinfo, + Bool host_bigendian_IN ) + { + Int i, x1, x2; + Bool expect_CAS, has_CAS; + DisResult dres; + + /* Set globals (see top of this file) */ + vassert(guest_arch == VexArchX86); + guest_code = guest_code_IN; + irsb = irsb_IN; + host_is_bigendian = host_bigendian_IN; + guest_EIP_curr_instr = (Addr32)guest_IP; + guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta); + + x1 = irsb_IN->stmts_used; + expect_CAS = False; + dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn, + callback_opaque, delta, archinfo ); + x2 = irsb_IN->stmts_used; + vassert(x2 >= x1); + + /* See comment at the top of disInstr_X86_WRK for meaning of + expect_CAS. 
Here, we (sanity-)check for the presence/absence of + IRCAS as directed by the returned expect_CAS value. */ + has_CAS = False; + for (i = x1; i < x2; i++) { + if (irsb_IN->stmts[i]->tag == Ist_CAS) + has_CAS = True; + } + + if (expect_CAS != has_CAS) { + /* inconsistency detected. re-disassemble the instruction so as + to generate a useful error message; then assert. */ + vex_traceflags |= VEX_TRACE_FE; + dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn, + callback_opaque, delta, archinfo ); + for (i = x1; i < x2; i++) { + vex_printf("\t\t"); + ppIRStmt(irsb_IN->stmts[i]); + vex_printf("\n"); + } + /* Failure of this assertion is serious and denotes a bug in + disInstr. */ + vpanic("disInstr_X86: inconsistency in LOCK prefix handling"); + } + + return dres; + } + + + /*--------------------------------------------------------------------*/ + /*--- end guest_x86_toIR.c ---*/ + /*--------------------------------------------------------------------*/ ==== //depot/vendor/valgrind/VEX/priv/host-amd64/hdefs.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-amd64/hdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-amd64/isel.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-arm/hdefs.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-arm/hdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-arm/isel.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-generic/h_generic_regs.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-generic/h_generic_regs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-generic/h_generic_simd64.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-generic/h_generic_simd64.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-generic/reg_alloc2.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-ppc/hdefs.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-ppc/hdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-ppc/isel.c#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-x86/hdefs.c#1 - === ==== 
//depot/vendor/valgrind/VEX/priv/host-x86/hdefs.h#1 - === ==== //depot/vendor/valgrind/VEX/priv/host-x86/isel.c#1 - === Index: VEX/priv/host_amd64_defs.c =========================================================================== *** /dev/null Sat May 26 10:11:03 2012 --- VEX/priv/host_amd64_defs.c Sat May 26 10:11:29 2012 *************** *** 0 **** --- 1,3550 ---- + + /*---------------------------------------------------------------*/ + /*--- ---*/ + /*--- This file (host_amd64_defs.c) is ---*/ + /*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*/ + /*--- ---*/ + /*---------------------------------------------------------------*/ + + /* + This file is part of LibVEX, a library for dynamic binary + instrumentation and translation. + + Copyright (C) 2004-2009 OpenWorks LLP. All rights reserved. + + This library is made available under a dual licensing scheme. + + If you link LibVEX against other code all of which is itself + licensed under the GNU General Public License, version 2 dated June + 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL + v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL + is missing, you can obtain a copy of the GPL v2 from the Free + Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA + 02110-1301, USA. + + For any other uses of LibVEX, you must first obtain a commercial + license from OpenWorks LLP. Please contact info@open-works.co.uk + for information about commercial licensing. + + This software is provided by OpenWorks LLP "as is" and any express + or implied warranties, including, but not limited to, the implied + warranties of merchantability and fitness for a particular purpose + are disclaimed. 
In no event shall OpenWorks LLP be liable for any + direct, indirect, incidental, special, exemplary, or consequential + damages (including, but not limited to, procurement of substitute + goods or services; loss of use, data, or profits; or business + interruption) however caused and on any theory of liability, + whether in contract, strict liability, or tort (including + negligence or otherwise) arising in any way out of the use of this + software, even if advised of the possibility of such damage. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. + */ + + #include "libvex_basictypes.h" + #include "libvex.h" + #include "libvex_trc_values.h" + + #include "main_util.h" + #include "host_generic_regs.h" + #include "host_amd64_defs.h" + + + /* --------- Registers. --------- */ + + void ppHRegAMD64 ( HReg reg ) + { + Int r; + static HChar* ireg64_names[16] + = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; + /* Be generic for all virtual regs. */ + if (hregIsVirtual(reg)) { + ppHReg(reg); + return; + } + /* But specific for real regs. */ + switch (hregClass(reg)) { + case HRcInt64: + r = hregNumber(reg); + vassert(r >= 0 && r < 16); + vex_printf("%s", ireg64_names[r]); + return; + case HRcFlt64: + r = hregNumber(reg); + vassert(r >= 0 && r < 6); + vex_printf("%%fake%d", r); + return; + case HRcVec128: + r = hregNumber(reg); + vassert(r >= 0 && r < 16); + vex_printf("%%xmm%d", r); + return; + default: + vpanic("ppHRegAMD64"); + } + } + + static void ppHRegAMD64_lo32 ( HReg reg ) + { + Int r; + static HChar* ireg32_names[16] + = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; + /* Be generic for all virtual regs. 
*/ + if (hregIsVirtual(reg)) { + ppHReg(reg); + vex_printf("d"); + return; + } + /* But specific for real regs. */ + switch (hregClass(reg)) { + case HRcInt64: + r = hregNumber(reg); + vassert(r >= 0 && r < 16); + vex_printf("%s", ireg32_names[r]); + return; + default: + vpanic("ppHRegAMD64_lo32: invalid regclass"); + } + } + + HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); } + HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); } + HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); } + HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); } + HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); } + HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); } + HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); } + HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); } + HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); } + HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); } + HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); } + HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); } + HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); } + HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); } + HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); } + HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); } + + //.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } + //.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } + //.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } + //.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } + //.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } + //.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } + //.. 
+ HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); } + HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); } + HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); } + HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); } + HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); } + HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); } + HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); } + HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); } + HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); } + HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); } + HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); } + HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); } + HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); } + HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); } + HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); } + HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); } + + + void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr ) + { + #if 0 + *nregs = 6; + *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); + (*arr)[ 0] = hregAMD64_RSI(); + (*arr)[ 1] = hregAMD64_RDI(); + (*arr)[ 2] = hregAMD64_RBX(); + + (*arr)[ 3] = hregAMD64_XMM7(); + (*arr)[ 4] = hregAMD64_XMM8(); + (*arr)[ 5] = hregAMD64_XMM9(); + #endif + #if 1 + *nregs = 20; + *arr = LibVEX_Alloc(*nregs * sizeof(HReg)); + (*arr)[ 0] = hregAMD64_RSI(); + (*arr)[ 1] = hregAMD64_RDI(); + (*arr)[ 2] = hregAMD64_R8(); + (*arr)[ 3] = hregAMD64_R9(); + (*arr)[ 4] = hregAMD64_R12(); + (*arr)[ 5] = hregAMD64_R13(); + (*arr)[ 6] = hregAMD64_R14(); + (*arr)[ 7] = hregAMD64_R15(); + (*arr)[ 8] = hregAMD64_RBX(); + + (*arr)[ 9] = hregAMD64_XMM3(); + (*arr)[10] = hregAMD64_XMM4(); + (*arr)[11] = hregAMD64_XMM5(); + (*arr)[12] = hregAMD64_XMM6(); + (*arr)[13] 
= hregAMD64_XMM7(); + (*arr)[14] = hregAMD64_XMM8(); + (*arr)[15] = hregAMD64_XMM9(); + (*arr)[16] = hregAMD64_XMM10(); + (*arr)[17] = hregAMD64_XMM11(); + (*arr)[18] = hregAMD64_XMM12(); + (*arr)[19] = hregAMD64_R10(); + #endif + } + + + /* --------- Condition codes, Intel encoding. --------- */ + + HChar* showAMD64CondCode ( AMD64CondCode cond ) + { + switch (cond) { + case Acc_O: return "o"; + case Acc_NO: return "no"; + case Acc_B: return "b"; + case Acc_NB: return "nb"; + case Acc_Z: return "z"; + case Acc_NZ: return "nz"; + case Acc_BE: return "be"; + case Acc_NBE: return "nbe"; + case Acc_S: return "s"; + case Acc_NS: return "ns"; + case Acc_P: return "p"; + case Acc_NP: return "np"; + case Acc_L: return "l"; + case Acc_NL: return "nl"; + case Acc_LE: return "le"; + case Acc_NLE: return "nle"; + case Acc_ALWAYS: return "ALWAYS"; + default: vpanic("ppAMD64CondCode"); + } + } + + + /* --------- AMD64AMode: memory address expressions. --------- */ + + AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) { + AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); + am->tag = Aam_IR; + am->Aam.IR.imm = imm32; + am->Aam.IR.reg = reg; + return am; + } + AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { + AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode)); + am->tag = Aam_IRRS; + am->Aam.IRRS.imm = imm32; + am->Aam.IRRS.base = base; + am->Aam.IRRS.index = indEx; + am->Aam.IRRS.shift = shift; + vassert(shift >= 0 && shift <= 3); + return am; + } + + //.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) { + //.. switch (am->tag) { + //.. case Xam_IR: + //.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); + //.. case Xam_IRRS: + //.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, + //.. am->Xam.IRRS.index, am->Xam.IRRS.shift ); + //.. default: + //.. vpanic("dopyAMD64AMode"); + //.. } + //.. 
} + + void ppAMD64AMode ( AMD64AMode* am ) { + switch (am->tag) { + case Aam_IR: + if (am->Aam.IR.imm == 0) + vex_printf("("); + else + vex_printf("0x%x(", am->Aam.IR.imm); + ppHRegAMD64(am->Aam.IR.reg); + vex_printf(")"); + return; + case Aam_IRRS: + vex_printf("0x%x(", am->Aam.IRRS.imm); + ppHRegAMD64(am->Aam.IRRS.base); + vex_printf(","); + ppHRegAMD64(am->Aam.IRRS.index); + vex_printf(",%d)", 1 << am->Aam.IRRS.shift); + return; + default: + vpanic("ppAMD64AMode"); + } + } + + static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) { + switch (am->tag) { + case Aam_IR: + addHRegUse(u, HRmRead, am->Aam.IR.reg); + return; + case Aam_IRRS: + addHRegUse(u, HRmRead, am->Aam.IRRS.base); + addHRegUse(u, HRmRead, am->Aam.IRRS.index); + return; + default: + vpanic("addRegUsage_AMD64AMode"); + } + } + + static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) { + switch (am->tag) { + case Aam_IR: + am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg); + return; + case Aam_IRRS: + am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base); + am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index); + return; + default: + vpanic("mapRegs_AMD64AMode"); + } + } + + /* --------- Operand, which can be reg, immediate or memory. 
--------- */ + + AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) { + AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); + op->tag = Armi_Imm; + op->Armi.Imm.imm32 = imm32; + return op; + } + AMD64RMI* AMD64RMI_Reg ( HReg reg ) { + AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); + op->tag = Armi_Reg; + op->Armi.Reg.reg = reg; + return op; + } + AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) { + AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI)); + op->tag = Armi_Mem; + op->Armi.Mem.am = am; + return op; + } + + void ppAMD64RMI ( AMD64RMI* op ) { + switch (op->tag) { + case Armi_Imm: + vex_printf("$0x%x", op->Armi.Imm.imm32); + return; + case Armi_Reg: + ppHRegAMD64(op->Armi.Reg.reg); + return; + case Armi_Mem: + ppAMD64AMode(op->Armi.Mem.am); + return; + default: + vpanic("ppAMD64RMI"); + } + } + + /* An AMD64RMI can only be used in a "read" context (what would it mean + to write or modify a literal?) and so we enumerate its registers + accordingly. */ + static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) { + switch (op->tag) { + case Armi_Imm: + return; + case Armi_Reg: + addHRegUse(u, HRmRead, op->Armi.Reg.reg); + return; + case Armi_Mem: + addRegUsage_AMD64AMode(u, op->Armi.Mem.am); + return; + default: + vpanic("addRegUsage_AMD64RMI"); + } + } + + static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) { + switch (op->tag) { + case Armi_Imm: + return; + case Armi_Reg: + op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg); + return; + case Armi_Mem: + mapRegs_AMD64AMode(m, op->Armi.Mem.am); + return; + default: + vpanic("mapRegs_AMD64RMI"); + } + } + + + /* --------- Operand, which can be reg or immediate only. 
--------- */ + + AMD64RI* AMD64RI_Imm ( UInt imm32 ) { + AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); + op->tag = Ari_Imm; + op->Ari.Imm.imm32 = imm32; + return op; + } + AMD64RI* AMD64RI_Reg ( HReg reg ) { + AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI)); + op->tag = Ari_Reg; + op->Ari.Reg.reg = reg; + return op; + } + + void ppAMD64RI ( AMD64RI* op ) { + switch (op->tag) { + case Ari_Imm: + vex_printf("$0x%x", op->Ari.Imm.imm32); + return; + case Ari_Reg: + ppHRegAMD64(op->Ari.Reg.reg); + return; + default: + vpanic("ppAMD64RI"); + } + } + + /* An AMD64RI can only be used in a "read" context (what would it mean + to write or modify a literal?) and so we enumerate its registers + accordingly. */ + static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) { + switch (op->tag) { + case Ari_Imm: + return; + case Ari_Reg: + addHRegUse(u, HRmRead, op->Ari.Reg.reg); + return; + default: + vpanic("addRegUsage_AMD64RI"); + } + } + + static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) { + switch (op->tag) { + case Ari_Imm: + return; + case Ari_Reg: + op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg); + return; + default: + vpanic("mapRegs_AMD64RI"); + } + } + + + /* --------- Operand, which can be reg or memory only. --------- */ + + AMD64RM* AMD64RM_Reg ( HReg reg ) { + AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); + op->tag = Arm_Reg; + op->Arm.Reg.reg = reg; + return op; + } + AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) { + AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM)); + op->tag = Arm_Mem; + op->Arm.Mem.am = am; + return op; + } + + void ppAMD64RM ( AMD64RM* op ) { + switch (op->tag) { + case Arm_Mem: + ppAMD64AMode(op->Arm.Mem.am); + return; + case Arm_Reg: + ppHRegAMD64(op->Arm.Reg.reg); + return; + default: + vpanic("ppAMD64RM"); + } + } + + /* Because an AMD64RM can be both a source or destination operand, we + have to supply a mode -- pertaining to the operand as a whole -- + indicating how it's being used. 
*/ + static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) { + switch (op->tag) { + case Arm_Mem: + /* Memory is read, written or modified. So we just want to + know the regs read by the amode. */ + addRegUsage_AMD64AMode(u, op->Arm.Mem.am); + return; + case Arm_Reg: + /* reg is read, written or modified. Add it in the + appropriate way. */ + addHRegUse(u, mode, op->Arm.Reg.reg); + return; + default: + vpanic("addRegUsage_AMD64RM"); + } + } + + static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op ) + { + switch (op->tag) { + case Arm_Mem: + mapRegs_AMD64AMode(m, op->Arm.Mem.am); + return; + case Arm_Reg: + op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg); + return; + default: + vpanic("mapRegs_AMD64RM"); + } + } + + + /* --------- Instructions. --------- */ + + static HChar* showAMD64ScalarSz ( Int sz ) { + switch (sz) { + case 2: return "w"; + case 4: return "l"; + case 8: return "q"; + default: vpanic("showAMD64ScalarSz"); + } + } + + HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) { + switch (op) { + case Aun_NOT: return "not"; + case Aun_NEG: return "neg"; + default: vpanic("showAMD64UnaryOp"); + } + } + + HChar* showAMD64AluOp ( AMD64AluOp op ) { + switch (op) { + case Aalu_MOV: return "mov"; + case Aalu_CMP: return "cmp"; + case Aalu_ADD: return "add"; + case Aalu_SUB: return "sub"; + case Aalu_ADC: return "adc"; + case Aalu_SBB: return "sbb"; + case Aalu_AND: return "and"; + case Aalu_OR: return "or"; + case Aalu_XOR: return "xor"; + case Aalu_MUL: return "imul"; + default: vpanic("showAMD64AluOp"); + } + } + + HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) { + switch (op) { + case Ash_SHL: return "shl"; + case Ash_SHR: return "shr"; + case Ash_SAR: return "sar"; + default: vpanic("showAMD64ShiftOp"); + } + } + + HChar* showA87FpOp ( A87FpOp op ) { + switch (op) { + //.. case Xfp_ADD: return "add"; + //.. case Xfp_SUB: return "sub"; + //.. case Xfp_MUL: return "mul"; + //.. 
case Xfp_DIV: return "div"; + case Afp_SCALE: return "scale"; + case Afp_ATAN: return "atan"; + case Afp_YL2X: return "yl2x"; + case Afp_YL2XP1: return "yl2xp1"; + case Afp_PREM: return "prem"; + case Afp_PREM1: return "prem1"; + case Afp_SQRT: return "sqrt"; + //.. case Xfp_ABS: return "abs"; + //.. case Xfp_NEG: return "chs"; + //.. case Xfp_MOV: return "mov"; + case Afp_SIN: return "sin"; + case Afp_COS: return "cos"; + case Afp_TAN: return "tan"; + case Afp_ROUND: return "round"; + case Afp_2XM1: return "2xm1"; + default: vpanic("showA87FpOp"); + } + } + + HChar* showAMD64SseOp ( AMD64SseOp op ) { + switch (op) { + case Asse_MOV: return "movups"; + case Asse_ADDF: return "add"; + case Asse_SUBF: return "sub"; + case Asse_MULF: return "mul"; + case Asse_DIVF: return "div"; + case Asse_MAXF: return "max"; + case Asse_MINF: return "min"; + case Asse_CMPEQF: return "cmpFeq"; + case Asse_CMPLTF: return "cmpFlt"; + case Asse_CMPLEF: return "cmpFle"; + case Asse_CMPUNF: return "cmpFun"; + case Asse_RCPF: return "rcp"; + case Asse_RSQRTF: return "rsqrt"; + case Asse_SQRTF: return "sqrt"; + case Asse_AND: return "and"; + case Asse_OR: return "or"; + case Asse_XOR: return "xor"; + case Asse_ANDN: return "andn"; + case Asse_ADD8: return "paddb"; + case Asse_ADD16: return "paddw"; + case Asse_ADD32: return "paddd"; + case Asse_ADD64: return "paddq"; + case Asse_QADD8U: return "paddusb"; + case Asse_QADD16U: return "paddusw"; + case Asse_QADD8S: return "paddsb"; + case Asse_QADD16S: return "paddsw"; + case Asse_SUB8: return "psubb"; + case Asse_SUB16: return "psubw"; + case Asse_SUB32: return "psubd"; + case Asse_SUB64: return "psubq"; + case Asse_QSUB8U: return "psubusb"; + case Asse_QSUB16U: return "psubusw"; + case Asse_QSUB8S: return "psubsb"; + case Asse_QSUB16S: return "psubsw"; + case Asse_MUL16: return "pmullw"; + case Asse_MULHI16U: return "pmulhuw"; + case Asse_MULHI16S: return "pmulhw"; + case Asse_AVG8U: return "pavgb"; + case Asse_AVG16U: return "pavgw"; + case 
Asse_MAX16S: return "pmaxw"; + case Asse_MAX8U: return "pmaxub"; + case Asse_MIN16S: return "pminw"; + case Asse_MIN8U: return "pminub"; + case Asse_CMPEQ8: return "pcmpeqb"; + case Asse_CMPEQ16: return "pcmpeqw"; + case Asse_CMPEQ32: return "pcmpeqd"; + case Asse_CMPGT8S: return "pcmpgtb"; + case Asse_CMPGT16S: return "pcmpgtw"; + case Asse_CMPGT32S: return "pcmpgtd"; + case Asse_SHL16: return "psllw"; + case Asse_SHL32: return "pslld"; + case Asse_SHL64: return "psllq"; + case Asse_SHR16: return "psrlw"; + case Asse_SHR32: return "psrld"; + case Asse_SHR64: return "psrlq"; + case Asse_SAR16: return "psraw"; + case Asse_SAR32: return "psrad"; + case Asse_PACKSSD: return "packssdw"; + case Asse_PACKSSW: return "packsswb"; + case Asse_PACKUSW: return "packuswb"; + case Asse_UNPCKHB: return "punpckhb"; + case Asse_UNPCKHW: return "punpckhw"; + case Asse_UNPCKHD: return "punpckhd"; + case Asse_UNPCKHQ: return "punpckhq"; + case Asse_UNPCKLB: return "punpcklb"; + case Asse_UNPCKLW: return "punpcklw"; + case Asse_UNPCKLD: return "punpckld"; + case Asse_UNPCKLQ: return "punpcklq"; + default: vpanic("showAMD64SseOp"); + } + } + + AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Imm64; + i->Ain.Imm64.imm64 = imm64; + i->Ain.Imm64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Alu64R; + i->Ain.Alu64R.op = op; + i->Ain.Alu64R.src = src; + i->Ain.Alu64R.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Alu64M; + i->Ain.Alu64M.op = op; + i->Ain.Alu64M.src = src; + i->Ain.Alu64M.dst = dst; + vassert(op != Aalu_MUL); + return i; + } + AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + 
i->tag = Ain_Sh64; + i->Ain.Sh64.op = op; + i->Ain.Sh64.src = src; + i->Ain.Sh64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Test64; + i->Ain.Test64.imm32 = imm32; + i->Ain.Test64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Unary64; + i->Ain.Unary64.op = op; + i->Ain.Unary64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Lea64; + i->Ain.Lea64.am = am; + i->Ain.Lea64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_MulL; + i->Ain.MulL.syned = syned; + i->Ain.MulL.src = src; + return i; + } + AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Div; + i->Ain.Div.syned = syned; + i->Ain.Div.sz = sz; + i->Ain.Div.src = src; + vassert(sz == 4 || sz == 8); + return i; + } + //.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_Sh3232; + //.. i->Xin.Sh3232.op = op; + //.. i->Xin.Sh3232.amt = amt; + //.. i->Xin.Sh3232.src = src; + //.. i->Xin.Sh3232.dst = dst; + //.. vassert(op == Xsh_SHL || op == Xsh_SHR); + //.. return i; + //.. 
} + AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Push; + i->Ain.Push.src = src; + return i; + } + AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Call; + i->Ain.Call.cond = cond; + i->Ain.Call.target = target; + i->Ain.Call.regparms = regparms; + vassert(regparms >= 0 && regparms <= 6); + return i; + } + AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Goto; + i->Ain.Goto.cond = cond; + i->Ain.Goto.dst = dst; + i->Ain.Goto.jk = jk; + return i; + } + AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_CMov64; + i->Ain.CMov64.cond = cond; + i->Ain.CMov64.src = src; + i->Ain.CMov64.dst = dst; + vassert(cond != Acc_ALWAYS); + return i; + } + AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_MovZLQ; + i->Ain.MovZLQ.src = src; + i->Ain.MovZLQ.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, + AMD64AMode* src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_LoadEX; + i->Ain.LoadEX.szSmall = szSmall; + i->Ain.LoadEX.syned = syned; + i->Ain.LoadEX.src = src; + i->Ain.LoadEX.dst = dst; + vassert(szSmall == 1 || szSmall == 2 || szSmall == 4); + return i; + } + AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Store; + i->Ain.Store.sz = sz; + i->Ain.Store.src = src; + i->Ain.Store.dst = dst; + vassert(sz == 1 || sz == 2 || sz == 4); + return i; + } + AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Set64; + 
i->Ain.Set64.cond = cond; + i->Ain.Set64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Bsfr64; + i->Ain.Bsfr64.isFwds = isFwds; + i->Ain.Bsfr64.src = src; + i->Ain.Bsfr64.dst = dst; + return i; + } + AMD64Instr* AMD64Instr_MFence ( void ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_MFence; + return i; + } + AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_ACAS; + i->Ain.ACAS.addr = addr; + i->Ain.ACAS.sz = sz; + vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); + return i; + } + AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_DACAS; + i->Ain.DACAS.addr = addr; + i->Ain.DACAS.sz = sz; + vassert(sz == 8 || sz == 4); + return i; + } + + AMD64Instr* AMD64Instr_A87Free ( Int nregs ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_A87Free; + i->Ain.A87Free.nregs = nregs; + vassert(nregs >= 1 && nregs <= 7); + return i; + } + AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_A87PushPop; + i->Ain.A87PushPop.addr = addr; + i->Ain.A87PushPop.isPush = isPush; + return i; + } + AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_A87FpOp; + i->Ain.A87FpOp.op = op; + return i; + } + AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_A87LdCW; + i->Ain.A87LdCW.addr = addr; + return i; + } + AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_A87StSW; + i->Ain.A87StSW.addr = addr; + return i; + } + + //.. 
AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpUnary; + //.. i->Xin.FpUnary.op = op; + //.. i->Xin.FpUnary.src = src; + //.. i->Xin.FpUnary.dst = dst; + //.. return i; + //.. } + //.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpBinary; + //.. i->Xin.FpBinary.op = op; + //.. i->Xin.FpBinary.srcL = srcL; + //.. i->Xin.FpBinary.srcR = srcR; + //.. i->Xin.FpBinary.dst = dst; + //.. return i; + //.. } + //.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpLdSt; + //.. i->Xin.FpLdSt.isLoad = isLoad; + //.. i->Xin.FpLdSt.sz = sz; + //.. i->Xin.FpLdSt.reg = reg; + //.. i->Xin.FpLdSt.addr = addr; + //.. vassert(sz == 4 || sz == 8); + //.. return i; + //.. } + //.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, + //.. HReg reg, AMD64AMode* addr ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpLdStI; + //.. i->Xin.FpLdStI.isLoad = isLoad; + //.. i->Xin.FpLdStI.sz = sz; + //.. i->Xin.FpLdStI.reg = reg; + //.. i->Xin.FpLdStI.addr = addr; + //.. vassert(sz == 2 || sz == 4 || sz == 8); + //.. return i; + //.. } + //.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_Fp64to32; + //.. i->Xin.Fp64to32.src = src; + //.. i->Xin.Fp64to32.dst = dst; + //.. return i; + //.. } + //.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpCMov; + //.. i->Xin.FpCMov.cond = cond; + //.. i->Xin.FpCMov.src = src; + //.. i->Xin.FpCMov.dst = dst; + //.. vassert(cond != Xcc_ALWAYS); + //.. return i; + //.. 
} + AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_LdMXCSR; + i->Ain.LdMXCSR.addr = addr; + return i; + } + //.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_FpStSW_AX; + //.. return i; + //.. } + AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseUComIS; + i->Ain.SseUComIS.sz = toUChar(sz); + i->Ain.SseUComIS.srcL = srcL; + i->Ain.SseUComIS.srcR = srcR; + i->Ain.SseUComIS.dst = dst; + vassert(sz == 4 || sz == 8); + return i; + } + AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseSI2SF; + i->Ain.SseSI2SF.szS = toUChar(szS); + i->Ain.SseSI2SF.szD = toUChar(szD); + i->Ain.SseSI2SF.src = src; + i->Ain.SseSI2SF.dst = dst; + vassert(szS == 4 || szS == 8); + vassert(szD == 4 || szD == 8); + return i; + } + AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseSF2SI; + i->Ain.SseSF2SI.szS = toUChar(szS); + i->Ain.SseSF2SI.szD = toUChar(szD); + i->Ain.SseSF2SI.src = src; + i->Ain.SseSF2SI.dst = dst; + vassert(szS == 4 || szS == 8); + vassert(szD == 4 || szD == 8); + return i; + } + AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseSDSS; + i->Ain.SseSDSS.from64 = from64; + i->Ain.SseSDSS.src = src; + i->Ain.SseSDSS.dst = dst; + return i; + } + + //.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) { + //.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + //.. i->tag = Xin_SseConst; + //.. i->Xin.SseConst.con = con; + //.. i->Xin.SseConst.dst = dst; + //.. vassert(hregClass(dst) == HRcVec128); + //.. return i; + //.. 
} + AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, + HReg reg, AMD64AMode* addr ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseLdSt; + i->Ain.SseLdSt.isLoad = isLoad; + i->Ain.SseLdSt.sz = toUChar(sz); + i->Ain.SseLdSt.reg = reg; + i->Ain.SseLdSt.addr = addr; + vassert(sz == 4 || sz == 8 || sz == 16); + return i; + } + AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr ) + { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseLdzLO; + i->Ain.SseLdzLO.sz = sz; + i->Ain.SseLdzLO.reg = reg; + i->Ain.SseLdzLO.addr = addr; + vassert(sz == 4 || sz == 8); + return i; + } + AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Sse32Fx4; + i->Ain.Sse32Fx4.op = op; + i->Ain.Sse32Fx4.src = src; + i->Ain.Sse32Fx4.dst = dst; + vassert(op != Asse_MOV); + return i; + } + AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Sse32FLo; + i->Ain.Sse32FLo.op = op; + i->Ain.Sse32FLo.src = src; + i->Ain.Sse32FLo.dst = dst; + vassert(op != Asse_MOV); + return i; + } + AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Sse64Fx2; + i->Ain.Sse64Fx2.op = op; + i->Ain.Sse64Fx2.src = src; + i->Ain.Sse64Fx2.dst = dst; + vassert(op != Asse_MOV); + return i; + } + AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_Sse64FLo; + i->Ain.Sse64FLo.op = op; + i->Ain.Sse64FLo.src = src; + i->Ain.Sse64FLo.dst = dst; + vassert(op != Asse_MOV); + return i; + } + AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseReRg; + i->Ain.SseReRg.op = op; + i->Ain.SseReRg.src = re; + i->Ain.SseReRg.dst = rg; + return i; + } 
+ /* Build an Ain_SseCMov instruction: allocates an AMD64Instr with LibVEX_Alloc, sets the tag and stores cond, src and dst. cond must not be Acc_ALWAYS; this is checked by vassert after the fields have been written. */ AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseCMov; + i->Ain.SseCMov.cond = cond; + i->Ain.SseCMov.src = src; + i->Ain.SseCMov.dst = dst; + vassert(cond != Acc_ALWAYS); + return i; + } + /* Build an Ain_SseShuf instruction: allocates an AMD64Instr with LibVEX_Alloc, sets the tag and stores order, src and dst. order must lie in 0 .. 0xFF; this is checked by vassert after the fields have been written. */ AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_SseShuf; + i->Ain.SseShuf.order = order; + i->Ain.SseShuf.src = src; + i->Ain.SseShuf.dst = dst; + vassert(order >= 0 && order <= 0xFF); + return i; + } + + /* Print a textual rendering of instruction i using vex_printf and the pp* / show* helpers. mode64 must be True (asserted). The switch dispatches on i->tag; a tag with no case ends in vpanic("ppAMD64Instr"). Some cases finish with 'return' and others with 'break'; nothing follows the switch, so both simply leave the function. Cases prefixed with //.. are commented out and refer to Xin_* tags (presumably left over from the x86 backend -- TODO confirm). */ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) + { + vassert(mode64 == True); + switch (i->tag) { + case Ain_Imm64: + vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64); + ppHRegAMD64(i->Ain.Imm64.dst); + return; + case Ain_Alu64R: + vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op)); + ppAMD64RMI(i->Ain.Alu64R.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Alu64R.dst); + return; + case Ain_Alu64M: + vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op)); + ppAMD64RI(i->Ain.Alu64M.src); + vex_printf(","); + ppAMD64AMode(i->Ain.Alu64M.dst); + return; + case Ain_Sh64: /* a src of 0 is printed as the %cl operand, any other value as an immediate */ + vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op)); + if (i->Ain.Sh64.src == 0) + vex_printf("%%cl,"); + else + vex_printf("$%d,", (Int)i->Ain.Sh64.src); + ppHRegAMD64(i->Ain.Sh64.dst); + return; + case Ain_Test64: + vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32); + ppHRegAMD64(i->Ain.Test64.dst); + return; + case Ain_Unary64: + vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op)); + ppHRegAMD64(i->Ain.Unary64.dst); + return; + case Ain_Lea64: + vex_printf("leaq "); + ppAMD64AMode(i->Ain.Lea64.am); + vex_printf(","); + ppHRegAMD64(i->Ain.Lea64.dst); + return; + case Ain_MulL: + vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u'); + ppAMD64RM(i->Ain.MulL.src); + return; + case Ain_Div: + vex_printf("%cdiv%s ", + i->Ain.Div.syned ? 's' : 'u', + showAMD64ScalarSz(i->Ain.Div.sz)); + ppAMD64RM(i->Ain.Div.src); + return; + //.. 
case Xin_Sh3232: + //.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op)); + //.. if (i->Xin.Sh3232.amt == 0) + //.. vex_printf(" %%cl,"); + //.. else + //.. vex_printf(" $%d,", i->Xin.Sh3232.amt); + //.. ppHRegAMD64(i->Xin.Sh3232.src); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.Sh3232.dst); + //.. return; + case Ain_Push: + vex_printf("pushq "); + ppAMD64RMI(i->Ain.Push.src); + return; + case Ain_Call: /* the condition suffix is empty for Acc_ALWAYS; the bracketed number is regparms, followed by the target in hex */ + vex_printf("call%s[%d] ", + i->Ain.Call.cond==Acc_ALWAYS + ? "" : showAMD64CondCode(i->Ain.Call.cond), + i->Ain.Call.regparms ); + vex_printf("0x%llx", i->Ain.Call.target); + break; + case Ain_Goto: /* a conditional goto is wrapped in "if (%rflags.cc) { ... }"; jump kinds other than Ijk_Boring, Ijk_Call and Ijk_Ret are additionally shown as a movl of the kind into %ebp */ + if (i->Ain.Goto.cond != Acc_ALWAYS) { + vex_printf("if (%%rflags.%s) { ", + showAMD64CondCode(i->Ain.Goto.cond)); + } + if (i->Ain.Goto.jk != Ijk_Boring + && i->Ain.Goto.jk != Ijk_Call + && i->Ain.Goto.jk != Ijk_Ret) { + vex_printf("movl $"); + ppIRJumpKind(i->Ain.Goto.jk); + vex_printf(",%%ebp ; "); + } + vex_printf("movq "); + ppAMD64RI(i->Ain.Goto.dst); + vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx"); + if (i->Ain.Goto.cond != Acc_ALWAYS) { + vex_printf(" }"); + } + return; + case Ain_CMov64: + vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond)); + ppAMD64RM(i->Ain.CMov64.src); + vex_printf(","); + ppHRegAMD64(i->Ain.CMov64.dst); + return; + case Ain_MovZLQ: + vex_printf("movzlq "); + ppHRegAMD64_lo32(i->Ain.MovZLQ.src); + vex_printf(","); + ppHRegAMD64(i->Ain.MovZLQ.dst); + return; + case Ain_LoadEX: /* an unsigned 4-byte load is shown as a plain movl into the low 32 bits of dst; every other combination as movs/movz with a b/w/l size letter */ + if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) { + vex_printf("movl "); + ppAMD64AMode(i->Ain.LoadEX.src); + vex_printf(","); + ppHRegAMD64_lo32(i->Ain.LoadEX.dst); + } else { + vex_printf("mov%c%cq ", + i->Ain.LoadEX.syned ? 's' : 'z', + i->Ain.LoadEX.szSmall==1 + ? 'b' + : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l')); + ppAMD64AMode(i->Ain.LoadEX.src); + vex_printf(","); + ppHRegAMD64(i->Ain.LoadEX.dst); + } + return; + case Ain_Store: + vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b' + : (i->Ain.Store.sz==2 ? 
'w' : 'l')); + ppHRegAMD64(i->Ain.Store.src); + vex_printf(","); + ppAMD64AMode(i->Ain.Store.dst); + return; + case Ain_Set64: + vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond)); + ppHRegAMD64(i->Ain.Set64.dst); + return; + case Ain_Bsfr64: + vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r'); + ppHRegAMD64(i->Ain.Bsfr64.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Bsfr64.dst); + return; + case Ain_MFence: + vex_printf("mfence" ); + return; + case Ain_ACAS: /* size letter: sz 1 -> b, 2 -> w, 4 -> l, any other value -> q */ + vex_printf("lock cmpxchg%c ", + i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w' + : i->Ain.ACAS.sz==4 ? 'l' : 'q' ); + vex_printf("{%%rax->%%rbx},"); + ppAMD64AMode(i->Ain.ACAS.addr); + return; + case Ain_DACAS: /* the number printed after "cmpxchg" is 2 * sz */ + vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},", + (Int)(2 * i->Ain.DACAS.sz)); + ppAMD64AMode(i->Ain.DACAS.addr); + return; + case Ain_A87Free: /* the lower end of the printed %st range is 8 - nregs */ + vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs ); + break; + case Ain_A87PushPop: + vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl "); + ppAMD64AMode(i->Ain.A87PushPop.addr); + break; + case Ain_A87FpOp: + vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op)); + break; + case Ain_A87LdCW: + vex_printf("fldcw "); + ppAMD64AMode(i->Ain.A87LdCW.addr); + break; + case Ain_A87StSW: + vex_printf("fstsw "); + ppAMD64AMode(i->Ain.A87StSW.addr); + break; + //.. case Xin_FpUnary: + //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op)); + //.. ppHRegAMD64(i->Xin.FpUnary.src); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.FpUnary.dst); + //.. break; + //.. case Xin_FpBinary: + //.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op)); + //.. ppHRegAMD64(i->Xin.FpBinary.srcL); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.FpBinary.srcR); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.FpBinary.dst); + //.. break; + //.. case Xin_FpLdSt: + //.. if (i->Xin.FpLdSt.isLoad) { + //.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); + //.. ppAMD64AMode(i->Xin.FpLdSt.addr); + //.. vex_printf(", "); + //.. 
ppHRegAMD64(i->Xin.FpLdSt.reg); + //.. } else { + //.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); + //.. ppHRegAMD64(i->Xin.FpLdSt.reg); + //.. vex_printf(", "); + //.. ppAMD64AMode(i->Xin.FpLdSt.addr); + //.. } + //.. return; + //.. case Xin_FpLdStI: + //.. if (i->Xin.FpLdStI.isLoad) { + //.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : + //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); + //.. ppAMD64AMode(i->Xin.FpLdStI.addr); + //.. vex_printf(", "); + //.. ppHRegAMD64(i->Xin.FpLdStI.reg); + //.. } else { + //.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : + //.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); + //.. ppHRegAMD64(i->Xin.FpLdStI.reg); + //.. vex_printf(", "); + //.. ppAMD64AMode(i->Xin.FpLdStI.addr); + //.. } + //.. return; + //.. case Xin_Fp64to32: + //.. vex_printf("gdtof "); + //.. ppHRegAMD64(i->Xin.Fp64to32.src); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.Fp64to32.dst); + //.. return; + //.. case Xin_FpCMov: + //.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond)); + //.. ppHRegAMD64(i->Xin.FpCMov.src); + //.. vex_printf(","); + //.. ppHRegAMD64(i->Xin.FpCMov.dst); + //.. return; + //.. case Xin_FpLdStCW: + //.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw "); + //.. ppAMD64AMode(i->Xin.FpLdStCW.addr); + //.. return; + //.. case Xin_FpStSW_AX: + //.. vex_printf("fstsw %%ax"); + //.. return; + case Ain_LdMXCSR: + vex_printf("ldmxcsr "); + ppAMD64AMode(i->Ain.LdMXCSR.addr); + break; + case Ain_SseUComIS: /* printed as ucomiss/ucomisd (sz 4 selects the "s" form) followed by " ; pushfq ; popq " and dst */ + vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d"); + ppHRegAMD64(i->Ain.SseUComIS.srcL); + vex_printf(","); + ppHRegAMD64(i->Ain.SseUComIS.srcR); + vex_printf(" ; pushfq ; popq "); + ppHRegAMD64(i->Ain.SseUComIS.dst); + break; + case Ain_SseSI2SF: /* a source with szS == 4 is printed via ppHRegAMD64_lo32, any other size via ppHRegAMD64 */ + vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d"); + (i->Ain.SseSI2SF.szS==4 ? 
ppHRegAMD64_lo32 : ppHRegAMD64) + (i->Ain.SseSI2SF.src); + vex_printf(","); + ppHRegAMD64(i->Ain.SseSI2SF.dst); + break; + case Ain_SseSF2SI: /* a destination with szD == 4 is printed via ppHRegAMD64_lo32, any other size via ppHRegAMD64 */ + vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d"); + ppHRegAMD64(i->Ain.SseSF2SI.src); + vex_printf(","); + (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64) + (i->Ain.SseSF2SI.dst); + break; + case Ain_SseSDSS: + vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd "); + ppHRegAMD64(i->Ain.SseSDSS.src); + vex_printf(","); + ppHRegAMD64(i->Ain.SseSDSS.dst); + break; + //.. case Xin_SseConst: + //.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); + //.. ppHRegAMD64(i->Xin.SseConst.dst); + //.. break; + case Ain_SseLdSt: /* mnemonic by size: 4 -> movss, 8 -> movsd, 16 -> movups, any other size hits vassert(0); a load prints "addr,reg" and a store prints "reg,addr" */ + switch (i->Ain.SseLdSt.sz) { + case 4: vex_printf("movss "); break; + case 8: vex_printf("movsd "); break; + case 16: vex_printf("movups "); break; + default: vassert(0); + } + if (i->Ain.SseLdSt.isLoad) { + ppAMD64AMode(i->Ain.SseLdSt.addr); + vex_printf(","); + ppHRegAMD64(i->Ain.SseLdSt.reg); + } else { + ppHRegAMD64(i->Ain.SseLdSt.reg); + vex_printf(","); + ppAMD64AMode(i->Ain.SseLdSt.addr); + } + return; + case Ain_SseLdzLO: + vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? 
"s" : "d"); + ppAMD64AMode(i->Ain.SseLdzLO.addr); + vex_printf(","); + ppHRegAMD64(i->Ain.SseLdzLO.reg); + return; + case Ain_Sse32Fx4: + vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op)); + ppHRegAMD64(i->Ain.Sse32Fx4.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Sse32Fx4.dst); + return; + case Ain_Sse32FLo: + vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op)); + ppHRegAMD64(i->Ain.Sse32FLo.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Sse32FLo.dst); + return; + case Ain_Sse64Fx2: + vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op)); + ppHRegAMD64(i->Ain.Sse64Fx2.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Sse64Fx2.dst); + return; + case Ain_Sse64FLo: + vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op)); + ppHRegAMD64(i->Ain.Sse64FLo.src); + vex_printf(","); + ppHRegAMD64(i->Ain.Sse64FLo.dst); + return; + case Ain_SseReRg: + vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op)); + ppHRegAMD64(i->Ain.SseReRg.src); + vex_printf(","); + ppHRegAMD64(i->Ain.SseReRg.dst); + return; + case Ain_SseCMov: + vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond)); + ppHRegAMD64(i->Ain.SseCMov.src); + vex_printf(","); + ppHRegAMD64(i->Ain.SseCMov.dst); + return; + case Ain_SseShuf: + vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order); + ppHRegAMD64(i->Ain.SseShuf.src); + vex_printf(","); + ppHRegAMD64(i->Ain.SseShuf.dst); + return; + + default: + vpanic("ppAMD64Instr"); + } + } + + /* --------- Helpers for register allocation. --------- */ + + void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) + { + Bool unary; + vassert(mode64 == True); + initHRegUsage(u); + switch (i->tag) { + case Ain_Imm6