--- Makefile.kmk 2014-01-15 14:25:20.000000000 -0500
+++ Makefile.kmk 2014-01-14 15:21:31.000000000 -0500
@@ -42,8 +42,6 @@ endif
 ifneq ($(KBUILD_TARGET),win)
 VBoxRemPrimary_TEMPLATE = VBOXR3NP
 VBoxRemPrimary_TOOL.freebsd = VBoxGccFreeBSD
- # workaround the regparm bug in gcc <= 3.3
- VBoxRemPrimary_DEFS = $(if $(VBOX_GCC_BUGGY_REGPARM),GCC_WITH_BUGGY_REGPARM,)
 else
 VBoxRemPrimary_TEMPLATE = DUMMY
 VBoxRemPrimary_TOOL.win.x86 = MINGW32
@@ -59,8 +57,6 @@ else
 VBoxRemPrimary_CFLAGS.release += -fno-gcse -O2
 VBoxRemPrimary_CFLAGS.profile = $(VBoxRemPrimary_CFLAGS.release)
 VBoxRemPrimary_DEFS += IN_RING3 $(ARCH_BITS_DEFS)
- # Workaround the regparm bug in gcc <= 3.3.
- VBoxRemPrimary_DEFS.win.x86 += GCC_WITH_BUGGY_REGPARM
 # Missing fpclassify. Is there a better define or flag for this?
 VBoxRemPrimary_DEFS.solaris += __C99FEATURES__
 endif # win
@@ -112,7 +108,7 @@ VBoxRemPrimary_SOURCES = \
 tcg-runtime.c \
 tcg/tcg.c \
 tcg/tcg-dyngen.c \
- fpu/softfloat-native.c \
+ fpu/softfloat.c \
 target-i386/op_helper.c \
 target-i386/helper.c \
 target-i386/translate.c
--- Sun/config-host.h 2013-12-18 11:11:37.000000000 -0500
+++ Sun/config-host.h 2014-01-14 15:21:31.000000000 -0500
@@ -40,7 +40,6 @@
 # define HAVE_BYTESWAP_H 1
 # endif
 #endif
-#define QEMU_VERSION "0.13.0"
+#define QEMU_VERSION "0.15.1"
 #define CONFIG_UNAME_RELEASE ""
 #define CONFIG_QEMU_SHAREDIR "."
-
--- Sun/config.h 2013-12-18 11:11:37.000000000 -0500
+++ Sun/config.h 2014-01-14 15:21:31.000000000 -0500
@@ -22,9 +22,16 @@
 #define CONFIG_SOFTMMU 1
 #define TARGET_PHYS_ADDR_BITS 64
+#define TARGET_SHORT_ALIGNMENT 2
+#define TARGET_INT_ALIGNMENT 4
+#define TARGET_LLONG_ALIGNMENT 8
+
 #ifdef VBOX_WITH_64_BITS_GUESTS
 # if defined(__x86_64__) || defined (VBOX_ENABLE_VBOXREM64)
 # define TARGET_X86_64
+# define TARGET_LONG_ALIGNMENT 8
+# else
+# define TARGET_LONG_ALIGNMENT 4
 # endif
 #endif
--- Sun/testmath.c 2014-01-15 14:25:21.000000000 -0500
+++ Sun/testmath.c 2014-01-14 15:21:31.000000000 -0500
@@ -409,11 +409,7 @@ struct myenv
 unsigned int fpuc;
 unsigned char fptags[8]; /* 0 = valid, 1 = empty */
 union {
-#ifdef USE_X86LDOUBLE
- CPU86_LDouble d __attribute__((aligned(16)));
-#else
- CPU86_LDouble d;
-#endif
+ floatx80 d __attribute__((aligned(16)));
 } fpregs[8];
 } my_env, env_org, env_res, *env = &my_env;
@@ -673,11 +669,7 @@ static void helper_fxam_ST0(void)
 /* XXX: test fptags too */
 expdif = EXPD(temp);
 if (expdif == MAXEXPD) {
-#ifdef USE_X86LDOUBLE
 if (MANTD(temp) == 0x8000000000000000ULL)
-#else
- if (MANTD(temp) == 0)
-#endif
 env->fpus |= 0x500 /*Infinity*/;
 else
 env->fpus |= 0x100 /*NaN*/;
--- VBoxRecompiler.c 2013-12-18 11:11:37.000000000 -0500
+++ VBoxRecompiler.c 2014-01-14 15:21:31.000000000 -0500
@@ -341,9 +341,9 @@ REMR3DECL(int) REMR3Init(PVM pVM)
 /*
 * Register ram types.
 */
- pVM->rem.s.iMMIOMemType = cpu_register_io_memory(g_apfnMMIORead, g_apfnMMIOWrite, &pVM->rem.s.Env);
+ pVM->rem.s.iMMIOMemType = cpu_register_io_memory(g_apfnMMIORead, g_apfnMMIOWrite, &pVM->rem.s.Env, DEVICE_NATIVE_ENDIAN);
 AssertReleaseMsg(pVM->rem.s.iMMIOMemType >= 0, ("pVM->rem.s.iMMIOMemType=%d\n", pVM->rem.s.iMMIOMemType));
- pVM->rem.s.iHandlerMemType = cpu_register_io_memory(g_apfnHandlerRead, g_apfnHandlerWrite, pVM);
+ pVM->rem.s.iHandlerMemType = cpu_register_io_memory(g_apfnHandlerRead, g_apfnHandlerWrite, pVM, DEVICE_NATIVE_ENDIAN);
 AssertReleaseMsg(pVM->rem.s.iHandlerMemType >= 0, ("pVM->rem.s.iHandlerMemType=%d\n", pVM->rem.s.iHandlerMemType));
 Log2(("REM: iMMIOMemType=%d iHandlerMemType=%d\n", pVM->rem.s.iMMIOMemType, pVM->rem.s.iHandlerMemType));
@@ -783,7 +783,7 @@ REMR3DECL(int) REMR3Step(PVM pVM, PVMCPU
 * pending interrupts and suchlike.
 */
 interrupt_request = pVM->rem.s.Env.interrupt_request;
- Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER)));
+ Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER)));
 pVM->rem.s.Env.interrupt_request = 0;
 cpu_single_step(&pVM->rem.s.Env, 1);
@@ -923,7 +923,7 @@ REMR3DECL(int) REMR3EmulateInstruction(P
 if (RT_SUCCESS(rc))
 {
 int interrupt_request = pVM->rem.s.Env.interrupt_request;
- Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER)));
+ Assert(!(interrupt_request & ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_EXTERNAL_HARD | CPU_INTERRUPT_EXTERNAL_EXIT | CPU_INTERRUPT_EXTERNAL_FLUSH_TLB | CPU_INTERRUPT_EXTERNAL_TIMER)));
 #ifdef REM_USE_QEMU_SINGLE_STEP_FOR_LOGGING
 cpu_single_step(&pVM->rem.s.Env, 0);
 #endif
@@ -2488,7 +2488,7 @@ REMR3DECL(int) REMR3State(PVM pVM, PVMC
 * Clear old interrupt request flags; Check for pending hardware interrupts.
 * (See @remark for why we don't check for other FFs.)
 */
- pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB | CPU_INTERRUPT_TIMER);
+ pVM->rem.s.Env.interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB);
 if ( pVM->rem.s.u32PendingInterrupt != REM_NO_PENDING_IRQ
 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
 pVM->rem.s.Env.interrupt_request |= CPU_INTERRUPT_HARD;
--- bswap.h 2013-12-18 11:11:37.000000000 -0500
+++ bswap.h 2014-01-14 15:21:31.000000000 -0500
@@ -144,6 +144,7 @@ CPU_CONVERT(le, 64, uint64_t)
 #define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
 #define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+#define cpu_to_be64wu(p, v) cpu_to_be64w(p, v)
 #else
@@ -201,6 +202,20 @@ static inline void cpu_to_be32wu(uint32_
 p1[3] = v & 0xff;
 }
+static inline void cpu_to_be64wu(uint64_t *p, uint64_t v)
+{
+ uint8_t *p1 = (uint8_t *)p;
+
+ p1[0] = v >> 56;
+ p1[1] = v >> 48;
+ p1[2] = v >> 40;
+ p1[3] = v >> 32;
+ p1[4] = v >> 24;
+ p1[5] = v >> 16;
+ p1[6] = v >> 8;
+ p1[7] = v & 0xff;
+}
+
 #endif
 #ifdef HOST_WORDS_BIGENDIAN
--- cache-utils.h 2013-12-18 11:11:37.000000000 -0500
+++ cache-utils.h 2014-01-14 15:21:31.000000000 -0500
@@ -9,7 +9,7 @@ struct qemu_cache_conf {
 extern struct qemu_cache_conf qemu_cache_conf;
-extern void qemu_cache_utils_init(char **envp);
+void qemu_cache_utils_init(char **envp);
 /* mildly adjusted code from tcg-dyngen.c */
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
--- compiler.h 1969-12-31 19:00:00.000000000 -0500
+++ compiler.h 2014-01-14 15:21:31.000000000 -0500
@@ -0,0 +1,34 @@
+/* public domain */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include "config-host.h"
+
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+#ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define QEMU_WARN_UNUSED_RESULT
+#endif
+
+#define QEMU_BUILD_BUG_ON(x) \
+ typedef char qemu_build_bug_on__##__LINE__[(x)?-1:1];
+
+#if defined __GNUC__
+# if (__GNUC__ < 4) || \
+ defined(__GNUC_MINOR__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 4)
+ /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
+# define GCC_ATTR __attribute__((__unused__, format(printf, 1, 2)))
+# define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
+# else
+ /* Use gnu_printf when supported (qemu uses standard format strings). */
+# define GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
+# define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+# endif
+#else
+#define GCC_ATTR /**/
+#define GCC_FMT_ATTR(n, m)
+#endif
+
+#endif /* COMPILER_H */
--- cpu-all.h 2013-12-18 11:11:37.000000000 -0500
+++ cpu-all.h 2014-01-14 15:21:31.000000000 -0500
@@ -140,8 +140,7 @@ typedef union {
 endian !
*/ typedef union { float64 d; -#if defined(HOST_WORDS_BIGENDIAN) \ - || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT)) +#if defined(HOST_WORDS_BIGENDIAN) struct { uint32_t upper; uint32_t lower; @@ -155,11 +154,17 @@ typedef union { uint64_t ll; } CPU_DoubleU; -#ifdef TARGET_SPARC +typedef union { + floatx80 d; + struct { + uint64_t lower; + uint16_t upper; + } l; +} CPU_LDoubleU; + typedef union { float128 q; -#if defined(HOST_WORDS_BIGENDIAN) \ - || (defined(__arm__) && !defined(__VFP_FP__) && !defined(CONFIG_SOFTFLOAT)) +#if defined(HOST_WORDS_BIGENDIAN) struct { uint32_t upmost; uint32_t upper; @@ -183,7 +188,6 @@ typedef union { } ll; #endif } CPU_QuadU; -#endif /* CPU memory access without any memory or io remapping */ @@ -913,34 +917,70 @@ int page_check_range(target_ulong start, CPUState *cpu_copy(CPUState *env); CPUState *qemu_get_cpu(int cpu); -void cpu_dump_state(CPUState *env, FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...), +#define CPU_DUMP_CODE 0x00010000 + +void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags); -void cpu_dump_statistics (CPUState *env, FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...), - int flags); +void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf, + int flags); void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...) #ifndef VBOX - __attribute__ ((__format__ (__printf__, 2, 3))); + GCC_FMT_ATTR(2, 3); #else /* VBOX */ ; #endif /* VBOX */ extern CPUState *first_cpu; extern CPUState *cpu_single_env; -#define CPU_INTERRUPT_HARD 0x02 /* hardware interrupt pending */ -#define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */ -#define CPU_INTERRUPT_TIMER 0x08 /* internal timer exception pending */ -#define CPU_INTERRUPT_FIQ 0x10 /* Fast interrupt pending. */ -#define CPU_INTERRUPT_HALT 0x20 /* CPU halt wanted */ -#define CPU_INTERRUPT_SMI 0x40 /* (x86 only) SMI interrupt pending */ -#define CPU_INTERRUPT_DEBUG 0x80 /* Debug event occured. */ -#define CPU_INTERRUPT_VIRQ 0x100 /* virtual interrupt pending. */ -#define CPU_INTERRUPT_NMI 0x200 /* NMI pending. */ -#define CPU_INTERRUPT_INIT 0x400 /* INIT pending. */ -#define CPU_INTERRUPT_SIPI 0x800 /* SIPI pending. */ -#define CPU_INTERRUPT_MCE 0x1000 /* (x86 only) MCE pending. */ +/* Flags for use in ENV->INTERRUPT_PENDING. + + The numbers assigned here are non-sequential in order to preserve + binary compatibility with the vmstate dump. Bit 0 (0x0001) was + previously used for CPU_INTERRUPT_EXIT, and is cleared when loading + the vmstate dump. */ + +/* External hardware interrupt pending. This is typically used for + interrupts from devices. */ +#define CPU_INTERRUPT_HARD 0x0002 + +/* Exit the current TB. This is typically used when some system-level device + makes some change to the memory mapping. E.g. the a20 line change. */ +#define CPU_INTERRUPT_EXITTB 0x0004 + +/* Halt the CPU. */ +#define CPU_INTERRUPT_HALT 0x0020 + +/* Debug event pending. */ +#define CPU_INTERRUPT_DEBUG 0x0080 + +/* Several target-specific external hardware interrupts. Each target/cpu.h + should define proper names based on these defines. */ +#define CPU_INTERRUPT_TGT_EXT_0 0x0008 +#define CPU_INTERRUPT_TGT_EXT_1 0x0010 +#define CPU_INTERRUPT_TGT_EXT_2 0x0040 +#define CPU_INTERRUPT_TGT_EXT_3 0x0200 +#define CPU_INTERRUPT_TGT_EXT_4 0x1000 + +/* Several target-specific internal interrupts. 
These differ from the + preceeding target-specific interrupts in that they are intended to + originate from within the cpu itself, typically in response to some + instruction being executed. These, therefore, are not masked while + single-stepping within the debugger. */ +#define CPU_INTERRUPT_TGT_INT_0 0x0100 +#define CPU_INTERRUPT_TGT_INT_1 0x0400 +#define CPU_INTERRUPT_TGT_INT_2 0x0800 + +/* First unused bit: 0x2000. */ + +/* The set of all bits that should be masked when single-stepping. */ +#define CPU_INTERRUPT_SSTEP_MASK \ + (CPU_INTERRUPT_HARD \ + | CPU_INTERRUPT_TGT_EXT_0 \ + | CPU_INTERRUPT_TGT_EXT_1 \ + | CPU_INTERRUPT_TGT_EXT_2 \ + | CPU_INTERRUPT_TGT_EXT_3 \ + | CPU_INTERRUPT_TGT_EXT_4) #ifdef VBOX /** Executes a single instruction. cpu_exec() will normally return EXCP_SINGLE_INSTR. */ @@ -960,12 +1000,25 @@ extern CPUState *cpu_single_env; /** Exit current TB to process an external DMA request. */ # define CPU_INTERRUPT_EXTERNAL_DMA 0x80000000 #endif /* VBOX */ -void cpu_interrupt(CPUState *s, int mask); + +#ifndef CONFIG_USER_ONLY +typedef void (*CPUInterruptHandler)(CPUState *, int); + +extern CPUInterruptHandler cpu_interrupt_handler; + +static inline void cpu_interrupt(CPUState *s, int mask) +{ + cpu_interrupt_handler(s, mask); +} +#else /* USER_ONLY */ +void cpu_interrupt(CPUState *env, int mask); +#endif /* USER_ONLY */ + void cpu_reset_interrupt(CPUState *env, int mask); void cpu_exit(CPUState *s); -int qemu_cpu_has_work(CPUState *env); +bool qemu_cpu_has_work(CPUState *env); /* Breakpoint/watchpoint flags */ #define BP_MEM_READ 0x01 @@ -1035,10 +1088,14 @@ extern int phys_ram_fd; extern ram_addr_t ram_size; #endif /* !VBOX */ +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC_MASK (1 << 0) + typedef struct RAMBlock { uint8_t *host; ram_addr_t offset; ram_addr_t length; + uint32_t flags; char idstr[256]; QLIST_ENTRY(RAMBlock) next; #if defined(__linux__) && !defined(TARGET_S390X) @@ -1167,16 +1224,18 @@ int cpu_physical_memory_get_dirty_tracki int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr); -void dump_exec_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); +int cpu_physical_log_start(target_phys_addr_t start_addr, + ram_addr_t size); + +int cpu_physical_log_stop(target_phys_addr_t start_addr, + ram_addr_t size); + +void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); #endif /* !CONFIG_USER_ONLY */ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf, int len, int is_write); -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc); - #ifdef VBOX void tb_invalidate_virt(CPUState *env, uint32_t eip); #endif /* VBOX */ --- cpu-common.h 2013-12-18 11:11:37.000000000 -0500 +++ cpu-common.h 2014-01-14 15:21:31.000000000 -0500 @@ -20,6 +20,12 @@ #if !defined(CONFIG_USER_ONLY) +enum device_endian { + DEVICE_NATIVE_ENDIAN, + DEVICE_BIG_ENDIAN, + DEVICE_LITTLE_ENDIAN, +}; + /* address in the RAM (different from a physical address) */ typedef uintptr_t ram_addr_t; @@ -28,10 +34,21 @@ typedef uintptr_t ram_addr_t; typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value); typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr); -void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset, - ram_addr_t region_offset); +void cpu_register_physical_memory_log(target_phys_addr_t start_addr, + 
ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset, + bool log_dirty); + +static inline void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + ram_addr_t region_offset) +{ + cpu_register_physical_memory_log(start_addr, size, phys_offset, + region_offset, false); +} + static inline void cpu_register_physical_memory(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) @@ -45,28 +62,36 @@ ram_addr_t qemu_ram_alloc_from_ptr(Devic ram_addr_t size, void *host); ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size); void qemu_ram_free(ram_addr_t addr); +void qemu_ram_free_from_ptr(ram_addr_t addr); +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); +void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size); +/* Same but slower, to use for migration, where the order of + * RAMBlocks must not change. */ +void *qemu_safe_ram_ptr(ram_addr_t addr); +void qemu_put_ram_ptr(void *addr); /* This should not be used by devices. */ -ram_addr_t qemu_ram_addr_from_host(void *ptr); +int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); +ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); #endif /* !VBOX */ int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, CPUWriteMemoryFunc * const *mem_write, - void *opaque); + void *opaque, enum device_endian endian); void cpu_unregister_io_memory(int table_address); void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write); static inline void cpu_physical_memory_read(target_phys_addr_t addr, - uint8_t *buf, int len) + void *buf, int len) { cpu_physical_memory_rw(addr, buf, len, 0); } static inline void cpu_physical_memory_write(target_phys_addr_t addr, - const uint8_t *buf, int len) + const void *buf, int len) { - cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1); + cpu_physical_memory_rw(addr, (void *)buf, len, 1); } void *cpu_physical_memory_map(target_phys_addr_t addr, target_phys_addr_t *plen, @@ -82,12 +107,17 @@ struct CPUPhysMemoryClient { void (*set_memory)(struct CPUPhysMemoryClient *client, target_phys_addr_t start_addr, ram_addr_t size, - ram_addr_t phys_offset); + ram_addr_t phys_offset, + bool log_dirty); int (*sync_dirty_bitmap)(struct CPUPhysMemoryClient *client, target_phys_addr_t start_addr, target_phys_addr_t end_addr); int (*migration_log)(struct CPUPhysMemoryClient *client, int enable); + int (*log_start)(struct CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size); + int (*log_stop)(struct CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size); QLIST_ENTRY(CPUPhysMemoryClient) list; }; @@ -106,15 +136,30 @@ void qemu_unregister_coalesced_mmio(targ void qemu_flush_coalesced_mmio_buffer(void); uint32_t ldub_phys(target_phys_addr_t addr); +uint32_t lduw_le_phys(target_phys_addr_t addr); +uint32_t lduw_be_phys(target_phys_addr_t addr); +uint32_t ldl_le_phys(target_phys_addr_t addr); +uint32_t ldl_be_phys(target_phys_addr_t addr); +uint64_t ldq_le_phys(target_phys_addr_t addr); +uint64_t ldq_be_phys(target_phys_addr_t addr); +void stb_phys(target_phys_addr_t addr, uint32_t val); +void stw_le_phys(target_phys_addr_t addr, uint32_t val); +void stw_be_phys(target_phys_addr_t addr, uint32_t val); +void stl_le_phys(target_phys_addr_t addr, uint32_t val); +void stl_be_phys(target_phys_addr_t addr, uint32_t val); +void 
stq_le_phys(target_phys_addr_t addr, uint64_t val); +void stq_be_phys(target_phys_addr_t addr, uint64_t val); + +#ifdef NEED_CPU_H uint32_t lduw_phys(target_phys_addr_t addr); uint32_t ldl_phys(target_phys_addr_t addr); uint64_t ldq_phys(target_phys_addr_t addr); void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val); void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val); -void stb_phys(target_phys_addr_t addr, uint32_t val); void stw_phys(target_phys_addr_t addr, uint32_t val); void stl_phys(target_phys_addr_t addr, uint32_t val); void stq_phys(target_phys_addr_t addr, uint64_t val); +#endif void cpu_physical_memory_write_rom(target_phys_addr_t addr, const uint8_t *buf, int len); --- cpu-defs.h 2013-12-18 11:11:37.000000000 -0500 +++ cpu-defs.h 2014-01-14 15:21:31.000000000 -0500 @@ -51,16 +51,22 @@ #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8) +typedef int16_t target_short __attribute__ ((aligned(TARGET_SHORT_ALIGNMENT))); +typedef uint16_t target_ushort __attribute__((aligned(TARGET_SHORT_ALIGNMENT))); +typedef int32_t target_int __attribute__((aligned(TARGET_INT_ALIGNMENT))); +typedef uint32_t target_uint __attribute__((aligned(TARGET_INT_ALIGNMENT))); +typedef int64_t target_llong __attribute__((aligned(TARGET_LLONG_ALIGNMENT))); +typedef uint64_t target_ullong __attribute__((aligned(TARGET_LLONG_ALIGNMENT))); /* target_ulong is the type of a virtual address */ #if TARGET_LONG_SIZE == 4 -typedef int32_t target_long; -typedef uint32_t target_ulong; +typedef int32_t target_long __attribute__((aligned(TARGET_LONG_ALIGNMENT))); +typedef uint32_t target_ulong __attribute__((aligned(TARGET_LONG_ALIGNMENT))); #define TARGET_FMT_lx "%08x" #define TARGET_FMT_ld "%d" #define TARGET_FMT_lu "%u" #elif TARGET_LONG_SIZE == 8 -typedef int64_t target_long; -typedef uint64_t target_ulong; +typedef int64_t target_long __attribute__((aligned(TARGET_LONG_ALIGNMENT))); +typedef uint64_t target_ulong __attribute__((aligned(TARGET_LONG_ALIGNMENT))); #define TARGET_FMT_lx "%016" PRIx64 #define TARGET_FMT_ld "%" PRId64 #define TARGET_FMT_lu "%" PRIu64 @@ -217,6 +223,7 @@ typedef struct CPUWatchpoint { int nr_cores; /* number of cores within this CPU package */ \ int nr_threads;/* number of threads within this CPU */ \ int running; /* Nonzero if cpu is currently running(usermode). 
*/ \ + int thread_id; \ /* user data */ \ void *opaque; \ \ @@ -225,6 +232,7 @@ typedef struct CPUWatchpoint { uint32_t stopped; /* Artificially stopped */ \ struct QemuThread *thread; \ struct QemuCond *halt_cond; \ + int thread_kicked; \ struct qemu_work_item *queued_work_first, *queued_work_last; \ const char *cpu_model_str; \ struct KVMState *kvm_state; \ --- cpu-exec.c 2013-12-18 11:11:37.000000000 -0500 +++ cpu-exec.c 2014-01-14 15:21:31.000000000 -0500 @@ -27,45 +27,21 @@ */ #include "config.h" -#include "exec.h" +#include "cpu.h" #include "disas.h" #include "tcg.h" -#include "kvm.h" #include "qemu-barrier.h" -#if !defined(CONFIG_SOFTMMU) -#undef EAX -#undef ECX -#undef EDX -#undef EBX -#undef ESP -#undef EBP -#undef ESI -#undef EDI -#undef EIP -#include -#ifdef __linux__ -#include -#endif -#endif - -#if defined(__sparc__) && !defined(CONFIG_SOLARIS) -// Work around ugly bugs in glibc that mangle global register contents -#undef env -#define env cpu_single_env -#endif - int tb_invalidated_flag; //#define CONFIG_DEBUG_EXEC -//#define DEBUG_SIGNAL -int qemu_cpu_has_work(CPUState *env) +bool qemu_cpu_has_work(CPUState *env) { return cpu_has_work(env); } -void cpu_loop_exit(void) +void cpu_loop_exit(CPUState *env) { env->current_tb = NULL; longjmp(env->jmp_env, 1); @@ -74,41 +50,20 @@ void cpu_loop_exit(void) /* exit the current TB from a signal handler. The host registers are restored in a state compatible with the CPU emulator */ -void cpu_resume_from_signal(CPUState *env1, void *puc) +#if defined(CONFIG_SOFTMMU) +void cpu_resume_from_signal(CPUState *env, void *puc) { -#if !defined(CONFIG_SOFTMMU) -#ifdef __linux__ - struct ucontext *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#endif -#endif - - env = env1; - /* XXX: restore cpu registers saved in host registers */ -#if !defined(CONFIG_SOFTMMU) - if (puc) { - /* XXX: use siglongjmp ? */ -#ifdef __linux__ -#ifdef __ia64 - sigprocmask(SIG_SETMASK, (sigset_t *)&uc->uc_sigmask, NULL); -#else - sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); -#endif -#elif defined(__OpenBSD__) - sigprocmask(SIG_SETMASK, &uc->sc_mask, NULL); -#endif - } -#endif env->exception_index = -1; longjmp(env->jmp_env, 1); } +#endif /* Execute the code without caching the generated code. An interpreter could be used if available. 
*/ -static void cpu_exec_nocache(int max_cycles, TranslationBlock *orig_tb) +static void cpu_exec_nocache(CPUState *env, int max_cycles, + TranslationBlock *orig_tb) { uintptr_t next_tb; TranslationBlock *tb; @@ -122,11 +77,7 @@ static void cpu_exec_nocache(int max_cyc max_cycles); env->current_tb = tb; /* execute the generated code */ -#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) - tcg_qemu_tb_exec(tb->tc_ptr, next_tb); -#else - next_tb = tcg_qemu_tb_exec(tb->tc_ptr); -#endif + next_tb = tcg_qemu_tb_exec(env, tb->tc_ptr); env->current_tb = NULL; if ((next_tb & 3) == 2) { @@ -138,7 +89,8 @@ static void cpu_exec_nocache(int max_cyc tb_free(tb); } -static TranslationBlock *tb_find_slow(target_ulong pc, +static TranslationBlock *tb_find_slow(CPUState *env, + target_ulong pc, target_ulong cs_base, uint64_t flags) { @@ -181,12 +133,18 @@ static TranslationBlock *tb_find_slow(ta tb = tb_gen_code(env, pc, cs_base, flags, 0); found: + /* Move the last found TB to the head of the list */ + if (likely(*ptb1)) { + *ptb1 = tb->phys_hash_next; + tb->phys_hash_next = tb_phys_hash[h]; + tb_phys_hash[h] = tb; + } /* we add the TB in the virtual pc hash table */ env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; return tb; } -static inline TranslationBlock *tb_find_fast(void) +static inline TranslationBlock *tb_find_fast(CPUState *env) { TranslationBlock *tb; target_ulong cs_base, pc; @@ -199,7 +157,7 @@ static inline TranslationBlock *tb_find_ tb = env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { - tb = tb_find_slow(pc, cs_base, flags); + tb = tb_find_slow(env, pc, cs_base, flags); } return tb; } @@ -218,53 +176,48 @@ static void cpu_handle_debug_exception(C { CPUWatchpoint *wp; - if (!env->watchpoint_hit) - QTAILQ_FOREACH(wp, &env->watchpoints, entry) + if (!env->watchpoint_hit) { + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { wp->flags &= ~BP_WATCHPOINT_HIT; - - if (debug_excp_handler) + } + } + if (debug_excp_handler) { debug_excp_handler(env); + } } /* main execution loop */ volatile sig_atomic_t exit_request; -int cpu_exec(CPUState *env1) +int cpu_exec(CPUState *env) { - volatile host_reg_t saved_env_reg; int ret VBOX_ONLY(= 0), interrupt_request; TranslationBlock *tb; uint8_t *tc_ptr; uintptr_t next_tb; # ifndef VBOX - if (cpu_halted(env1) == EXCP_HALTED) - return EXCP_HALTED; + if (env->halted) { + if (!cpu_has_work(env)) { + return EXCP_HALTED; + } + env->halted = 0; + } # endif /* !VBOX */ - cpu_single_env = env1; - - /* the access to env below is actually saving the global register's - value, so that files not including target-xyz/exec.h are free to - use it. 
*/ - QEMU_BUILD_BUG_ON (sizeof (saved_env_reg) != sizeof (env)); - saved_env_reg = (host_reg_t) env; - barrier(); - env = env1; + cpu_single_env = env; if (unlikely(exit_request)) { env->exit_request = 1; } #if defined(TARGET_I386) - if (!kvm_enabled()) { - /* put eflags in CPU temporary format */ - CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - DF = 1 - (2 * ((env->eflags >> 10) & 1)); - CC_OP = CC_OP_EFLAGS; - env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - } + /* put eflags in CPU temporary format */ + CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((env->eflags >> 10) & 1)); + CC_OP = CC_OP_EFLAGS; + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); #elif defined(TARGET_SPARC) #elif defined(TARGET_M68K) env->cc_op = CC_OP_FLAGS; @@ -272,7 +225,9 @@ int cpu_exec(CPUState *env1) env->cc_x = (env->sr >> 4) & 1; #elif defined(TARGET_ALPHA) #elif defined(TARGET_ARM) +#elif defined(TARGET_UNICORE32) #elif defined(TARGET_PPC) +#elif defined(TARGET_LM32) #elif defined(TARGET_MICROBLAZE) #elif defined(TARGET_MIPS) #elif defined(TARGET_SH4) @@ -289,11 +244,6 @@ int cpu_exec(CPUState *env1) /* prepare setjmp context for exception handling */ for(;;) { if (setjmp(env->jmp_env) == 0) { -#if defined(__sparc__) && !defined(CONFIG_SOLARIS) -#undef env - env = cpu_single_env; -#define env cpu_single_env -#endif #ifdef VBOX env->current_tb = NULL; /* probably not needed, but whatever... */ @@ -304,7 +254,7 @@ int cpu_exec(CPUState *env1) env->exception_index = EXCP_RC; ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC); ret = env->exception_index; - cpu_loop_exit(); + cpu_loop_exit(env); } #endif @@ -316,8 +266,9 @@ int cpu_exec(CPUState *env1) #ifdef VBOX /* because of the above stuff */ env->exception_index = -1; #endif - if (ret == EXCP_DEBUG) + if (ret == EXCP_DEBUG) { cpu_handle_debug_exception(env); + } break; } else { #if defined(CONFIG_USER_ONLY) @@ -325,97 +276,46 @@ int cpu_exec(CPUState *env1) which will be handled outside the cpu execution loop */ #if defined(TARGET_I386) - do_interrupt_user(env->exception_index, - env->exception_is_int, - env->error_code, - env->exception_next_eip); - /* successfully delivered */ - env->old_exception = -1; + do_interrupt(env); #endif ret = env->exception_index; break; #else -#if defined(TARGET_I386) - /* simulate a real cpu exception. On i386, it can - trigger new exceptions, but we do not handle - double or triple faults yet. 
*/ # ifdef VBOX RAWEx_ProfileStart(env, STATS_IRQ_HANDLING); Log(("do_interrupt: vec=%#x int=%d pc=%04x:%RGv\n", env->exception_index, env->exception_is_int, env->segs[R_CS].selector, (RTGCPTR)env->exception_next_eip)); # endif /* VBOX */ -# ifdef IEM_VERIFICATION_MODE /* Ugly hack*/ - do_interrupt(env->exception_index, - env->exception_is_int && env->exception_is_int != 0x42, - env->error_code, - env->exception_next_eip, - env->exception_is_int == 0x42); -# else - do_interrupt(env->exception_index, - env->exception_is_int, - env->error_code, - env->exception_next_eip, 0); -# endif - /* successfully delivered */ - env->old_exception = -1; + do_interrupt(env); # ifdef VBOX RAWEx_ProfileStop(env, STATS_IRQ_HANDLING); # endif /* VBOX */ -#elif defined(TARGET_PPC) - do_interrupt(env); -#elif defined(TARGET_MICROBLAZE) - do_interrupt(env); -#elif defined(TARGET_MIPS) - do_interrupt(env); -#elif defined(TARGET_SPARC) - do_interrupt(env); -#elif defined(TARGET_ARM) - do_interrupt(env); -#elif defined(TARGET_SH4) - do_interrupt(env); -#elif defined(TARGET_ALPHA) - do_interrupt(env); -#elif defined(TARGET_CRIS) - do_interrupt(env); -#elif defined(TARGET_M68K) - do_interrupt(0); -#endif env->exception_index = -1; #endif } } -# ifndef VBOX - if (kvm_enabled()) { - kvm_cpu_exec(env); - longjmp(env->jmp_env, 1); - } -# endif /* !VBOX */ - next_tb = 0; /* force lookup of first TB */ for(;;) { interrupt_request = env->interrupt_request; if (unlikely(interrupt_request)) { if (unlikely(env->singlestep_enabled & SSTEP_NOIRQ)) { /* Mask out external interrupts for this step. */ - interrupt_request &= ~(CPU_INTERRUPT_HARD | - CPU_INTERRUPT_FIQ | - CPU_INTERRUPT_SMI | - CPU_INTERRUPT_NMI); + interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; } if (interrupt_request & CPU_INTERRUPT_DEBUG) { env->interrupt_request &= ~CPU_INTERRUPT_DEBUG; env->exception_index = EXCP_DEBUG; - cpu_loop_exit(); + cpu_loop_exit(env); } #if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \ defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \ - defined(TARGET_MICROBLAZE) + defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32) if (interrupt_request & CPU_INTERRUPT_HALT) { env->interrupt_request &= ~CPU_INTERRUPT_HALT; env->halted = 1; env->exception_index = EXCP_HLT; - cpu_loop_exit(); + cpu_loop_exit(env); } #endif #if defined(TARGET_I386) @@ -448,42 +348,42 @@ int cpu_exec(CPUState *env1) ) { env->exception_index = ret = EXCP_SINGLE_INSTR; - cpu_loop_exit(); + cpu_loop_exit(env); } } /* Clear CPU_INTERRUPT_SINGLE_INSTR and leave CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT set. */ ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_SINGLE_INSTR); # ifdef IEM_VERIFICATION_MODE env->exception_index = ret = EXCP_SINGLE_INSTR; - cpu_loop_exit(); + cpu_loop_exit(env); # endif } # endif /* VBOX */ # ifndef VBOX /** @todo reconcile our code with the following... 
*/ if (interrupt_request & CPU_INTERRUPT_INIT) { - svm_check_intercept(SVM_EXIT_INIT); + svm_check_intercept(env, SVM_EXIT_INIT); do_cpu_init(env); env->exception_index = EXCP_HALTED; - cpu_loop_exit(); + cpu_loop_exit(env); } else if (interrupt_request & CPU_INTERRUPT_SIPI) { do_cpu_sipi(env); } else if (env->hflags2 & HF2_GIF_MASK) { if ((interrupt_request & CPU_INTERRUPT_SMI) && !(env->hflags & HF_SMM_MASK)) { - svm_check_intercept(SVM_EXIT_SMI); + svm_check_intercept(env, SVM_EXIT_SMI); env->interrupt_request &= ~CPU_INTERRUPT_SMI; - do_smm_enter(); + do_smm_enter(env); next_tb = 0; } else if ((interrupt_request & CPU_INTERRUPT_NMI) && !(env->hflags2 & HF2_NMI_MASK)) { env->interrupt_request &= ~CPU_INTERRUPT_NMI; env->hflags2 |= HF2_NMI_MASK; - do_interrupt(EXCP02_NMI, 0, 0, 0, 1); + do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); next_tb = 0; } else if (interrupt_request & CPU_INTERRUPT_MCE) { env->interrupt_request &= ~CPU_INTERRUPT_MCE; - do_interrupt(EXCP12_MCHK, 0, 0, 0, 0); + do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); next_tb = 0; } else if ((interrupt_request & CPU_INTERRUPT_HARD) && (((env->hflags2 & HF2_VINTR_MASK) && @@ -492,16 +392,11 @@ int cpu_exec(CPUState *env1) (env->eflags & IF_MASK && !(env->hflags & HF_INHIBIT_IRQ_MASK))))) { int intno; - svm_check_intercept(SVM_EXIT_INTR); + svm_check_intercept(env, SVM_EXIT_INTR); env->interrupt_request &= ~(CPU_INTERRUPT_HARD | CPU_INTERRUPT_VIRQ); intno = cpu_get_pic_interrupt(env); qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing hardware INT=0x%02x\n", intno); -#if defined(__sparc__) && !defined(CONFIG_SOLARIS) -#undef env - env = cpu_single_env; -#define env cpu_single_env -#endif - do_interrupt(intno, 0, 0, 0, 1); + do_interrupt_x86_hardirq(env, intno, 1); /* ensure that no TB jump will be modified as the program flow was changed */ next_tb = 0; @@ -511,10 +406,10 @@ int cpu_exec(CPUState *env1) !(env->hflags & HF_INHIBIT_IRQ_MASK)) { int intno; /* FIXME: this should respect TPR */ - svm_check_intercept(SVM_EXIT_VINTR); + svm_check_intercept(env, SVM_EXIT_VINTR); intno = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_vector)); qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno); - do_interrupt(intno, 0, 0, 0, 1); + do_interrupt_x86_hardirq(env, intno, 1); env->interrupt_request &= ~CPU_INTERRUPT_VIRQ; next_tb = 0; #endif @@ -525,7 +420,7 @@ int cpu_exec(CPUState *env1) if ((interrupt_request & CPU_INTERRUPT_SMI) && !(env->hflags & HF_SMM_MASK)) { env->interrupt_request &= ~CPU_INTERRUPT_SMI; - do_smm_enter(); + do_smm_enter(env); next_tb = 0; } else if ((interrupt_request & CPU_INTERRUPT_HARD) && @@ -539,7 +434,7 @@ int cpu_exec(CPUState *env1) if (intno >= 0) { Log(("do_interrupt %d\n", intno)); - do_interrupt(intno, 0, 0, 0, 1); + do_interrupt_x86_hardirq(env, intno, 1); } /* ensure that no TB jump will be modified as the program flow was changed */ @@ -558,6 +453,13 @@ int cpu_exec(CPUState *env1) env->interrupt_request &= ~CPU_INTERRUPT_HARD; next_tb = 0; } +#elif defined(TARGET_LM32) + if ((interrupt_request & CPU_INTERRUPT_HARD) + && (env->ie & IE_IE)) { + env->exception_index = EXCP_IRQ; + do_interrupt(env); + next_tb = 0; + } #elif defined(TARGET_MICROBLAZE) if ((interrupt_request & CPU_INTERRUPT_HARD) && (env->sregs[SR_MSR] & MSR_IE) @@ -569,11 +471,7 @@ int cpu_exec(CPUState *env1) } #elif defined(TARGET_MIPS) if ((interrupt_request & CPU_INTERRUPT_HARD) && - (env->CP0_Status & env->CP0_Cause & CP0Ca_IP_mask) && - (env->CP0_Status & (1 << CP0St_IE)) && - !(env->CP0_Status & (1 
<< CP0St_EXL)) && - !(env->CP0_Status & (1 << CP0St_ERL)) && - !(env->hflags & MIPS_HFLAG_DM)) { + cpu_mips_hw_interrupts_pending(env)) { /* Raise it */ env->exception_index = EXCP_EXT_INTERRUPT; env->error_code = 0; @@ -595,9 +493,6 @@ int cpu_exec(CPUState *env1) next_tb = 0; } } - } else if (interrupt_request & CPU_INTERRUPT_TIMER) { - //do_interrupt(0, 0, 0, 0, 0); - env->interrupt_request &= ~CPU_INTERRUPT_TIMER; } #elif defined(TARGET_ARM) if (interrupt_request & CPU_INTERRUPT_FIQ @@ -612,7 +507,7 @@ int cpu_exec(CPUState *env1) jump normally, then does the exception return when the CPU tries to execute code at the magic address. This will cause the magic PC value to be pushed to - the stack if an interrupt occured at the wrong time. + the stack if an interrupt occurred at the wrong time. We avoid this by disabling interrupts when pc contains a magic address. */ if (interrupt_request & CPU_INTERRUPT_HARD @@ -622,16 +517,49 @@ int cpu_exec(CPUState *env1) do_interrupt(env); next_tb = 0; } -#elif defined(TARGET_SH4) - if (interrupt_request & CPU_INTERRUPT_HARD) { +#elif defined(TARGET_UNICORE32) + if (interrupt_request & CPU_INTERRUPT_HARD + && !(env->uncached_asr & ASR_I)) { do_interrupt(env); next_tb = 0; } -#elif defined(TARGET_ALPHA) +#elif defined(TARGET_SH4) if (interrupt_request & CPU_INTERRUPT_HARD) { do_interrupt(env); next_tb = 0; } +#elif defined(TARGET_ALPHA) + { + int idx = -1; + /* ??? This hard-codes the OSF/1 interrupt levels. */ + switch (env->pal_mode ? 7 : env->ps & PS_INT_MASK) { + case 0 ... 3: + if (interrupt_request & CPU_INTERRUPT_HARD) { + idx = EXCP_DEV_INTERRUPT; + } + /* FALLTHRU */ + case 4: + if (interrupt_request & CPU_INTERRUPT_TIMER) { + idx = EXCP_CLK_INTERRUPT; + } + /* FALLTHRU */ + case 5: + if (interrupt_request & CPU_INTERRUPT_SMP) { + idx = EXCP_SMP_INTERRUPT; + } + /* FALLTHRU */ + case 6: + if (interrupt_request & CPU_INTERRUPT_MCHK) { + idx = EXCP_MCHK; + } + } + if (idx >= 0) { + env->exception_index = idx; + env->error_code = 0; + do_interrupt(env); + next_tb = 0; + } + } #elif defined(TARGET_CRIS) if (interrupt_request & CPU_INTERRUPT_HARD && (env->pregs[PR_CCS] & I_FLAG) @@ -656,11 +584,17 @@ int cpu_exec(CPUState *env1) provide/save the vector when the interrupt is first signalled. */ env->exception_index = env->pending_vector; - do_interrupt(1); + do_interrupt_m68k_hardirq(env); + next_tb = 0; + } +#elif defined(TARGET_S390X) && !defined(CONFIG_USER_ONLY) + if ((interrupt_request & CPU_INTERRUPT_HARD) && + (env->psw.mask & PSW_MASK_EXT)) { + do_interrupt(env); next_tb = 0; } #endif - /* Don't use the cached interupt_request value, + /* Don't use the cached interrupt_request value, do_interrupt may have updated the EXITTB flag. 
*/ if (env->interrupt_request & CPU_INTERRUPT_EXITTB) { #ifndef VBOX @@ -678,7 +612,7 @@ int cpu_exec(CPUState *env1) env->exception_index = EXCP_RC; ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_RC); ret = env->exception_index; - cpu_loop_exit(); + cpu_loop_exit(env); } if (interrupt_request & (CPU_INTERRUPT_EXTERNAL_EXIT)) { ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~(CPU_INTERRUPT_EXTERNAL_EXIT)); @@ -689,7 +623,7 @@ int cpu_exec(CPUState *env1) if (unlikely(env->exit_request)) { env->exit_request = 0; env->exception_index = EXCP_INTERRUPT; - cpu_loop_exit(); + cpu_loop_exit(env); } #ifdef VBOX @@ -704,7 +638,7 @@ int cpu_exec(CPUState *env1) { RAWEx_ProfileStop(env, STATS_RAW_CHECK); ret = env->exception_index; - cpu_loop_exit(); + cpu_loop_exit(env); } RAWEx_ProfileStop(env, STATS_RAW_CHECK); #endif /* VBOX */ @@ -713,7 +647,8 @@ int cpu_exec(CPUState *env1) if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { /* restore flags in standard format */ #if defined(TARGET_I386) - env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); + env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP) + | (DF & DF_MASK); log_cpu_state(env, X86_DUMP_CCOP); env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); #elif defined(TARGET_M68K) @@ -731,7 +666,7 @@ int cpu_exec(CPUState *env1) RAWEx_ProfileStart(env, STATS_TLB_LOOKUP); #endif /*VBOX*/ spin_lock(&tb_lock); - tb = tb_find_fast(); + tb = tb_find_fast(env); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ if (tb_invalidated_flag) { @@ -772,21 +707,12 @@ int cpu_exec(CPUState *env1) /* execute the generated code */ #ifdef VBOX RAWEx_ProfileStart(env, STATS_QEMU_RUN_EMULATED_CODE); -#endif -#if defined(__sparc__) && !defined(CONFIG_SOLARIS) -#undef env - env = cpu_single_env; -#define env cpu_single_env -#endif Log5(("REM: tb=%p tc_ptr=%p %04x:%08RGv\n", tb, tc_ptr, env->segs[R_CS].selector, (RTGCPTR)env->eip)); -#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) - tcg_qemu_tb_exec(tc_ptr, next_tb); -#else - next_tb = tcg_qemu_tb_exec(tc_ptr); #endif + next_tb = tcg_qemu_tb_exec(env, tc_ptr); +#ifdef VBOX if (next_tb) Log5(("REM: next_tb=%p %04x:%08RGv\n", next_tb, env->segs[R_CS].selector, (RTGCPTR)env->eip)); -#ifdef VBOX RAWEx_ProfileStop(env, STATS_QEMU_RUN_EMULATED_CODE); #endif if ((next_tb & 3) == 2) { @@ -809,11 +735,11 @@ int cpu_exec(CPUState *env1) } else { if (insns_left > 0) { /* Execute remaining instructions. */ - cpu_exec_nocache(insns_left, tb); + cpu_exec_nocache(env, insns_left, tb); } env->exception_index = EXCP_INTERRUPT; next_tb = 0; - cpu_loop_exit(); + cpu_loop_exit(env); } } } @@ -821,6 +747,10 @@ int cpu_exec(CPUState *env1) /* reset soft MMU for next block (it can currently only be set by a memory fault) */ } /* for(;;) */ + } else { + /* Reload env after longjmp - the compiler may have smashed all + * local variables as longjmp is marked 'noreturn'. */ + env = cpu_single_env; } #ifdef VBOX_HIGH_RES_TIMERS_HACK /* NULL the current_tb here so cpu_interrupt() doesn't do anything @@ -843,11 +773,14 @@ int cpu_exec(CPUState *env1) #if defined(TARGET_I386) /* restore flags in standard format */ - env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); + env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP) + | (DF & DF_MASK); #elif defined(TARGET_ARM) /* XXX: Save/restore host fpu exception state?. 
*/ +#elif defined(TARGET_UNICORE32) #elif defined(TARGET_SPARC) #elif defined(TARGET_PPC) +#elif defined(TARGET_LM32) #elif defined(TARGET_M68K) cpu_m68k_flush_flags(env, env->cc_op); env->cc_op = CC_OP_FLAGS; @@ -864,603 +797,9 @@ int cpu_exec(CPUState *env1) #error unsupported target CPU #endif - /* restore global registers */ - barrier(); - env = (void *) saved_env_reg; - # ifndef VBOX /* we might be using elsewhere, we only have one. */ /* fail safe : never use cpu_single_env outside cpu_exec() */ cpu_single_env = NULL; # endif return ret; } - -/* must only be called from the generated code as an exception can be - generated */ -void tb_invalidate_page_range(target_ulong start, target_ulong end) -{ - /* XXX: cannot enable it yet because it yields to MMU exception - where NIP != read address on PowerPC */ -#if 0 - target_ulong phys_addr; - phys_addr = get_phys_addr_code(env, start); - tb_invalidate_phys_page_range(phys_addr, phys_addr + end - start, 0); -#endif -} - -#if defined(TARGET_I386) && defined(CONFIG_USER_ONLY) - -void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { - selector &= 0xffff; - cpu_x86_load_seg_cache(env, seg_reg, selector, - (selector << 4), 0xffff, 0); - } else { - helper_load_seg(seg_reg, selector); - } - env = saved_env; -} - -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - - helper_fsave(ptr, data32); - - env = saved_env; -} - -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - - helper_frstor(ptr, data32); - - env = saved_env; -} - -#endif /* TARGET_I386 */ - -#if !defined(CONFIG_SOFTMMU) - -#if defined(TARGET_I386) -#define EXCEPTION_ACTION raise_exception_err(env->exception_index, env->error_code) -#else -#define EXCEPTION_ACTION cpu_loop_exit() -#endif - -/* 'pc' is the host PC at which the exception was raised. 'address' is - the effective address of the memory exception. 'is_write' is 1 if a - write caused the exception and otherwise 0'. 'old_set' is the - signal set which should be restored */ -static inline int handle_cpu_signal(uintptr_t pc, uintptr_t address, - int is_write, sigset_t *old_set, - void *puc) -{ - TranslationBlock *tb; - int ret; - - if (cpu_single_env) - env = cpu_single_env; /* XXX: find a correct solution for multithread */ -#if defined(DEBUG_SIGNAL) - qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", - pc, address, is_write, *(unsigned long *)old_set); -#endif - /* XXX: locking issue */ - if (is_write && page_unprotect(h2g(address), pc, puc)) { - return 1; - } - - /* see if it is an MMU fault */ - ret = cpu_handle_mmu_fault(env, address, is_write, MMU_USER_IDX, 0); - if (ret < 0) - return 0; /* not an MMU fault */ - if (ret == 0) - return 1; /* the MMU fault was handled without causing real CPU fault */ - /* now we have a real cpu fault */ - tb = tb_find_pc(pc); - if (tb) { - /* the PC is inside the translated code. 
It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, pc, puc); - } - - /* we restore the process signal mask as the sigreturn should - do it (XXX: use sigsetjmp) */ - sigprocmask(SIG_SETMASK, old_set, NULL); - EXCEPTION_ACTION; - - /* never comes here */ - return 1; -} - -#if defined(__i386__) - -#if defined(__APPLE__) -# include - -# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext->ss.eip)) -# define TRAP_sig(context) ((context)->uc_mcontext->es.trapno) -# define ERROR_sig(context) ((context)->uc_mcontext->es.err) -# define MASK_sig(context) ((context)->uc_sigmask) -#elif defined (__NetBSD__) -# include - -# define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) -# define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -# define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -# define MASK_sig(context) ((context)->uc_sigmask) -#elif defined (__FreeBSD__) || defined(__DragonFly__) -# include - -# define EIP_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_eip)) -# define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -# define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -# define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -# define EIP_sig(context) ((context)->sc_eip) -# define TRAP_sig(context) ((context)->sc_trapno) -# define ERROR_sig(context) ((context)->sc_err) -# define MASK_sig(context) ((context)->sc_mask) -#else -# define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) -# define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -# define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -# define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - struct ucontext *uc = puc; -#endif - uintptr_t pc; - int trapno; - -#ifndef REG_EIP -/* for glibc 2.1 */ -#define REG_EIP EIP -#define REG_ERR ERR -#define REG_TRAPNO TRAPNO -#endif - pc = EIP_sig(uc); - trapno = TRAP_sig(uc); - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - trapno == 0xe ? 
- (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc), puc); -} - -#elif defined(__x86_64__) - -#ifdef __NetBSD__ -#define PC_sig(context) _UC_MACHINE_PC(context) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -#define PC_sig(context) ((context)->sc_rip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#elif defined (__FreeBSD__) || defined(__DragonFly__) -#include - -#define PC_sig(context) (*((unsigned long*)&(context)->uc_mcontext.mc_rip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#else -#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - uintptr_t pc; -#if defined(__NetBSD__) || defined (__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - struct ucontext *uc = puc; -#endif - - pc = PC_sig(uc); - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - TRAP_sig(uc) == 0xe ? - (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc), puc); -} - -#elif defined(_ARCH_PPC) - -/*********************************************************************** - * signal context platform-specific definitions - * From Wine - */ -#ifdef linux -/* All Registers access - only for local access */ -# define REG_sig(reg_name, context) ((context)->uc_mcontext.regs->reg_name) -/* Gpr Registers access */ -# define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context) -# define IAR_sig(context) REG_sig(nip, context) /* Program counter */ -# define MSR_sig(context) REG_sig(msr, context) /* Machine State Register (Supervisor) */ -# define CTR_sig(context) REG_sig(ctr, context) /* Count register */ -# define XER_sig(context) REG_sig(xer, context) /* User's integer exception register */ -# define LR_sig(context) REG_sig(link, context) /* Link register */ -# define CR_sig(context) REG_sig(ccr, context) /* Condition register */ -/* Float Registers access */ -# define FLOAT_sig(reg_num, context) (((double*)((char*)((context)->uc_mcontext.regs+48*4)))[reg_num]) -# define FPSCR_sig(context) (*(int*)((char*)((context)->uc_mcontext.regs+(48+32*2)*4))) -/* Exception Registers access */ -# define DAR_sig(context) REG_sig(dar, context) -# define DSISR_sig(context) REG_sig(dsisr, context) -# define TRAP_sig(context) REG_sig(trap, context) -#endif /* linux */ - -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include -# define IAR_sig(context) ((context)->uc_mcontext.mc_srr0) -# define MSR_sig(context) ((context)->uc_mcontext.mc_srr1) -# define CTR_sig(context) ((context)->uc_mcontext.mc_ctr) -# define XER_sig(context) ((context)->uc_mcontext.mc_xer) -# define LR_sig(context) ((context)->uc_mcontext.mc_lr) -# define CR_sig(context) ((context)->uc_mcontext.mc_cr) -/* Exception Registers access */ -# define DAR_sig(context) ((context)->uc_mcontext.mc_dar) -# define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr) -# define TRAP_sig(context) 
((context)->uc_mcontext.mc_exc) -#endif /* __FreeBSD__|| __FreeBSD_kernel__ */ - -#ifdef __APPLE__ -# include -typedef struct ucontext SIGCONTEXT; -/* All Registers access - only for local access */ -# define REG_sig(reg_name, context) ((context)->uc_mcontext->ss.reg_name) -# define FLOATREG_sig(reg_name, context) ((context)->uc_mcontext->fs.reg_name) -# define EXCEPREG_sig(reg_name, context) ((context)->uc_mcontext->es.reg_name) -# define VECREG_sig(reg_name, context) ((context)->uc_mcontext->vs.reg_name) -/* Gpr Registers access */ -# define GPR_sig(reg_num, context) REG_sig(r##reg_num, context) -# define IAR_sig(context) REG_sig(srr0, context) /* Program counter */ -# define MSR_sig(context) REG_sig(srr1, context) /* Machine State Register (Supervisor) */ -# define CTR_sig(context) REG_sig(ctr, context) -# define XER_sig(context) REG_sig(xer, context) /* Link register */ -# define LR_sig(context) REG_sig(lr, context) /* User's integer exception register */ -# define CR_sig(context) REG_sig(cr, context) /* Condition register */ -/* Float Registers access */ -# define FLOAT_sig(reg_num, context) FLOATREG_sig(fpregs[reg_num], context) -# define FPSCR_sig(context) ((double)FLOATREG_sig(fpscr, context)) -/* Exception Registers access */ -# define DAR_sig(context) EXCEPREG_sig(dar, context) /* Fault registers for coredump */ -# define DSISR_sig(context) EXCEPREG_sig(dsisr, context) -# define TRAP_sig(context) EXCEPREG_sig(exception, context) /* number of powerpc exception taken */ -#endif /* __APPLE__ */ - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - ucontext_t *uc = puc; -#else - struct ucontext *uc = puc; -#endif - uintptr_t pc; - int is_write; - - pc = IAR_sig(uc); - is_write = 0; -#if 0 - /* ppc 4xx case */ - if (DSISR_sig(uc) & 0x00800000) - is_write = 1; -#else - if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) - is_write = 1; -#endif - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask, puc); -} - -#elif defined(__alpha__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - uint32_t *pc = uc->uc_mcontext.sc_pc; - uint32_t insn = *pc; - int is_write = 0; - - /* XXX: need kernel patch to get write flag faster */ - switch (insn >> 26) { - case 0x0d: // stw - case 0x0e: // stb - case 0x0f: // stq_u - case 0x24: // stf - case 0x25: // stg - case 0x26: // sts - case 0x27: // stt - case 0x2c: // stl - case 0x2d: // stq - case 0x2e: // stl_c - case 0x2f: // stq_c - is_write = 1; - } - - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask, puc); -} -#elif defined(__sparc__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - int is_write; - uint32_t insn; -#if !defined(__arch64__) || defined(CONFIG_SOLARIS) - uint32_t *regs = (uint32_t *)(info + 1); - void *sigmask = (regs + 20); - /* XXX: is there a standard glibc define ? 
*/ - uintptr_t pc = regs[1]; -#else -#ifdef __linux__ - struct sigcontext *sc = puc; - uintptr_t pc = sc->sigc_regs.tpc; - void *sigmask = (void *)sc->sigc_mask; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; - uintptr_t pc = uc->sc_pc; - void *sigmask = (void *)(uintptr_t)uc->sc_mask; -#endif -#endif - - /* XXX: need kernel patch to get write flag faster */ - is_write = 0; - insn = *(uint32_t *)pc; - if ((insn >> 30) == 3) { - switch((insn >> 19) & 0x3f) { - case 0x05: // stb - case 0x15: // stba - case 0x06: // sth - case 0x16: // stha - case 0x04: // st - case 0x14: // sta - case 0x07: // std - case 0x17: // stda - case 0x0e: // stx - case 0x1e: // stxa - case 0x24: // stf - case 0x34: // stfa - case 0x27: // stdf - case 0x37: // stdfa - case 0x26: // stqf - case 0x36: // stqfa - case 0x25: // stfsr - case 0x3c: // casa - case 0x3e: // casxa - is_write = 1; - break; - } - } - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, sigmask, NULL); -} - -#elif defined(__arm__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - uintptr_t pc; - int is_write; - -#if (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) - pc = uc->uc_mcontext.gregs[R15]; -#else - pc = uc->uc_mcontext.arm_pc; -#endif - /* XXX: compute is_write */ - is_write = 0; - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, - &uc->uc_sigmask, puc); -} - -#elif defined(__mc68000) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - uintptr_t pc; - int is_write; - - pc = uc->uc_mcontext.gregs[16]; - /* XXX: compute is_write */ - is_write = 0; - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, - &uc->uc_sigmask, puc); -} - -#elif defined(__ia64) - -#ifndef __ISR_VALID - /* This ought to be in ... */ -# define __ISR_VALID 1 -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - uintptr_t ip; - int is_write = 0; - - ip = uc->uc_mcontext.sc_ip; - switch (host_signum) { - case SIGILL: - case SIGFPE: - case SIGSEGV: - case SIGBUS: - case SIGTRAP: - if (info->si_code && (info->si_segvflags & __ISR_VALID)) - /* ISR.W (write-access) is bit 33: */ - is_write = (info->si_isr >> 33) & 1; - break; - - default: - break; - } - return handle_cpu_signal(ip, (uintptr_t)info->si_addr, - is_write, - (sigset_t *)&uc->uc_sigmask, puc); -} - -#elif defined(__s390__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - uintptr_t pc; - uint16_t *pinsn; - int is_write = 0; - - pc = uc->uc_mcontext.psw.addr; - - /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead - of the normal 2 arguments. The 3rd argument contains the "int_code" - from the hardware which does in fact contain the is_write value. - The rt signal handler, as far as I can tell, does not give this value - at all. Not that we could get to it from here even if it were. */ - /* ??? This is not even close to complete, since it ignores all - of the read-modify-write instructions. 
*/ - pinsn = (uint16_t *)pc; - switch (pinsn[0] >> 8) { - case 0x50: /* ST */ - case 0x42: /* STC */ - case 0x40: /* STH */ - is_write = 1; - break; - case 0xc4: /* RIL format insns */ - switch (pinsn[0] & 0xf) { - case 0xf: /* STRL */ - case 0xb: /* STGRL */ - case 0x7: /* STHRL */ - is_write = 1; - } - break; - case 0xe3: /* RXY format insns */ - switch (pinsn[2] & 0xff) { - case 0x50: /* STY */ - case 0x24: /* STG */ - case 0x72: /* STCY */ - case 0x70: /* STHY */ - case 0x8e: /* STPQ */ - case 0x3f: /* STRVH */ - case 0x3e: /* STRV */ - case 0x2f: /* STRVG */ - is_write = 1; - } - break; - } - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask, puc); -} - -#elif defined(__mips__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - struct ucontext *uc = puc; - greg_t pc = uc->uc_mcontext.pc; - int is_write; - - /* XXX: compute is_write */ - is_write = 0; - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask, puc); -} - -#elif defined(__hppa__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - struct siginfo *info = pinfo; - struct ucontext *uc = puc; - uintptr_t pc = uc->uc_mcontext.sc_iaoq[0]; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* XXX: need kernel patch to get write flag faster. */ - switch (insn >> 26) { - case 0x1a: /* STW */ - case 0x19: /* STH */ - case 0x18: /* STB */ - case 0x1b: /* STWM */ - is_write = 1; - break; - - case 0x09: /* CSTWX, FSTWX, FSTWS */ - case 0x0b: /* CSTDX, FSTDX, FSTDS */ - /* Distinguish from coprocessor load ... */ - is_write = (insn >> 9) & 1; - break; - - case 0x03: - switch ((insn >> 6) & 15) { - case 0xa: /* STWS */ - case 0x9: /* STHS */ - case 0x8: /* STBS */ - case 0xe: /* STWAS */ - case 0xc: /* STBYS */ - is_write = 1; - } - break; - } - - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask, puc); -} - -#else - -#error host CPU specific signal handler needed - -#endif - -#endif /* !defined(CONFIG_SOFTMMU) */ --- cutils.c 2013-12-18 11:11:37.000000000 -0500 +++ cutils.c 2014-01-14 15:21:31.000000000 -0500 @@ -23,6 +23,7 @@ */ #include "qemu-common.h" #include "host-utils.h" +#include #ifdef VBOX # include "osdep.h" @@ -665,30 +666,50 @@ void qemu_iovec_add(QEMUIOVector *qiov, } /* - * Copies iovecs from src to the end dst until src is completely copied or the - * total size of the copied iovec reaches size. The size of the last copied - * iovec is changed in order to fit the specified total size if it isn't a - * perfect fit already. + * Copies iovecs from src to the end of dst. It starts copying after skipping + * the given number of bytes in src and copies until src is completely copied + * or the total size of the copied iovec reaches size.The size of the last + * copied iovec is changed in order to fit the specified total size if it isn't + * a perfect fit already. 
*/ -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, + size_t size) { int i; size_t done; + void *iov_base; + uint64_t iov_len; assert(dst->nalloc != -1); done = 0; for (i = 0; (i < src->niov) && (done != size); i++) { - if (done + src->iov[i].iov_len > size) { - qemu_iovec_add(dst, src->iov[i].iov_base, size - done); + if (skip >= src->iov[i].iov_len) { + /* Skip the whole iov */ + skip -= src->iov[i].iov_len; + continue; + } else { + /* Skip only part (or nothing) of the iov */ + iov_base = (uint8_t*) src->iov[i].iov_base + skip; + iov_len = src->iov[i].iov_len - skip; + skip = 0; + } + + if (done + iov_len > size) { + qemu_iovec_add(dst, iov_base, size - done); break; } else { - qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len); + qemu_iovec_add(dst, iov_base, iov_len); } - done += src->iov[i].iov_len; + done += iov_len; } } +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +{ + qemu_iovec_copy(dst, src, 0, size); +} + void qemu_iovec_destroy(QEMUIOVector *qiov) { assert(qiov->nalloc != -1); @@ -731,6 +752,49 @@ void qemu_iovec_from_buffer(QEMUIOVector } } +void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count) +{ + size_t n; + int i; + + for (i = 0; i < qiov->niov && count; ++i) { + n = MIN(count, qiov->iov[i].iov_len); + memset(qiov->iov[i].iov_base, c, n); + count -= n; + } +} + +void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, + size_t skip) +{ + int i; + size_t done; + void *iov_base; + uint64_t iov_len; + + done = 0; + for (i = 0; (i < qiov->niov) && (done != count); i++) { + if (skip >= qiov->iov[i].iov_len) { + /* Skip the whole iov */ + skip -= qiov->iov[i].iov_len; + continue; + } else { + /* Skip only part (or nothing) of the iov */ + iov_base = (uint8_t*) qiov->iov[i].iov_base + skip; + iov_len = qiov->iov[i].iov_len - skip; + skip = 0; + } + + if (done + iov_len > count) { + memset(iov_base, c, count - done); + break; + } else { + memset(iov_base, c, iov_len); + } + done += iov_len; + } +} + #ifndef _WIN32 /* Sets a specific flag */ int fcntl_setfl(int fd, int flag) @@ -748,5 +812,99 @@ int fcntl_setfl(int fd, int flag) } #endif +/* + * Convert string to bytes, allowing either B/b for bytes, K/k for KB, + * M/m for MB, G/g for GB or T/t for TB. Default without any postfix + * is MB. End pointer will be returned in *end, if not NULL. A valid + * value must be terminated by whitespace, ',' or '\0'. Return -1 on + * error. + */ +int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix) +{ + int64_t retval = -1; + char *endptr; + unsigned char c, d; + int mul_required = 0; + double val, mul, integral, fraction; + + errno = 0; + val = strtod(nptr, &endptr); + if (isnan(val) || endptr == nptr || errno != 0) { + goto fail; + } + fraction = modf(val, &integral); + if (fraction != 0) { + mul_required = 1; + } + /* + * Any whitespace character is fine for terminating the number, + * in addition we accept ',' to handle strings where the size is + * part of a multi token argument. 
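/*
 * A minimal standalone sketch of the skip-then-copy walk that the
 * qemu_iovec_copy() change above performs: skip the first 'skip' bytes of
 * src, then append at most 'size' bytes to dst.  Plain struct iovec and a
 * simple append() helper stand in for QEMUIOVector and qemu_iovec_add
 * (hypothetical names, not the QEMU API).
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

struct vec_builder {
    struct iovec v[16];
    int n;
};

static void append(struct vec_builder *b, void *base, size_t len)
{
    b->v[b->n].iov_base = base;
    b->v[b->n].iov_len = len;
    b->n++;
}

void copy_iov(struct vec_builder *dst, const struct iovec *src,
              int niov, uint64_t skip, size_t size)
{
    size_t done = 0;
    int i;

    for (i = 0; i < niov && done != size; i++) {
        uint8_t *base = src[i].iov_base;
        size_t len = src[i].iov_len;

        if (skip >= len) {          /* skip the whole element */
            skip -= len;
            continue;
        }
        base += skip;               /* skip only part of it */
        len -= skip;
        skip = 0;

        if (done + len > size) {    /* clamp the last element to 'size' */
            append(dst, base, size - done);
            break;
        }
        append(dst, base, len);
        done += len;
    }
}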
+ */ + c = *endptr; + d = c; + if (qemu_isspace(c) || c == '\0' || c == ',') { + c = 0; + if (default_suffix) { + d = default_suffix; + } else { + d = c; + } + } + switch (qemu_toupper(d)) { + case STRTOSZ_DEFSUFFIX_B: + mul = 1; + if (mul_required) { + goto fail; + } + break; + case STRTOSZ_DEFSUFFIX_KB: + mul = 1 << 10; + break; + case 0: + if (mul_required) { + goto fail; + } + case STRTOSZ_DEFSUFFIX_MB: + mul = 1ULL << 20; + break; + case STRTOSZ_DEFSUFFIX_GB: + mul = 1ULL << 30; + break; + case STRTOSZ_DEFSUFFIX_TB: + mul = 1ULL << 40; + break; + default: + goto fail; + } + /* + * If not terminated by whitespace, ',', or \0, increment endptr + * to point to next character, then check that we are terminated + * by an appropriate separating character, ie. whitespace, ',', or + * \0. If not, we are seeing trailing garbage, thus fail. + */ + if (c != 0) { + endptr++; + if (!qemu_isspace(*endptr) && *endptr != ',' && *endptr != 0) { + goto fail; + } + } + if ((val * mul >= INT64_MAX) || val < 0) { + goto fail; + } + retval = val * mul; + +fail: + if (end) { + *end = endptr; + } + + return retval; +} + +int64_t strtosz(const char *nptr, char **end) +{ + return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB); +} #endif /* !VBOX */ --- disas.h 2013-12-18 11:11:37.000000000 -0500 +++ disas.h 2014-01-14 15:21:31.000000000 -0500 @@ -9,11 +9,8 @@ void disas(FILE *out, void *code, unsign void target_disas(FILE *out, target_ulong code, target_ulong size, int flags); #ifndef VBOX -/* The usual mess... FIXME: Remove this condition once dyngen-exec.h is gone */ -#ifndef __DYNGEN_EXEC_H__ void monitor_disas(Monitor *mon, CPUState *env, target_ulong pc, int nb_insn, int is_physical, int flags); -#endif #endif /*!VBOX*/ /* Look up symbol for debugging purpose. Returns "" if unknown. */ --- dyngen-exec.h 2013-12-18 11:11:37.000000000 -0500 +++ dyngen-exec.h 2014-01-14 15:21:31.000000000 -0500 @@ -30,20 +30,8 @@ #define __DYNGEN_EXEC_H__ #ifndef VBOX -/* prevent Solaris from trying to typedef FILE in gcc's - include/floatingpoint.h which will conflict with the - definition down below */ -#ifdef __sun__ -#define _FILEDEFED -#endif -#endif /* !VBOX */ -/* NOTE: standard headers should be used with special care at this - point because host CPU registers are used as global variables. Some - host headers do not allow that. */ -#include -#ifndef VBOX -#include +#include "qemu-common.h" #ifdef __OpenBSD__ #include @@ -52,15 +40,6 @@ /* XXX: This may be wrong for 64-bit ILP32 hosts. */ typedef void * host_reg_t; -#ifdef CONFIG_BSD -typedef struct __sFILE FILE; -#else -typedef struct FILE FILE; -#endif -extern int fprintf(FILE *, const char *, ...); -extern int fputs(const char *, FILE *); -extern int printf(const char *, ...); - #else /* VBOX */ /* XXX: This may be wrong for 64-bit ILP32 hosts. 
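/*
 * A standalone sketch of the size-suffix parsing idea behind the
 * strtosz_suffix() routine added above: strtod() for the numeric part, then
 * an optional B/K/M/G/T suffix selects the multiplier, with a caller-supplied
 * default when no suffix is present.  parse_size is a hypothetical name and
 * error handling is reduced to returning -1; this is not the QEMU helper.
 */
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int64_t parse_size(const char *s, char default_suffix)
{
    char *end;
    double val;
    int64_t mul;
    int suffix;

    errno = 0;
    val = strtod(s, &end);
    if (end == s || errno != 0 || val < 0) {
        return -1;
    }
    suffix = *end ? toupper((unsigned char)*end)
                  : toupper((unsigned char)default_suffix);
    switch (suffix) {
    case 'B': mul = 1; break;
    case 'K': mul = INT64_C(1) << 10; break;
    case 'M': mul = INT64_C(1) << 20; break;
    case 'G': mul = INT64_C(1) << 30; break;
    case 'T': mul = INT64_C(1) << 40; break;
    default:  return -1;
    }
    if (val * mul >= (double)INT64_MAX) {
        return -1;
    }
    return (int64_t)(val * mul);
}

int main(void)
{
    printf("%lld\n", (long long)parse_size("1.5G", 'M'));  /* 1610612736 */
    printf("%lld\n", (long long)parse_size("64", 'M'));    /* 67108864 */
    return 0;
}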
*/ --- elf.h 2013-12-18 11:11:37.000000000 -0500 +++ elf.h 2014-01-14 15:21:31.000000000 -0500 @@ -104,6 +104,9 @@ typedef int64_t Elf64_Sxword; #define EM_H8_300H 47 /* Hitachi H8/300H */ #define EM_H8S 48 /* Hitachi H8S */ +#define EM_LATTICEMICO32 138 /* LatticeMico32 */ + +#define EM_UNICORE32 110 /* UniCore32 */ /* * This is an interim value that we will use until the committee comes @@ -147,8 +150,37 @@ typedef int64_t Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_BINDNOW 24 +#define DT_INIT_ARRAY 25 +#define DT_FINI_ARRAY 26 +#define DT_INIT_ARRAYSZ 27 +#define DT_FINI_ARRAYSZ 28 +#define DT_RUNPATH 29 +#define DT_FLAGS 30 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff + +/* DT_ entries which fall between DT_VALRNGLO and DT_VALRNDHI use + the d_val field of the Elf*_Dyn structure. I.e. they contain scalars. */ +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff + +/* DT_ entries which fall between DT_ADDRRNGLO and DT_ADDRRNGHI use + the d_ptr field of the Elf*_Dyn structure. I.e. they contain pointers. */ +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff + +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff + #define DT_MIPS_RLD_VERSION 0x70000001 #define DT_MIPS_TIME_STAMP 0x70000002 #define DT_MIPS_ICHECKSUM 0x70000003 @@ -207,6 +239,21 @@ typedef int64_t Elf64_Sxword; #define AT_PLATFORM 15 /* string identifying CPU for optimizations */ #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ #define AT_CLKTCK 17 /* frequency at which times() increments */ +#define AT_FPUCW 18 /* info about fpu initialization by kernel */ +#define AT_DCACHEBSIZE 19 /* data cache block size */ +#define AT_ICACHEBSIZE 20 /* instruction cache block size */ +#define AT_UCACHEBSIZE 21 /* unified cache block size */ +#define AT_IGNOREPPC 22 /* ppc only; entry should be ignored */ +#define AT_SECURE 23 /* boolean, was exec suid-like? */ +#define AT_BASE_PLATFORM 24 /* string identifying real platforms */ +#define AT_RANDOM 25 /* address of 16 random bytes */ +#define AT_EXECFN 31 /* filename of the executable */ +#define AT_SYSINFO 32 /* address of kernel entry point */ +#define AT_SYSINFO_EHDR 33 /* address of kernel vdso */ +#define AT_L1I_CACHESHAPE 34 /* shapes of the caches: */ +#define AT_L1D_CACHESHAPE 35 /* bits 0-3: cache associativity. */ +#define AT_L2_CACHESHAPE 36 /* bits 4-7: log2 of line size. */ +#define AT_L3_CACHESHAPE 37 /* val&~255: cache size. */ typedef struct dynamic{ Elf32_Sword d_tag; @@ -1147,6 +1194,25 @@ typedef struct elf64_note { Elf64_Word n_type; /* Content type */ } Elf64_Nhdr; + +/* This data structure represents a PT_LOAD segment. */ +struct elf32_fdpic_loadseg { + /* Core address to which the segment is mapped. */ + Elf32_Addr addr; + /* VMA recorded in the program header. */ + Elf32_Addr p_vaddr; + /* Size of this segment in memory. */ + Elf32_Word p_memsz; +}; +struct elf32_fdpic_loadmap { + /* Protocol version number, must be zero. */ + Elf32_Half version; + /* Number of segments in this map. */ + Elf32_Half nsegs; + /* The actual memory map. 
*/ + struct elf32_fdpic_loadseg segs[/*nsegs*/]; +}; + #ifdef ELF_CLASS #if ELF_CLASS == ELFCLASS32 --- exec-all.h 2013-12-18 11:11:37.000000000 -0500 +++ exec-all.h 2014-01-15 13:48:58.000000000 -0500 @@ -62,10 +62,11 @@ typedef ram_addr_t tb_page_addr_t; #define DISAS_UPDATE 2 /* cpu state was modified dynamically */ #define DISAS_TB_JUMP 3 /* only pc was modified statically */ +struct TranslationBlock; typedef struct TranslationBlock TranslationBlock; /* XXX: make safe guess about sizes */ -#define MAX_OP_PER_INSTR 96 +#define MAX_OP_PER_INSTR 208 #if HOST_LONG_BITS == 32 #define MAX_OPC_PARAM_PER_ARG 2 @@ -99,26 +100,24 @@ extern uint16_t gen_opc_icount[OPC_BUF_S void gen_intermediate_code(CPUState *env, struct TranslationBlock *tb); void gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb); -void gen_pc_load(CPUState *env, struct TranslationBlock *tb, - uintptr_t searched_pc, int pc_pos, void *puc); +void restore_state_to_opc(CPUState *env, struct TranslationBlock *tb, + int pc_pos); void cpu_gen_init(void); int cpu_gen_code(CPUState *env, struct TranslationBlock *tb, int *gen_code_size_ptr); int cpu_restore_state(struct TranslationBlock *tb, - CPUState *env, uintptr_t searched_pc, - void *puc); + CPUState *env, uintptr_t searched_pc); void cpu_resume_from_signal(CPUState *env1, void *puc); void cpu_io_recompile(CPUState *env, void *retaddr); TranslationBlock *tb_gen_code(CPUState *env, target_ulong pc, target_ulong cs_base, int flags, int cflags); void cpu_exec_init(CPUState *env); -void QEMU_NORETURN cpu_loop_exit(void); +void QEMU_NORETURN cpu_loop_exit(CPUState *env1); int page_unprotect(target_ulong address, uintptr_t pc, void *puc); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); -void tb_invalidate_page_range(target_ulong start, target_ulong end); void tlb_flush_page(CPUState *env, target_ulong addr); void tlb_flush(CPUState *env, int flush_global); #if !defined(CONFIG_USER_ONLY) @@ -206,10 +205,9 @@ static inline unsigned int tb_jmp_cache_ static inline unsigned int tb_phys_hash_func(tb_page_addr_t pc) { - return pc & (CODE_GEN_PHYS_HASH_SIZE - 1); + return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1); } -TranslationBlock *tb_alloc(target_ulong pc); void tb_free(TranslationBlock *tb); void tb_flush(CPUState *env); void tb_link_page(TranslationBlock *tb, @@ -221,7 +219,7 @@ extern TranslationBlock *tb_phys_hash[CO #if defined(USE_DIRECT_JUMP) #if defined(_ARCH_PPC) -extern void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr); #define tb_set_jmp_target1 ppc_tb_set_jmp_target #elif defined(__i386__) || defined(__x86_64__) static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) @@ -233,9 +231,7 @@ static inline void tb_set_jmp_target1(ui #elif defined(__arm__) static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) { -#if QEMU_GNUC_PREREQ(4, 1) - void __clear_cache(char *beg, char *end); -#else +#if !QEMU_GNUC_PREREQ(4, 1) register unsigned long _beg __asm ("a1"); register unsigned long _end __asm ("a2"); register unsigned long _flg __asm ("a3"); @@ -247,7 +243,7 @@ static inline void tb_set_jmp_target1(ui | (((addr - (jmp_addr + 8)) >> 2) & 0xffffff); #if QEMU_GNUC_PREREQ(4, 1) - __clear_cache((char *) jmp_addr, (char *) jmp_addr + 4); + __builtin___clear_cache((char *) jmp_addr, (char *) jmp_addr + 4); #else /* flush icache */ _beg = jmp_addr; @@ -365,8 +361,8 @@ static inline tb_page_addr_t get_page_ad 
return remR3PhysGetPhysicalAddressCode(env1, addr, &env1->tlb_table[mmu_idx][page_index], env1->iotlb[mmu_idx][page_index]); -# elif defined(TARGET_SPARC) || defined(TARGET_MIPS) - do_unassigned_access(addr, 0, 1, 0, 4); +# elif defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC) + cpu_unassigned_access(env1, addr, 0, 1, 0, 4); #else cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr); #endif @@ -379,7 +375,7 @@ static inline tb_page_addr_t get_page_ad # else p = (void *)(uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend; - return qemu_ram_addr_from_host(p); + return qemu_ram_addr_from_host_nofail(p); # endif } #endif @@ -396,5 +392,4 @@ extern int singlestep; extern volatile sig_atomic_t exit_request; #endif /*!VBOX*/ - #endif --- exec.c 2013-12-18 11:11:37.000000000 -0500 +++ exec.c 2014-01-15 15:45:59.000000000 -0500 @@ -34,13 +34,6 @@ #include #include #endif -#include -#include -#include -#include -#include -#include -#include #else /* VBOX */ # include # include @@ -50,9 +43,8 @@ # include /* PGM_DYNAMIC_RAM_ALLOC */ #endif /* VBOX */ -#include "cpu.h" -#include "exec-all.h" #include "qemu-common.h" +#include "cpu.h" #include "tcg.h" #ifndef VBOX #include "hw/hw.h" @@ -60,10 +52,12 @@ #endif /* !VBOX */ #include "osdep.h" #include "kvm.h" +#ifndef VBOX +#include "hw/xen.h" +#endif #include "qemu-timer.h" #if defined(CONFIG_USER_ONLY) #include -#include #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #include #if __FreeBSD_version >= 700104 @@ -79,6 +73,11 @@ #include #endif #endif +#else /* !CONFIG_USER_ONLY */ +#ifndef VBOX +#include "xen-mapcache.h" +#include "trace.h" +#endif #endif //#define DEBUG_TB_INVALIDATE @@ -147,10 +146,12 @@ CPUState *first_cpu; /* current CPU in the current thread. It is only valid inside cpu_exec() */ CPUState *cpu_single_env; +#ifndef VBOX /* 0 = Do not count executed instructions. 1 = Precise instruction counting. 2 = Adaptive rate instruction counting. */ int use_icount = 0; +#endif /* Current instruction counter. While executing translated code this may include some instructions that have not yet been executed. 
*/ int64_t qemu_icount; @@ -603,7 +604,8 @@ static void code_gen_alloc(uintptr_t tb_ exit(1); } } -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ + || defined(__DragonFly__) || defined(__OpenBSD__) { int flags; void *addr = NULL; @@ -616,6 +618,13 @@ static void code_gen_alloc(uintptr_t tb_ /* Cannot map more than that */ if (code_gen_buffer_size > (800 * 1024 * 1024)) code_gen_buffer_size = (800 * 1024 * 1024); +#elif defined(__sparc_v9__) + // Map the buffer below 2G, so we can use direct calls and branches + flags |= MAP_FIXED; + addr = (void *) 0x60000000UL; + if (code_gen_buffer_size > (512 * 1024 * 1024)) { + code_gen_buffer_size = (512 * 1024 * 1024); + } #endif code_gen_buffer = mmap(addr, code_gen_buffer_size, PROT_WRITE | PROT_READ | PROT_EXEC, @@ -637,7 +646,7 @@ static void code_gen_alloc(uintptr_t tb_ map_exec(code_gen_prologue, _1K); #endif code_gen_buffer_max_size = code_gen_buffer_size - - (TCG_MAX_OP_SIZE * OPC_MAX_SIZE); + (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock)); } @@ -724,6 +733,11 @@ void cpu_exec_init(CPUState *env) env->numa_node = 0; QTAILQ_INIT(&env->breakpoints); QTAILQ_INIT(&env->watchpoints); +#ifndef VBOX +#ifndef CONFIG_USER_ONLY + env->thread_id = qemu_get_thread_id(); +#endif +#endif *penv = env; #ifndef VBOX #if defined(CONFIG_USER_ONLY) @@ -737,6 +751,32 @@ void cpu_exec_init(CPUState *env) #endif /* !VBOX */ } +/* Allocate a new translation block. Flush the translation buffer if + too many translation blocks or too much generated code. */ +static TranslationBlock *tb_alloc(target_ulong pc) +{ + TranslationBlock *tb; + + if (nb_tbs >= code_gen_max_blocks || + (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) + return NULL; + tb = &tbs[nb_tbs++]; + tb->pc = pc; + tb->cflags = 0; + return tb; +} + +void tb_free(TranslationBlock *tb) +{ + /* In practice this is mostly used for single use temporary TB + Ignore the hard cases and just back up if this TB happens to + be the last one generated. 
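/*
 * A sketch of the allocation pattern the tb_alloc()/tb_free() pair being
 * moved here follows: blocks come from a bump pointer over a fixed array,
 * and "free" only rolls back when the victim happens to be the most recently
 * allocated block; anything older is reclaimed only by a full flush.  Types
 * and sizes are placeholders, not the QEMU ones.
 */
#include <stddef.h>

#define MAX_BLOCKS 1024

struct block { unsigned long pc; };

static struct block blocks[MAX_BLOCKS];
static int nb_blocks;

struct block *block_alloc(unsigned long pc)
{
    if (nb_blocks >= MAX_BLOCKS) {
        return NULL;            /* caller is expected to flush everything */
    }
    blocks[nb_blocks].pc = pc;
    return &blocks[nb_blocks++];
}

void block_free(struct block *b)
{
    /* Only the last allocation can be undone cheaply. */
    if (nb_blocks > 0 && b == &blocks[nb_blocks - 1]) {
        nb_blocks--;
    }
}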
*/ + if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) { + code_gen_ptr = tb->tc_ptr; + nb_tbs--; + } +} + static inline void invalidate_page_bitmap(PageDesc *p) { if (p->code_bitmap) { @@ -1182,8 +1222,7 @@ void tb_invalidate_phys_page_range(tb_pa restore the CPU state */ current_tb_modified = 1; - cpu_restore_state(current_tb, env, - env->mem_io_pc, NULL); + cpu_restore_state(current_tb, env, env->mem_io_pc); cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, ¤t_flags); } @@ -1291,7 +1330,7 @@ static void tb_invalidate_phys_page(tb_p restore the CPU state */ current_tb_modified = 1; - cpu_restore_state(current_tb, env, pc, puc); + cpu_restore_state(current_tb, env, pc); cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, ¤t_flags); } @@ -1318,12 +1357,16 @@ static inline void tb_alloc_page(Transla unsigned int n, tb_page_addr_t page_addr) { PageDesc *p; - TranslationBlock *last_first_tb; +#ifndef CONFIG_USER_ONLY + bool page_already_protected; +#endif tb->page_addr[n] = page_addr; p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); tb->page_next[n] = p->first_tb; - last_first_tb = p->first_tb; +#ifndef CONFIG_USER_ONLY + page_already_protected = p->first_tb != NULL; +#endif p->first_tb = (TranslationBlock *)((intptr_t)tb | n); invalidate_page_bitmap(p); @@ -1359,7 +1402,7 @@ static inline void tb_alloc_page(Transla /* if some code is already present, then the pages are already protected. So we handle the case where only the first TB is allocated in a physical page */ - if (!last_first_tb) { + if (!page_already_protected) { tlb_protect_code(page_addr); } #endif @@ -1367,32 +1410,6 @@ static inline void tb_alloc_page(Transla #endif /* TARGET_HAS_SMC */ } -/* Allocate a new translation block. Flush the translation buffer if - too many translation blocks or too much generated code. */ -TranslationBlock *tb_alloc(target_ulong pc) -{ - TranslationBlock *tb; - - if (nb_tbs >= code_gen_max_blocks || - (code_gen_ptr - code_gen_buffer) >= VBOX_ONLY((uintptr_t))code_gen_buffer_max_size) - return NULL; - tb = &tbs[nb_tbs++]; - tb->pc = pc; - tb->cflags = 0; - return tb; -} - -void tb_free(TranslationBlock *tb) -{ - /* In practice this is mostly used for single use temporary TB - Ignore the hard cases and just back up if this TB happens to - be the last one generated. */ - if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) { - code_gen_ptr = tb->tc_ptr; - nb_tbs--; - } -} - /* add a new TB and link it to the physical page tables. phys_page2 is (-1) to indicate that only one page contains the TB. */ void tb_link_page(TranslationBlock *tb, @@ -1784,44 +1801,51 @@ static void cpu_unlink_tb(CPUState *env) spin_unlock(&interrupt_lock); } +#ifndef CONFIG_USER_ONLY /* mask must never be zero, except for A20 change call */ -void cpu_interrupt(CPUState *env, int mask) +static void tcg_handle_interrupt(CPUState *env, int mask) { +#ifndef VBOX int old_mask; old_mask = env->interrupt_request; -#ifndef VBOX env->interrupt_request |= mask; -#else /* VBOX */ - VM_ASSERT_EMT(env->pVM); - ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, mask); -#endif /* VBOX */ -#ifndef VBOX -#ifndef CONFIG_USER_ONLY /* * If called from iothread context, wake the target cpu in * case its halted. 
*/ - if (!qemu_cpu_self(env)) { + if (!qemu_cpu_is_self(env)) { qemu_cpu_kick(env); return; } -#endif -#endif /* !VBOX */ if (use_icount) { env->icount_decr.u16.high = 0xffff; -#ifndef CONFIG_USER_ONLY if (!can_do_io(env) && (mask & ~old_mask) != 0) { cpu_abort(env, "Raised interrupt while not in I/O function"); } -#endif } else { cpu_unlink_tb(env); } +#else /* VBOX */ + VM_ASSERT_EMT(env->pVM); + ASMAtomicOrS32((int32_t volatile *)&env->interrupt_request, mask); + cpu_unlink_tb(env); +#endif /* VBOX */ +} + +CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt; + +#else /* CONFIG_USER_ONLY */ + +void cpu_interrupt(CPUState *env, int mask) +{ + env->interrupt_request |= mask; + cpu_unlink_tb(env); } +#endif /* CONFIG_USER_ONLY */ void cpu_reset_interrupt(CPUState *env, int mask) { @@ -1880,17 +1904,18 @@ static QLIST_HEAD(memory_client_list, CP = QLIST_HEAD_INITIALIZER(memory_client_list); static void cpu_notify_set_memory(target_phys_addr_t start_addr, - ram_addr_t size, - ram_addr_t phys_offset) + ram_addr_t size, + ram_addr_t phys_offset, + bool log_dirty) { CPUPhysMemoryClient *client; QLIST_FOREACH(client, &memory_client_list, list) { - client->set_memory(client, start_addr, size, phys_offset); + client->set_memory(client, start_addr, size, phys_offset, log_dirty); } } static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start, - target_phys_addr_t end) + target_phys_addr_t end) { CPUPhysMemoryClient *client; QLIST_FOREACH(client, &memory_client_list, list) { @@ -1912,8 +1937,21 @@ static int cpu_notify_migration_log(int return 0; } -static void phys_page_for_each_1(CPUPhysMemoryClient *client, - int level, void **lp) +struct last_map { + target_phys_addr_t start_addr; + ram_addr_t size; + ram_addr_t phys_offset; +}; + +/* The l1_phys_map provides the upper P_L1_BITs of the guest physical + * address. Each intermediate table provides the next L2_BITs of guest + * physical address space. The number of levels vary based on host and + * guest configuration, making it efficient to build the final guest + * physical address by seeding the L1 offset and shifting and adding in + * each L2 offset as we recurse through them. 
*/ +static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level, + void **lp, target_phys_addr_t addr, + struct last_map *map) { int i; @@ -1922,16 +1960,32 @@ static void phys_page_for_each_1(CPUPhys } if (level == 0) { PhysPageDesc *pd = *lp; + addr <<= L2_BITS + TARGET_PAGE_BITS; for (i = 0; i < L2_SIZE; ++i) { if (pd[i].phys_offset != IO_MEM_UNASSIGNED) { - client->set_memory(client, pd[i].region_offset, - TARGET_PAGE_SIZE, pd[i].phys_offset); + target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS; + + if (map->size && + start_addr == map->start_addr + map->size && + pd[i].phys_offset == map->phys_offset + map->size) { + + map->size += TARGET_PAGE_SIZE; + continue; + } else if (map->size) { + client->set_memory(client, map->start_addr, + map->size, map->phys_offset, false); + } + + map->start_addr = start_addr; + map->size = TARGET_PAGE_SIZE; + map->phys_offset = pd[i].phys_offset; } } } else { void **pp = *lp; for (i = 0; i < L2_SIZE; ++i) { - phys_page_for_each_1(client, level - 1, pp + i); + phys_page_for_each_1(client, level - 1, pp + i, + (addr << L2_BITS) | i, map); } } } @@ -1939,9 +1993,15 @@ static void phys_page_for_each_1(CPUPhys static void phys_page_for_each(CPUPhysMemoryClient *client) { int i; + struct last_map map = { }; + for (i = 0; i < P_L1_SIZE; ++i) { phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1, - l1_phys_map + 1); + l1_phys_map + i, i, &map); + } + if (map.size) { + client->set_memory(client, map.start_addr, map.size, map.phys_offset, + false); } } @@ -1977,17 +2037,17 @@ int cpu_str_to_log_mask(const char *str) p1 = strchr(p, ','); if (!p1) p1 = p + strlen(p); - if(cmp1(p,p1-p,"all")) { - for(item = cpu_log_items; item->mask != 0; item++) { - mask |= item->mask; - } - } else { - for(item = cpu_log_items; item->mask != 0; item++) { - if (cmp1(p, p1 - p, item->name)) - goto found; + if(cmp1(p,p1-p,"all")) { + for(item = cpu_log_items; item->mask != 0; item++) { + mask |= item->mask; + } + } else { + for(item = cpu_log_items; item->mask != 0; item++) { + if (cmp1(p, p1 - p, item->name)) + goto found; + } + return 0; } - return 0; - } found: mask |= item->mask; if (*p1 != ',') @@ -2082,11 +2142,11 @@ static inline void tlb_flush_jmp_cache(C overlap the flushed page. */ i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); i = tb_jmp_cache_hash_page(addr); memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); #ifdef VBOX /* inform raw mode about TLB page flush */ @@ -2240,10 +2300,10 @@ void cpu_physical_memory_reset_dirty(ram #if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) start1 = start; #elif !defined(VBOX) - start1 = (uintptr_t)qemu_get_ram_ptr(start); - /* Chek that we don't span multiple blocks - this breaks the + start1 = (uintptr_t)qemu_safe_ram_ptr(start); + /* Check that we don't span multiple blocks - this breaks the address comparisons below. 
*/ - if ((uintptr_t)qemu_get_ram_ptr(end - 1) - start1 + if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1 != (end - 1) - start) { abort(); } @@ -2291,6 +2351,42 @@ int cpu_physical_sync_dirty_bitmap(targe #endif /* VBOX */ } +int cpu_physical_log_start(target_phys_addr_t start_addr, + ram_addr_t size) +{ +#ifndef VBOX + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + if (client->log_start) { + int r = client->log_start(client, start_addr, size); + if (r < 0) { + return r; + } + } + } +#else /* VBOX */ + return 0; +#endif /* VBOX */ +} + +int cpu_physical_log_stop(target_phys_addr_t start_addr, + ram_addr_t size) +{ +#ifndef VBOX + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + if (client->log_stop) { + int r = client->log_stop(client, start_addr, size); + if (r < 0) { + return r; + } + } + } +#else /* VBOX */ + return 0; +#endif /* VBOX */ +} + #if defined(VBOX) && !defined(REM_PHYS_ADDR_IN_TLB) DECLINLINE(void) tlb_update_dirty(CPUTLBEntry *tlb_entry, target_phys_addr_t phys_addend) #else @@ -2308,7 +2404,7 @@ static inline void tlb_update_dirty(CPUT #elif !defined(VBOX) p = (void *)(uintptr_t)((tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend); - ram_addr = qemu_ram_addr_from_host(p); + ram_addr = qemu_ram_addr_from_host_nofail(p); #else Assert(phys_addend != -1); ram_addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + phys_addend; @@ -2407,8 +2503,9 @@ void tlb_set_page(CPUState *env, target_ pd = p->phys_offset; } #if defined(DEBUG_TLB) - printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d size=" TARGET_FMT_lx " pd=0x%08lx\n", - vaddr, (int)paddr, prot, mmu_idx, size, (long)pd); + printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx + " prot=%x idx=%d pd=0x%08lx\n", + vaddr, paddr, prot, mmu_idx, pd); #endif address = vaddr; @@ -2853,10 +2950,11 @@ static subpage_t *subpage_init (target_p start_addr and region_offset are rounded down to a page boundary before calculating this offset. This should not be a problem unless the low bits of start_addr and region_offset differ. 
*/ -void cpu_register_physical_memory_offset(target_phys_addr_t start_addr, +void cpu_register_physical_memory_log(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset, - ram_addr_t region_offset) + ram_addr_t region_offset, + bool log_dirty) { target_phys_addr_t addr, end_addr; PhysPageDesc *p; @@ -2865,7 +2963,8 @@ void cpu_register_physical_memory_offset subpage_t *subpage; #ifndef VBOX - cpu_notify_set_memory(start_addr, size, phys_offset); + assert(size); + cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty); #endif /* !VBOX */ if (phys_offset == IO_MEM_UNASSIGNED) { @@ -2874,7 +2973,9 @@ void cpu_register_physical_memory_offset region_offset &= TARGET_PAGE_MASK; size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK; end_addr = start_addr + (target_phys_addr_t)size; - for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) { + + addr = start_addr; + do { p = phys_page_find(addr >> TARGET_PAGE_BITS); if (p && p->phys_offset != IO_MEM_UNASSIGNED) { ram_addr_t orig_memory = p->phys_offset; @@ -2926,7 +3027,8 @@ void cpu_register_physical_memory_offset } } region_offset += TARGET_PAGE_SIZE; - } + addr += TARGET_PAGE_SIZE; + } while (addr != end_addr); /* since each CPU stores ram addresses in its TLB cache, we must reset the modified entries */ @@ -2990,16 +3092,16 @@ static size_t gethugepagesize(const char int ret; do { - ret = statfs(path, &fs); + ret = statfs(path, &fs); } while (ret != 0 && errno == EINTR); if (ret != 0) { - perror(path); - return 0; + perror(path); + return 0; } if (fs.f_type != HUGETLBFS_MAGIC) - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); return (size_t)fs.f_bsize; } @@ -3018,7 +3120,7 @@ static void *file_ram_alloc(RAMBlock *bl hpagesize = gethugepagesize(path); if (!hpagesize) { - return NULL; + return NULL; } if (memory < hpagesize) { @@ -3031,14 +3133,14 @@ static void *file_ram_alloc(RAMBlock *bl } if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) { - return NULL; + return NULL; } fd = mkstemp(filename); if (fd < 0) { - perror("unable to create backing store for hugepages"); - free(filename); - return NULL; + perror("unable to create backing store for hugepages"); + free(filename); + return NULL; } unlink(filename); free(filename); @@ -3052,7 +3154,7 @@ static void *file_ram_alloc(RAMBlock *bl * mmap will fail. 
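/*
 * A Linux-only sketch of the hugetlbfs allocation path that the
 * gethugepagesize()/file_ram_alloc() code above uses: probe the huge page
 * size via statfs(), create and immediately unlink a backing file, size it
 * with ftruncate(), then mmap() it.  huge_alloc is a hypothetical name and
 * error handling is abbreviated.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <unistd.h>

void *huge_alloc(const char *hugetlbfs_dir, size_t len)
{
    struct statfs fs;
    char path[4096];
    void *area;
    int fd;

    if (statfs(hugetlbfs_dir, &fs) != 0) {
        return NULL;
    }
    /* On hugetlbfs, f_bsize is the huge page size; len should be a multiple. */
    if (len % (size_t)fs.f_bsize) {
        return NULL;
    }
    snprintf(path, sizeof(path), "%s/back_mem.XXXXXX", hugetlbfs_dir);
    fd = mkstemp(path);
    if (fd < 0) {
        return NULL;
    }
    unlink(path);                     /* keep the mapping, drop the name */
    if (ftruncate(fd, (off_t)len) != 0) {
        /* older hugetlbfs may not support ftruncate; mmap will then fail */
    }
    area = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    close(fd);
    return area == MAP_FAILED ? NULL : area;
}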
*/ if (ftruncate(fd, memory)) - perror("ftruncate"); + perror("ftruncate"); #ifdef MAP_POPULATE /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case @@ -3065,9 +3167,9 @@ static void *file_ram_alloc(RAMBlock *bl area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); #endif if (area == MAP_FAILED) { - perror("file_ram_alloc: can't mmap RAM pages"); - close(fd); - return (NULL); + perror("file_ram_alloc: can't mmap RAM pages"); + close(fd); + return (NULL); } block->fd = fd; return area; @@ -3112,7 +3214,7 @@ static ram_addr_t last_ram_offset(void) } ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, - ram_addr_t size, void *host) + ram_addr_t size, void *host) { RAMBlock *new_block, *block; @@ -3136,9 +3238,46 @@ ram_addr_t qemu_ram_alloc_from_ptr(Devic } } - new_block->host = host; - new_block->offset = find_ram_offset(size); + if (host) { + new_block->host = host; + new_block->flags |= RAM_PREALLOC_MASK; + } else { + if (mem_path) { +#if defined (__linux__) && !defined(TARGET_S390X) + new_block->host = file_ram_alloc(new_block, size, mem_path); + if (!new_block->host) { + new_block->host = qemu_vmalloc(size); + qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); + } +#else + fprintf(stderr, "-mem-path option unsupported\n"); + exit(1); +#endif + } else { +#if defined(TARGET_S390X) && defined(CONFIG_KVM) + /* S390 KVM requires the topmost vma of the RAM to be smaller than + an system defined value, which is at least 256GB. Larger systems + have larger values. We put the guest between the end of data + segment (system break) and this value. We use 32GB as a base to + have enough room for the system break to grow. */ + new_block->host = mmap((void*)0x800000000, size, + PROT_EXEC|PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (new_block->host == MAP_FAILED) { + fprintf(stderr, "Allocating RAM failed\n"); + abort(); + } +#else + if (xen_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } +#endif + qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); + } + } new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -3156,68 +3295,20 @@ ram_addr_t qemu_ram_alloc_from_ptr(Devic ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size) { - RAMBlock *new_block, *block; - - size = TARGET_PAGE_ALIGN(size); - new_block = qemu_mallocz(sizeof(*new_block)); + return qemu_ram_alloc_from_ptr(dev, name, size, NULL); +} - if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) { - char *id = dev->parent_bus->info->get_dev_path(dev); - if (id) { - snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id); - qemu_free(id); - } - } - pstrcat(new_block->idstr, sizeof(new_block->idstr), name); +void qemu_ram_free_from_ptr(ram_addr_t addr) +{ + RAMBlock *block; QLIST_FOREACH(block, &ram_list.blocks, next) { - if (!strcmp(block->idstr, new_block->idstr)) { - fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", - new_block->idstr); - abort(); - } - } - - if (mem_path) { -#if defined (__linux__) && !defined(TARGET_S390X) - new_block->host = file_ram_alloc(new_block, size, mem_path); - if (!new_block->host) { - new_block->host = qemu_vmalloc(size); -#ifdef MADV_MERGEABLE - madvise(new_block->host, size, MADV_MERGEABLE); -#endif + if (addr == block->offset) { + QLIST_REMOVE(block, next); + qemu_free(block); + return; } -#else - fprintf(stderr, "-mem-path option unsupported\n"); - exit(1); -#endif - 
} else { -#if defined(TARGET_S390X) && defined(CONFIG_KVM) - /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */ - new_block->host = mmap((void*)0x1000000, size, - PROT_EXEC|PROT_READ|PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); -#else - new_block->host = qemu_vmalloc(size); -#endif -#ifdef MADV_MERGEABLE - madvise(new_block->host, size, MADV_MERGEABLE); -#endif } - new_block->offset = find_ram_offset(size); - new_block->length = size; - - QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); - - ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, - last_ram_offset() >> TARGET_PAGE_BITS); - memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), - 0xff, size >> TARGET_PAGE_BITS); - - if (kvm_enabled()) - kvm_setup_guest_memory(new_block->host, size); - - return new_block->offset; } void qemu_ram_free(ram_addr_t addr) @@ -3227,7 +3318,9 @@ void qemu_ram_free(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr == block->offset) { QLIST_REMOVE(block, next); - if (mem_path) { + if (block->flags & RAM_PREALLOC_MASK) { + ; + } else if (mem_path) { #if defined (__linux__) && !defined(TARGET_S390X) if (block->fd) { munmap(block->host, block->length); @@ -3235,12 +3328,18 @@ void qemu_ram_free(ram_addr_t addr) } else { qemu_vfree(block->host); } +#else + abort(); #endif } else { #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -3250,6 +3349,66 @@ void qemu_ram_free(ram_addr_t addr) } +#ifndef _WIN32 +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +{ + RAMBlock *block; + ram_addr_t offset; + int flags; + void *area, *vaddr; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + offset = addr - block->offset; + if (offset < block->length) { + vaddr = block->host + offset; + if (block->flags & RAM_PREALLOC_MASK) { + ; + } else { + flags = MAP_FIXED; + munmap(vaddr, length); + if (mem_path) { +#if defined(__linux__) && !defined(TARGET_S390X) + if (block->fd) { +#ifdef MAP_POPULATE + flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED : + MAP_PRIVATE; +#else + flags |= MAP_PRIVATE; +#endif + area = mmap(vaddr, length, PROT_READ | PROT_WRITE, + flags, block->fd, offset); + } else { + flags |= MAP_PRIVATE | MAP_ANONYMOUS; + area = mmap(vaddr, length, PROT_READ | PROT_WRITE, + flags, -1, 0); + } +#else + abort(); +#endif + } else { +#if defined(TARGET_S390X) && defined(CONFIG_KVM) + flags |= MAP_SHARED | MAP_ANONYMOUS; + area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE, + flags, -1, 0); +#else + flags |= MAP_PRIVATE | MAP_ANONYMOUS; + area = mmap(vaddr, length, PROT_READ | PROT_WRITE, + flags, -1, 0); +#endif + } + if (area != vaddr) { + fprintf(stderr, "Could not remap addr: %lx@%lx\n", + length, addr); + exit(1); + } + qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE); + } + return; + } + } +} +#endif /* !_WIN32 */ + /* Return a host pointer to ram allocated with qemu_ram_alloc. With the exception of the softmmu code in this file, this should only be used for local memory (e.g. video ram) that the device owns, @@ -3264,8 +3423,23 @@ void *qemu_get_ram_ptr(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr - block->offset < block->length) { - QLIST_REMOVE(block, next); - QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + /* Move this entry to to start of the list. 
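/*
 * A sketch of the remap-in-place trick the new qemu_ram_remap() above relies
 * on: discard the existing pages and install fresh anonymous memory at the
 * very same virtual address using MAP_FIXED, so every pointer into the block
 * stays valid.  Anonymous-memory case only; the file-backed branches are
 * omitted and remap_anon is a hypothetical name.
 */
#include <stdio.h>
#include <sys/mman.h>

int remap_anon(void *vaddr, size_t length)
{
    void *area;

    munmap(vaddr, length);
    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (area != vaddr) {
        fprintf(stderr, "could not remap %zu bytes at %p\n", length, vaddr);
        return -1;
    }
    return 0;
}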
*/ + if (block != QLIST_FIRST(&ram_list.blocks)) { + QLIST_REMOVE(block, next); + QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + } + if (xen_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + * In that case just map until the end of the page. + */ + if (block->offset == 0) { + return xen_map_cache(addr, 0, 0); + } else if (block->host == NULL) { + block->host = + xen_map_cache(block->offset, block->length, 1); + } + } return block->host + (addr - block->offset); } } @@ -3276,23 +3450,104 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return NULL; } -/* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. */ -ram_addr_t qemu_ram_addr_from_host(void *ptr) +/* Return a host pointer to ram allocated with qemu_ram_alloc. + * Same as qemu_get_ram_ptr but avoid reordering ramblocks. + */ +void *qemu_safe_ram_ptr(ram_addr_t addr) +{ + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr - block->offset < block->length) { + if (xen_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + * In that case just map until the end of the page. + */ + if (block->offset == 0) { + return xen_map_cache(addr, 0, 0); + } else if (block->host == NULL) { + block->host = + xen_map_cache(block->offset, block->length, 1); + } + } + return block->host + (addr - block->offset); + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + + return NULL; +} + +/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr + * but takes a size argument */ +void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size) +{ + if (*size == 0) { + return NULL; + } + if (xen_enabled()) { + return xen_map_cache(addr, *size, 1); + } else { + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr - block->offset < block->length) { + if (addr - block->offset + *size > block->length) + *size = block->length - addr + block->offset; + return block->host + (addr - block->offset); + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + } +} + +#ifndef VBOX +void qemu_put_ram_ptr(void *addr) +{ + trace_qemu_put_ram_ptr(addr); +} +#endif + +int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; + if (xen_enabled()) { + *ram_addr = xen_ram_addr_from_mapcache(ptr); + return 0; + } + QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. */ + if (block->host == NULL) { + continue; + } if (host - block->host < block->length) { - return block->offset + (host - block->host); + *ram_addr = block->offset + (host - block->host); + return 0; } } - fprintf(stderr, "Bad ram pointer %p\n", ptr); - abort(); + return -1; +} - return 0; +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. 
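/*
 * A sketch of the move-to-front lookup qemu_get_ram_ptr() above performs on
 * the RAM block list: on a hit, the block is moved to the list head so
 * repeated accesses to the same block stay cheap.  Plain singly linked list,
 * no Xen map-cache handling; ram_ptr and struct ram_block are stand-ins for
 * the QEMU types.
 */
#include <stddef.h>
#include <stdint.h>

struct ram_block {
    struct ram_block *next;
    uint64_t offset;
    uint64_t length;
    uint8_t *host;
};

void *ram_ptr(struct ram_block **head, uint64_t addr)
{
    struct ram_block **pp, *b;

    for (pp = head; (b = *pp) != NULL; pp = &b->next) {
        if (addr - b->offset < b->length) {
            if (b != *head) {         /* move the hit to the front */
                *pp = b->next;
                b->next = *head;
                *head = b;
            }
            return b->host + (addr - b->offset);
        }
    }
    return NULL;                      /* bad ram offset */
}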
*/ +ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) +{ + ram_addr_t ram_addr; + + if (qemu_ram_addr_from_host(ptr, &ram_addr)) { + fprintf(stderr, "Bad ram pointer %p\n", ptr); + abort(); + } + return ram_addr; } #endif /* !VBOX */ @@ -3302,8 +3557,8 @@ static uint32_t unassigned_mem_readb(voi #ifdef DEBUG_UNASSIGNED printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 0, 0, 0, 1); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1); #endif return 0; } @@ -3313,8 +3568,8 @@ static uint32_t unassigned_mem_readw(voi #ifdef DEBUG_UNASSIGNED printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 0, 0, 0, 2); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2); #endif return 0; } @@ -3324,8 +3579,8 @@ static uint32_t unassigned_mem_readl(voi #ifdef DEBUG_UNASSIGNED printf("Unassigned mem read " TARGET_FMT_plx "\n", addr); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 0, 0, 0, 4); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4); #endif return 0; } @@ -3335,8 +3590,8 @@ static void unassigned_mem_writeb(void * #ifdef DEBUG_UNASSIGNED printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 1, 0, 0, 1); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1); #endif } @@ -3345,8 +3600,8 @@ static void unassigned_mem_writew(void * #ifdef DEBUG_UNASSIGNED printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 1, 0, 0, 2); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2); #endif } @@ -3355,8 +3610,8 @@ static void unassigned_mem_writel(void * #ifdef DEBUG_UNASSIGNED printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val); #endif -#if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) - do_unassigned_access(addr, 1, 0, 0, 4); +#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE) + cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4); #endif } @@ -3485,7 +3740,7 @@ static void check_watchpoint(int offset, cpu_abort(env, "check_watchpoint: could not find TB for " "pc=%p", (void *)env->mem_io_pc); } - cpu_restore_state(tb, env, env->mem_io_pc, NULL); + cpu_restore_state(tb, env, env->mem_io_pc); tb_phys_invalidate(tb, -1); if (wp->flags & BP_STOP_BEFORE_ACCESS) { env->exception_index = EXCP_DEBUG; @@ -3642,6 +3897,8 @@ static int subpage_register (subpage_t * printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__, mmio, start, end, idx, eidx, memory); #endif + if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) + memory = IO_MEM_UNASSIGNED; memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); for (; idx <= eidx; idx++) { mmio->sub_io_index[idx] = memory; @@ -3661,7 +3918,8 @@ static subpage_t *subpage_init (target_p mmio = 
qemu_mallocz(sizeof(subpage_t)); mmio->base = base; - subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio); + subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio, + DEVICE_NATIVE_ENDIAN); #if defined(DEBUG_SUBPAGE) printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__, mmio, base, TARGET_PAGE_SIZE, subpage_memory); @@ -3685,6 +3943,106 @@ static int get_free_io_mem_idx(void) return -1; } +/* + * Usually, devices operate in little endian mode. There are devices out + * there that operate in big endian too. Each device gets byte swapped + * mmio if plugged onto a CPU that does the other endianness. + * + * CPU Device swap? + * + * little little no + * little big yes + * big little yes + * big big no + */ + +typedef struct SwapEndianContainer { + CPUReadMemoryFunc *read[3]; + CPUWriteMemoryFunc *write[3]; + void *opaque; +} SwapEndianContainer; + +static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr) +{ + uint32_t val; + SwapEndianContainer *c = opaque; + val = c->read[0](c->opaque, addr); + return val; +} + +static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr) +{ + uint32_t val; + SwapEndianContainer *c = opaque; + val = bswap16(c->read[1](c->opaque, addr)); + return val; +} + +static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr) +{ + uint32_t val; + SwapEndianContainer *c = opaque; + val = bswap32(c->read[2](c->opaque, addr)); + return val; +} + +static CPUReadMemoryFunc * const swapendian_readfn[3]={ + swapendian_mem_readb, + swapendian_mem_readw, + swapendian_mem_readl +}; + +static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + SwapEndianContainer *c = opaque; + c->write[0](c->opaque, addr, val); +} + +static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + SwapEndianContainer *c = opaque; + c->write[1](c->opaque, addr, bswap16(val)); +} + +static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + SwapEndianContainer *c = opaque; + c->write[2](c->opaque, addr, bswap32(val)); +} + +static CPUWriteMemoryFunc * const swapendian_writefn[3]={ + swapendian_mem_writeb, + swapendian_mem_writew, + swapendian_mem_writel +}; + +static void swapendian_init(int io_index) +{ + SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer)); + int i; + + /* Swap mmio for big endian targets */ + c->opaque = io_mem_opaque[io_index]; + for (i = 0; i < 3; i++) { + c->read[i] = io_mem_read[io_index][i]; + c->write[i] = io_mem_write[io_index][i]; + + io_mem_read[io_index][i] = swapendian_readfn[i]; + io_mem_write[io_index][i] = swapendian_writefn[i]; + } + io_mem_opaque[io_index] = c; +} + +static void swapendian_del(int io_index) +{ + if (io_mem_read[io_index][0] == swapendian_readfn[0]) { + qemu_free(io_mem_opaque[io_index]); + } +} + /* mem_read and mem_write are arrays of functions containing the function to access byte (index 0), word (index 1) and dword (index 2). Functions can be omitted with a NULL function pointer. 
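/*
 * A sketch of the indirection the SwapEndianContainer code above introduces:
 * when the device's declared endianness differs from the target CPU's, the
 * registered read/write callbacks are wrapped by thin functions that
 * byte-swap the value on the way through.  Only a 32-bit read wrapper is
 * shown, with local helpers instead of the QEMU bswap and I/O tables.
 */
#include <stdint.h>

typedef uint32_t (*mmio_read_fn)(void *opaque, uint64_t addr);

struct swap_wrapper {
    mmio_read_fn readl;     /* the device's original callback */
    void *opaque;           /* the device's original opaque pointer */
};

static uint32_t bswap32_local(uint32_t v)
{
    return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
           ((v << 8) & 0x00ff0000u) | (v << 24);
}

/* Registered in place of the device callback when a swap is needed. */
uint32_t swapped_readl(void *opaque, uint64_t addr)
{
    struct swap_wrapper *w = opaque;
    return bswap32_local(w->readl(w->opaque, addr));
}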
@@ -3695,7 +4053,7 @@ static int get_free_io_mem_idx(void) static int cpu_register_io_memory_fixed(int io_index, CPUReadMemoryFunc * const *mem_read, CPUWriteMemoryFunc * const *mem_write, - void *opaque) + void *opaque, enum device_endian endian) { int i; @@ -3719,14 +4077,30 @@ static int cpu_register_io_memory_fixed( } io_mem_opaque[io_index] = opaque; + switch (endian) { + case DEVICE_BIG_ENDIAN: +#ifndef TARGET_WORDS_BIGENDIAN + swapendian_init(io_index); +#endif + break; + case DEVICE_LITTLE_ENDIAN: +#ifdef TARGET_WORDS_BIGENDIAN + swapendian_init(io_index); +#endif + break; + case DEVICE_NATIVE_ENDIAN: + default: + break; + } + return (io_index << IO_MEM_SHIFT); } int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, CPUWriteMemoryFunc * const *mem_write, - void *opaque) + void *opaque, enum device_endian endian) { - return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque); + return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian); } void cpu_unregister_io_memory(int io_table_address) @@ -3734,6 +4108,8 @@ void cpu_unregister_io_memory(int io_tab int i; int io_index = io_table_address >> IO_MEM_SHIFT; + swapendian_del(io_index); + for (i=0;i < 3; i++) { io_mem_read[io_index][i] = unassigned_mem_read[i]; io_mem_write[io_index][i] = unassigned_mem_write[i]; @@ -3746,14 +4122,21 @@ static void io_mem_init(void) { int i; - cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL); - cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL); - cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL); + cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, + unassigned_mem_write, NULL, + DEVICE_NATIVE_ENDIAN); + cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, + unassigned_mem_write, NULL, + DEVICE_NATIVE_ENDIAN); + cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, + notdirty_mem_write, NULL, + DEVICE_NATIVE_ENDIAN); for (i=0; i<5; i++) io_mem_used[i] = 1; io_mem_watch = cpu_register_io_memory(watch_mem_read, - watch_mem_write, NULL); + watch_mem_write, NULL, + DEVICE_NATIVE_ENDIAN); } #endif /* !defined(CONFIG_USER_ONLY) */ @@ -3875,6 +4258,9 @@ void cpu_physical_memory_rw(target_phys_ cpu_physical_memory_set_dirty_flags( addr1, (0xff & ~CODE_DIRTY_FLAG)); } +#ifndef VBOX + qemu_put_ram_ptr(ptr); +#endif } } else { if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && @@ -3917,9 +4303,9 @@ void cpu_physical_memory_rw(target_phys_ #ifdef VBOX remR3PhysRead((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK), buf, l); NOREF(ptr); #else - ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + - (addr & ~TARGET_PAGE_MASK); - memcpy(buf, ptr, l); + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK); + memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l); + qemu_put_ram_ptr(ptr); #endif } } @@ -3963,6 +4349,7 @@ void cpu_physical_memory_write_rom(targe /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); + qemu_put_ram_ptr(ptr); } len -= l; buf += l; @@ -4028,14 +4415,14 @@ void *cpu_physical_memory_map(target_phy int is_write) { target_phys_addr_t len = *plen; - target_phys_addr_t done = 0; + target_phys_addr_t todo = 0; int l; - uint8_t *ret = NULL; - uint8_t *ptr; target_phys_addr_t page; ram_addr_t pd; PhysPageDesc *p; - ram_addr_t addr1; + ram_addr_t raddr = ULONG_MAX; + ram_addr_t rlen; + void *ret; while (len > 0) { page = addr & TARGET_PAGE_MASK; @@ -4050,31 +4437,30 @@ void *cpu_physical_memory_map(target_phy } if ((pd & 
~TARGET_PAGE_MASK) != IO_MEM_RAM) { - if (done || bounce.buffer) { + if (todo || bounce.buffer) { break; } bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); bounce.addr = addr; bounce.len = l; if (!is_write) { - cpu_physical_memory_rw(addr, bounce.buffer, l, 0); + cpu_physical_memory_read(addr, bounce.buffer, l); } - ptr = bounce.buffer; - } else { - addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); - ptr = qemu_get_ram_ptr(addr1); + + *plen = l; + return bounce.buffer; } - if (!done) { - ret = ptr; - } else if (ret + done != ptr) { - break; + if (!todo) { + raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); } len -= l; addr += l; - done += l; + todo += l; } - *plen = done; + rlen = todo; + ret = qemu_ram_ptr_length(raddr, &rlen); + *plen = rlen; return ret; } @@ -4087,7 +4473,7 @@ void cpu_physical_memory_unmap(void *buf { if (buffer != bounce.buffer) { if (is_write) { - ram_addr_t addr1 = qemu_ram_addr_from_host(buffer); + ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer); while (access_len) { unsigned l; l = TARGET_PAGE_SIZE; @@ -4104,6 +4490,9 @@ void cpu_physical_memory_unmap(void *buf access_len -= l; } } + if (xen_enabled()) { + xen_invalidate_map_cache_entry(buffer); + } return; } if (is_write) { @@ -4117,7 +4506,8 @@ void cpu_physical_memory_unmap(void *buf #endif /* !VBOX */ /* warning: addr must be aligned */ -uint32_t ldl_phys(target_phys_addr_t addr) +static inline uint32_t ldl_phys_internal(target_phys_addr_t addr, + enum device_endian endian) { int io_index; uint8_t *ptr; @@ -4139,12 +4529,31 @@ uint32_t ldl_phys(target_phys_addr_t add if (p) addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr); +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap32(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap32(val); + } +#endif } else { /* RAM case */ #ifndef VBOX ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); - val = ldl_p(ptr); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = ldl_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = ldl_be_p(ptr); + break; + default: + val = ldl_p(ptr); + break; + } #else val = remR3PhysReadU32((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr); #endif @@ -4152,8 +4561,24 @@ uint32_t ldl_phys(target_phys_addr_t add return val; } +uint32_t ldl_phys(target_phys_addr_t addr) +{ + return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN); +} + +uint32_t ldl_le_phys(target_phys_addr_t addr) +{ + return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN); +} + +uint32_t ldl_be_phys(target_phys_addr_t addr) +{ + return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN); +} + /* warning: addr must be aligned */ -uint64_t ldq_phys(target_phys_addr_t addr) +static inline uint64_t ldq_phys_internal(target_phys_addr_t addr, + enum device_endian endian) { int io_index; uint8_t *ptr; @@ -4174,6 +4599,9 @@ uint64_t ldq_phys(target_phys_addr_t add io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); if (p) addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; + + /* XXX This is broken when device endian != cpu endian. 
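/*
 * A sketch of the map/unmap contract the reworked cpu_physical_memory_map()
 * above follows: plain RAM is handed out as a direct pointer, while anything
 * else is staged in a single bounce buffer that gets written back (for
 * writes) when the caller unmaps.  ram_base, is_ram and mmio_write are
 * stand-ins for the real address-space lookups, not QEMU functions.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define RAM_SIZE (16u << 20)
static uint8_t ram_base[RAM_SIZE];
static struct { uint8_t *buf; uint64_t addr; size_t len; int in_use; } bounce;

static int is_ram(uint64_t addr) { return addr < RAM_SIZE; }
static void mmio_write(uint64_t a, const uint8_t *p, size_t l)
{
    (void)a; (void)p; (void)l;        /* device write would happen here */
}

void *phys_map(uint64_t addr, size_t *plen, int is_write)
{
    if (is_ram(addr)) {
        if (addr + *plen > RAM_SIZE) {
            *plen = RAM_SIZE - addr;  /* clamp to the region */
        }
        return ram_base + addr;
    }
    if (bounce.in_use) {
        *plen = 0;                    /* only one bounce buffer at a time */
        return NULL;
    }
    bounce.buf = malloc(*plen);
    bounce.addr = addr;
    bounce.len = *plen;
    bounce.in_use = 1;
    if (!is_write) {
        /* a real implementation would read the device into the buffer here */
        memset(bounce.buf, 0, *plen);
    }
    return bounce.buf;
}

void phys_unmap(void *p, size_t len, int is_write)
{
    if (p == bounce.buf) {
        if (is_write) {
            mmio_write(bounce.addr, bounce.buf, len);
        }
        free(bounce.buf);
        bounce.in_use = 0;
    }
    /* direct RAM mappings need no copy-back */
}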
+ Fix and add "endian" variable check */ #ifdef TARGET_WORDS_BIGENDIAN val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32; val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4); @@ -4186,7 +4614,17 @@ uint64_t ldq_phys(target_phys_addr_t add #ifndef VBOX ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); - val = ldq_p(ptr); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = ldq_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = ldq_be_p(ptr); + break; + default: + val = ldq_p(ptr); + break; + } #else val = remR3PhysReadU64((pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK)); NOREF(ptr); #endif @@ -4194,6 +4632,21 @@ uint64_t ldq_phys(target_phys_addr_t add return val; } +uint64_t ldq_phys(target_phys_addr_t addr) +{ + return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN); +} + +uint64_t ldq_le_phys(target_phys_addr_t addr) +{ + return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN); +} + +uint64_t ldq_be_phys(target_phys_addr_t addr) +{ + return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN); +} + /* XXX: optimize */ uint32_t ldub_phys(target_phys_addr_t addr) { @@ -4203,7 +4656,8 @@ uint32_t ldub_phys(target_phys_addr_t ad } /* warning: addr must be aligned */ -uint32_t lduw_phys(target_phys_addr_t addr) +static inline uint32_t lduw_phys_internal(target_phys_addr_t addr, + enum device_endian endian) { int io_index; #ifndef VBOX @@ -4232,7 +4686,17 @@ uint32_t lduw_phys(target_phys_addr_t ad #ifndef VBOX ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); - val = lduw_p(ptr); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = lduw_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = lduw_be_p(ptr); + break; + default: + val = lduw_p(ptr); + break; + } #else val = remR3PhysReadU16((pd & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK)); #endif @@ -4240,6 +4704,21 @@ uint32_t lduw_phys(target_phys_addr_t ad return val; } +uint32_t lduw_phys(target_phys_addr_t addr) +{ + return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN); +} + +uint32_t lduw_le_phys(target_phys_addr_t addr) +{ + return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN); +} + +uint32_t lduw_be_phys(target_phys_addr_t addr) +{ + return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN); +} + /* warning: addr must be aligned. The ram page is not masked as dirty and the code inside is not invalidated. 
It is useful if the dirty bits are used to track modified PTEs */ @@ -4322,7 +4801,8 @@ void stq_phys_notdirty(target_phys_addr_ } /* warning: addr must be aligned */ -void stl_phys(target_phys_addr_t addr, uint32_t val) +static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val, + enum device_endian endian) { int io_index; uint8_t *ptr; @@ -4361,6 +4841,21 @@ void stl_phys(target_phys_addr_t addr, u } } +void stl_phys(target_phys_addr_t addr, uint32_t val) +{ + stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN); +} + +void stl_le_phys(target_phys_addr_t addr, uint32_t val) +{ + stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN); +} + +void stl_be_phys(target_phys_addr_t addr, uint32_t val) +{ + stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN); +} + /* XXX: optimize */ void stb_phys(target_phys_addr_t addr, uint32_t val) { @@ -4369,7 +4864,8 @@ void stb_phys(target_phys_addr_t addr, u } /* warning: addr must be aligned */ -void stw_phys(target_phys_addr_t addr, uint32_t val) +static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val, + enum device_endian endian) { int io_index; uint8_t *ptr; @@ -4387,6 +4883,15 @@ void stw_phys(target_phys_addr_t addr, u io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); if (p) addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset; +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap16(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap16(val); + } +#endif io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); } else { ram_addr_t addr1; @@ -4394,7 +4899,17 @@ void stw_phys(target_phys_addr_t addr, u /* RAM case */ #ifndef VBOX ptr = qemu_get_ram_ptr(addr1); - stw_p(ptr, val); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + stw_le_p(ptr, val); + break; + case DEVICE_BIG_ENDIAN: + stw_be_p(ptr, val); + break; + default: + stw_p(ptr, val); + break; + } #else remR3PhysWriteU16(addr1, val); NOREF(ptr); #endif @@ -4408,11 +4923,38 @@ void stw_phys(target_phys_addr_t addr, u } } +void stw_phys(target_phys_addr_t addr, uint32_t val) +{ + stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN); +} + +void stw_le_phys(target_phys_addr_t addr, uint32_t val) +{ + stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN); +} + +void stw_be_phys(target_phys_addr_t addr, uint32_t val) +{ + stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN); +} + /* XXX: optimize */ void stq_phys(target_phys_addr_t addr, uint64_t val) { val = tswap64(val); - cpu_physical_memory_write(addr, (const uint8_t *)&val, 8); + cpu_physical_memory_write(addr, &val, 8); +} + +void stq_le_phys(target_phys_addr_t addr, uint64_t val) +{ + val = cpu_to_le64(val); + cpu_physical_memory_write(addr, &val, 8); +} + +void stq_be_phys(target_phys_addr_t addr, uint64_t val) +{ + val = cpu_to_be64(val); + cpu_physical_memory_write(addr, &val, 8); } #ifndef VBOX @@ -4462,7 +5004,7 @@ void cpu_io_recompile(CPUState *env, voi retaddr); } n = env->icount_decr.u16.low + tb->icount; - cpu_restore_state(tb, env, (uintptr_t)retaddr, NULL); + cpu_restore_state(tb, env, (uintptr_t)retaddr); /* Calculate how many instructions had been executed before the fault occurred. 
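/*
 * A sketch of the dispatch pattern behind the ldl/stl/stw *_le/_be variants
 * added above: one internal helper takes an endianness selector, the public
 * entry points are thin wrappers around it, and the value is byte-swapped
 * only when the requested device endianness differs from the native one.
 * A plain buffer load stands in for the RAM/MMIO dispatch; all names here
 * are illustrative, not the QEMU API.
 */
#include <stdint.h>
#include <string.h>

enum dev_endian { NATIVE_DEV, LITTLE_DEV, BIG_DEV };

static int host_is_big_endian(void)
{
    const uint16_t probe = 1;
    return *(const uint8_t *)&probe == 0;
}

static uint32_t swap32(uint32_t v)
{
    return (v >> 24) | ((v >> 8) & 0xff00u) | ((v << 8) & 0xff0000u) | (v << 24);
}

static uint32_t ldl_internal(const void *mem, enum dev_endian e)
{
    uint32_t val;

    memcpy(&val, mem, sizeof(val));                 /* native-order load */
    if ((e == LITTLE_DEV && host_is_big_endian()) ||
        (e == BIG_DEV && !host_is_big_endian())) {
        val = swap32(val);
    }
    return val;
}

uint32_t ldl_native(const void *mem) { return ldl_internal(mem, NATIVE_DEV); }
uint32_t ldl_le(const void *mem)     { return ldl_internal(mem, LITTLE_DEV); }
uint32_t ldl_be(const void *mem)     { return ldl_internal(mem, BIG_DEV); }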
*/ n = n - env->icount_decr.u16.low; @@ -4509,8 +5051,7 @@ void cpu_io_recompile(CPUState *env, voi #if !defined(CONFIG_USER_ONLY) #ifndef VBOX -void dump_exec_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) { int i, target_code_size, max_target_code_size; int direct_jmp_count, direct_jmp2_count, cross_page; @@ -4537,14 +5078,14 @@ void dump_exec_info(FILE *f, } /* XXX: avoid using doubles ? */ cpu_fprintf(f, "Translation buffer state:\n"); - cpu_fprintf(f, "gen code size %ld/%ld\n", + cpu_fprintf(f, "gen code size %td/%ld\n", code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size); cpu_fprintf(f, "TB count %d/%d\n", nb_tbs, code_gen_max_blocks); cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", nb_tbs ? target_code_size / nb_tbs : 0, max_target_code_size); - cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n", + cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0, target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0); cpu_fprintf(f, "cross page TB count %d (%d%%)\n", --- fpu/softfloat-macros.h 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat-macros.h 2014-01-14 15:21:31.000000000 -0500 @@ -1,3 +1,8 @@ +/* + * QEMU float support macros + * + * Derived from SoftFloat. + */ /*============================================================================ @@ -31,6 +36,17 @@ these four paragraphs for those parts of =============================================================================*/ /*---------------------------------------------------------------------------- +| This macro tests for minimum version of the GNU C compiler. +*----------------------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define SOFTFLOAT_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 +#endif + + +/*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero | bits are shifted off, they are ``jammed'' into the least significant bit of | the result by setting the least significant bit to 1. The value of `count' @@ -39,9 +55,9 @@ these four paragraphs for those parts of | The result is stored in the location pointed to by `zPtr'. *----------------------------------------------------------------------------*/ -INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr ) { - bits32 z; + uint32_t z; if ( count == 0 ) { z = a; @@ -65,9 +81,9 @@ INLINE void shift32RightJamming( bits32 | The result is stored in the location pointed to by `zPtr'. 
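shift32RightJamming() keeps a `sticky' bit: any bits shifted out are OR-ed ("jammed") into bit 0 so that a later rounding step can still tell the discarded part was non-zero. A standalone illustration of the semantics (not the SoftFloat code itself):

#include <stdint.h>
#include <stdio.h>

static uint32_t shift32_right_jam(uint32_t a, int count)
{
    if (count == 0) {
        return a;
    } else if (count < 32) {
        /* keep a 1 in the LSB if anything non-zero fell off the end */
        return (a >> count) | ((a << ((-count) & 31)) != 0);
    }
    return a != 0;
}

int main(void)
{
    printf("%#x\n", (unsigned)shift32_right_jam(0x80000001u, 4));  /* 0x8000001: lost bit jammed in */
    printf("%#x\n", (unsigned)shift32_right_jam(0x80000000u, 4));  /* 0x8000000: nothing lost */
    return 0;
}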
*----------------------------------------------------------------------------*/ -INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr ) { - bits64 z; + uint64_t z; if ( count == 0 ) { z = a; @@ -101,9 +117,9 @@ INLINE void shift64RightJamming( bits64 INLINE void shift64ExtraRightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr ) { - bits64 z0, z1; + uint64_t z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { @@ -138,9 +154,9 @@ INLINE void INLINE void shift128Right( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr ) { - bits64 z0, z1; + uint64_t z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { @@ -173,9 +189,9 @@ INLINE void INLINE void shift128RightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr ) { - bits64 z0, z1; + uint64_t z0, z1; int8 negCount = ( - count ) & 63; if ( count == 0 ) { @@ -224,16 +240,16 @@ INLINE void INLINE void shift128ExtraRightJamming( - bits64 a0, - bits64 a1, - bits64 a2, + uint64_t a0, + uint64_t a1, + uint64_t a2, int16 count, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2; + uint64_t z0, z1, z2; int8 negCount = ( - count ) & 63; if ( count == 0 ) { @@ -282,7 +298,7 @@ INLINE void INLINE void shortShift128Left( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr ) { *z1Ptr = a1<>32; bLow = b; bHigh = b>>32; - z1 = ( (bits64) aLow ) * bLow; - zMiddleA = ( (bits64) aLow ) * bHigh; - zMiddleB = ( (bits64) aHigh ) * bLow; - z0 = ( (bits64) aHigh ) * bHigh; + z1 = ( (uint64_t) aLow ) * bLow; + zMiddleA = ( (uint64_t) aLow ) * bHigh; + zMiddleB = ( (uint64_t) aHigh ) * bLow; + z0 = ( (uint64_t) aHigh ) * bHigh; zMiddleA += zMiddleB; - z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); zMiddleA <<= 32; z1 += zMiddleA; z0 += ( z1 < zMiddleA ); @@ -478,15 +494,15 @@ INLINE void mul64To128( bits64 a, bits64 INLINE void mul128By64To192( - bits64 a0, - bits64 a1, - bits64 b, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t a0, + uint64_t a1, + uint64_t b, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2, more1; + uint64_t z0, z1, z2, more1; mul64To128( a1, b, &z1, &z2 ); mul64To128( a0, b, &z0, &more1 ); @@ -506,18 +522,18 @@ INLINE void INLINE void mul128To256( - bits64 a0, - bits64 a1, - bits64 b0, - bits64 b1, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr, - bits64 *z3Ptr + uint64_t a0, + uint64_t a1, + uint64_t b0, + uint64_t b1, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr, + uint64_t *z3Ptr ) { - bits64 z0, z1, z2, z3; - bits64 more1, more2; + uint64_t z0, z1, z2, z3; + uint64_t more1, more2; mul64To128( a1, b1, &z2, &z3 ); mul64To128( a1, b0, &z1, &more2 ); @@ -543,18 +559,18 @@ INLINE void | unsigned integer is returned. 
*----------------------------------------------------------------------------*/ -static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b ) { - bits64 b0, b1; - bits64 rem0, rem1, term0, term1; - bits64 z; + uint64_t b0, b1; + uint64_t rem0, rem1, term0, term1; + uint64_t z; if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); b0 = b>>32; z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; mul64To128( b, z, &term0, &term1 ); sub128( a0, a1, term0, term1, &rem0, &rem1 ); - while ( ( (sbits64) rem0 ) < 0 ) { + while ( ( (int64_t) rem0 ) < 0 ) { z -= LIT64( 0x100000000 ); b1 = b<<32; add128( rem0, rem1, b0, b1, &rem0, &rem1 ); @@ -575,18 +591,18 @@ static bits64 estimateDiv128To64( bits64 | value. *----------------------------------------------------------------------------*/ -static bits32 estimateSqrt32( int16 aExp, bits32 a ) +static uint32_t estimateSqrt32( int16 aExp, uint32_t a ) { - static const bits16 sqrtOddAdjustments[] = { + static const uint16_t sqrtOddAdjustments[] = { 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 }; - static const bits16 sqrtEvenAdjustments[] = { + static const uint16_t sqrtEvenAdjustments[] = { 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 }; int8 index; - bits32 z; + uint32_t z; index = ( a>>27 ) & 15; if ( aExp & 1 ) { @@ -598,9 +614,9 @@ static bits32 estimateSqrt32( int16 aExp z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; z = a / z + z; z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); } - return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); } @@ -609,8 +625,15 @@ static bits32 estimateSqrt32( int16 aExp | `a'. If `a' is zero, 32 is returned. *----------------------------------------------------------------------------*/ -static int8 countLeadingZeros32( bits32 a ) +static int8 countLeadingZeros32( uint32_t a ) { +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clz(a); + } else { + return 32; + } +#else static const int8 countLeadingZerosHigh[] = { 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -642,7 +665,7 @@ static int8 countLeadingZeros32( bits32 } shiftCount += countLeadingZerosHigh[ a>>24 ]; return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -650,12 +673,19 @@ static int8 countLeadingZeros32( bits32 | `a'. If `a' is zero, 64 is returned. *----------------------------------------------------------------------------*/ -static int8 countLeadingZeros64( bits64 a ) +static int8 countLeadingZeros64( uint64_t a ) { +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clzll(a); + } else { + return 64; + } +#else int8 shiftCount; shiftCount = 0; - if ( a < ( (bits64) 1 )<<32 ) { + if ( a < ( (uint64_t) 1 )<<32 ) { shiftCount += 32; } else { @@ -663,7 +693,7 @@ static int8 countLeadingZeros64( bits64 } shiftCount += countLeadingZeros32( a ); return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -672,7 +702,7 @@ static int8 countLeadingZeros64( bits64 | Otherwise, returns 0. 
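countLeadingZeros32()/countLeadingZeros64() now prefer __builtin_clz()/__builtin_clzll() when SOFTFLOAT_GNUC_PREREQ(3, 4) holds, keeping the table-driven code as the fallback; the explicit zero check matters because the builtins are undefined for a zero argument. A reduced sketch of the same pattern (the fallback loop is a portable stand-in, not the SoftFloat table):

#include <stdint.h>

static int clz32(uint32_t a)
{
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
    return a ? __builtin_clz(a) : 32;   /* __builtin_clz(0) is undefined */
#else
    int n = 0;
    if (a == 0) {
        return 32;
    }
    while (!(a & 0x80000000u)) {        /* shift until the top bit is set */
        a <<= 1;
        n++;
    }
    return n;
#endif
}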
*----------------------------------------------------------------------------*/ -INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 == b0 ) && ( a1 == b1 ); @@ -685,7 +715,7 @@ INLINE flag eq128( bits64 a0, bits64 a1, | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); @@ -698,7 +728,7 @@ INLINE flag le128( bits64 a0, bits64 a1, | returns 0. *----------------------------------------------------------------------------*/ -INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); @@ -711,7 +741,7 @@ INLINE flag lt128( bits64 a0, bits64 a1, | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 != b0 ) || ( a1 != b1 ); --- fpu/softfloat-native.c 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat-native.c 1969-12-31 19:00:00.000000000 -0500 @@ -1,521 +0,0 @@ -/* Native implementation of soft float functions. Only a single status - context is supported */ -#include "softfloat.h" -#include -#if defined(CONFIG_SOLARIS) -#include -#endif - -void set_float_rounding_mode(int val STATUS_PARAM) -{ - STATUS(float_rounding_mode) = val; -#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) || \ - (defined(CONFIG_SOLARIS) && (CONFIG_SOLARIS_VERSION < 10 || CONFIG_SOLARIS_VERSION == 11)) /* VBOX adds sol 11 */ - fpsetround(val); -#else - fesetround(val); -#endif -} - -#ifdef FLOATX80 -void set_floatx80_rounding_precision(int val STATUS_PARAM) -{ - STATUS(floatx80_rounding_precision) = val; -} -#endif - -#if defined(CONFIG_BSD) || \ - (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) -#define lrint(d) ((int32_t)rint(d)) -#define llrint(d) ((int64_t)rint(d)) -#define lrintf(f) ((int32_t)rint(f)) -#define llrintf(f) ((int64_t)rint(f)) -#define sqrtf(f) ((float)sqrt(f)) -#define remainderf(fa, fb) ((float)remainder(fa, fb)) -#define rintf(f) ((float)rint(f)) -# if defined(VBOX) && defined(HOST_BSD) /* Some defines which only apply to *BSD */ -# define lrintl(f) ((int32_t)rint(f)) -# define llrintl(f) ((int64_t)rint(f)) -# define rintl(d) ((int32_t)rint(d)) -# define sqrtl(f) (sqrt(f)) -# define remainderl(fa, fb) (remainder(fa, fb)) -# endif /* VBOX && _BSD */ -#if !defined(__sparc__) && \ - (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) -extern long double rintl(long double); -extern long double scalbnl(long double, int); - -long long -llrintl(long double x) { - return ((long long) rintl(x)); -} - -long -lrintl(long double x) { - return ((long) rintl(x)); -} - -long double -ldexpl(long double x, int n) { - return (scalbnl(x, n)); -} -#endif -#endif - -#if defined(_ARCH_PPC) - -/* correct (but slow) PowerPC rint() (glibc version is incorrect) */ -static double qemu_rint(double x) -{ - double y = 4503599627370496.0; - if (fabs(x) >= y) - return x; - if (x < 0) - y = -y; - y = (x + y) - y; - if (y == 0.0) - y = copysign(y, x); - return y; -} - -#define rint qemu_rint -#endif - 
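The deleted softfloat-native.c mapped the emulated rounding mode straight onto the host FPU through fesetround()/fpsetround(), so results depended on the host libm (hence workarounds like qemu_rint() above for PowerPC). A hedged sketch of what that mapping amounted to, using only the C99 fenv.h interface (formally this also needs #pragma STDC FENV_ACCESS ON):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
    volatile double x = 2.7;            /* volatile: keep the compiler from folding rint() */

    fesetround(FE_TOWARDZERO);
    printf("%.0f\n", rint(x));          /* 2 under round-toward-zero */

    fesetround(FE_TONEAREST);
    printf("%.0f\n", rint(x));          /* 3 under round-to-nearest */
    return 0;
}

Switching to the pure softfloat implementation removes this host dependency: rounding is done in integer arithmetic under the float_status rounding mode, identically on every host.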
-/*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. -*----------------------------------------------------------------------------*/ -float32 int32_to_float32(int v STATUS_PARAM) -{ - return (float32)v; -} - -float32 uint32_to_float32(unsigned int v STATUS_PARAM) -{ - return (float32)v; -} - -float64 int32_to_float64(int v STATUS_PARAM) -{ - return (float64)v; -} - -float64 uint32_to_float64(unsigned int v STATUS_PARAM) -{ - return (float64)v; -} - -#ifdef FLOATX80 -floatx80 int32_to_floatx80(int v STATUS_PARAM) -{ - return (floatx80)v; -} -#endif -float32 int64_to_float32( int64_t v STATUS_PARAM) -{ - return (float32)v; -} -float32 uint64_to_float32( uint64_t v STATUS_PARAM) -{ - return (float32)v; -} -float64 int64_to_float64( int64_t v STATUS_PARAM) -{ - return (float64)v; -} -float64 uint64_to_float64( uint64_t v STATUS_PARAM) -{ - return (float64)v; -} -#ifdef FLOATX80 -floatx80 int64_to_floatx80( int64_t v STATUS_PARAM) -{ - return (floatx80)v; -} -#endif - -/* XXX: this code implements the x86 behaviour, not the IEEE one. */ -#if HOST_LONG_BITS == 32 -static inline int long_to_int32(long a) -{ - return a; -} -#else -static inline int long_to_int32(long a) -{ - if (a != (int32_t)a) - a = 0x80000000; - return a; -} -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. -*----------------------------------------------------------------------------*/ -int float32_to_int32( float32 a STATUS_PARAM) -{ - return long_to_int32(lrintf(a)); -} -int float32_to_int32_round_to_zero( float32 a STATUS_PARAM) -{ - return (int)a; -} -int64_t float32_to_int64( float32 a STATUS_PARAM) -{ - return llrintf(a); -} - -int64_t float32_to_int64_round_to_zero( float32 a STATUS_PARAM) -{ - return (int64_t)a; -} - -float64 float32_to_float64( float32 a STATUS_PARAM) -{ - return a; -} -#ifdef FLOATX80 -floatx80 float32_to_floatx80( float32 a STATUS_PARAM) -{ - return a; -} -#endif - -unsigned int float32_to_uint32( float32 a STATUS_PARAM) -{ - int64_t v; - unsigned int res; - - v = llrintf(a); - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - res = v; - } - return res; -} -unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM) -{ - int64_t v; - unsigned int res; - - v = (int64_t)a; - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - res = v; - } - return res; -} - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. 
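long_to_int32() above reproduces the x86 convention for out-of-range conversions: anything that does not fit in 32 bits collapses to the `integer indefinite' value 0x80000000 instead of saturating or trapping. A small standalone demonstration of that convention:

#include <stdint.h>
#include <stdio.h>

/* x86-style narrowing: out-of-range inputs become INT32_MIN (0x80000000). */
static int32_t to_int32_x86(int64_t a)
{
    if (a != (int32_t)a) {
        return INT32_MIN;
    }
    return (int32_t)a;
}

int main(void)
{
    printf("%d\n", to_int32_x86(123));            /* 123 */
    printf("%d\n", to_int32_x86(5000000000LL));   /* -2147483648 */
    return 0;
}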
-*----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 a STATUS_PARAM) -{ - return rintf(a); -} - -float32 float32_rem( float32 a, float32 b STATUS_PARAM) -{ - return remainderf(a, b); -} - -float32 float32_sqrt( float32 a STATUS_PARAM) -{ - return sqrtf(a); -} -int float32_compare( float32 a, float32 b STATUS_PARAM ) -{ - if (a < b) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (a > b) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int float32_compare_quiet( float32 a, float32 b STATUS_PARAM ) -{ - if (isless(a, b)) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (isgreater(a, b)) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int float32_is_signaling_nan( float32 a1) -{ - float32u u; - uint32_t a; - u.f = a1; - a = u.i; - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); -} - -int float32_is_nan( float32 a1 ) -{ - float32u u; - uint64_t a; - u.f = a1; - a = u.i; - return ( 0xFF800000 < ( a<<1 ) ); -} - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. -*----------------------------------------------------------------------------*/ -int float64_to_int32( float64 a STATUS_PARAM) -{ - return long_to_int32(lrint(a)); -} -int float64_to_int32_round_to_zero( float64 a STATUS_PARAM) -{ - return (int)a; -} -int64_t float64_to_int64( float64 a STATUS_PARAM) -{ - return llrint(a); -} -int64_t float64_to_int64_round_to_zero( float64 a STATUS_PARAM) -{ - return (int64_t)a; -} -float32 float64_to_float32( float64 a STATUS_PARAM) -{ - return a; -} -#ifdef FLOATX80 -floatx80 float64_to_floatx80( float64 a STATUS_PARAM) -{ - return a; -} -#endif -#ifdef FLOAT128 -float128 float64_to_float128( float64 a STATUS_PARAM) -{ - return a; -} -#endif - -unsigned int float64_to_uint32( float64 a STATUS_PARAM) -{ - int64_t v; - unsigned int res; - - v = llrint(a); - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - res = v; - } - return res; -} -unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM) -{ - int64_t v; - unsigned int res; - - v = (int64_t)a; - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - res = v; - } - return res; -} -uint64_t float64_to_uint64 (float64 a STATUS_PARAM) -{ - int64_t v; - - v = llrint(a + (float64)INT64_MIN); - - return v - INT64_MIN; -} -uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) -{ - int64_t v; - - v = (int64_t)(a + (float64)INT64_MIN); - - return v - INT64_MIN; -} - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. -*----------------------------------------------------------------------------*/ -#if defined(__sun__) && \ - (defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10) -static inline float64 trunc(float64 x) -{ - return x < 0 ? 
-floor(-x) : floor(x); -} -#endif -float64 float64_trunc_to_int( float64 a STATUS_PARAM ) -{ - return trunc(a); -} - -float64 float64_round_to_int( float64 a STATUS_PARAM ) -{ - return rint(a); -} - -float64 float64_rem( float64 a, float64 b STATUS_PARAM) -{ - return remainder(a, b); -} - -float64 float64_sqrt( float64 a STATUS_PARAM) -{ - return sqrt(a); -} -int float64_compare( float64 a, float64 b STATUS_PARAM ) -{ - if (a < b) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (a > b) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int float64_compare_quiet( float64 a, float64 b STATUS_PARAM ) -{ - if (isless(a, b)) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (isgreater(a, b)) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int float64_is_signaling_nan( float64 a1) -{ - float64u u; - uint64_t a; - u.f = a1; - a = u.i; - return - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); - -} - -int float64_is_nan( float64 a1 ) -{ - float64u u; - uint64_t a; - u.f = a1; - a = u.i; - - return ( LIT64( 0xFFF0000000000000 ) < (bits64) ( a<<1 ) ); - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision conversion routines. -*----------------------------------------------------------------------------*/ -int floatx80_to_int32( floatx80 a STATUS_PARAM) -{ - return long_to_int32(lrintl(a)); -} -int floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM) -{ - return (int)a; -} -int64_t floatx80_to_int64( floatx80 a STATUS_PARAM) -{ - return llrintl(a); -} -int64_t floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM) -{ - return (int64_t)a; -} -float32 floatx80_to_float32( floatx80 a STATUS_PARAM) -{ - return a; -} -float64 floatx80_to_float64( floatx80 a STATUS_PARAM) -{ - return a; -} - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision operations. 
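float64_to_uint64() in the deleted native code above works around the lack of an unsigned llrint() by biasing the input with INT64_MIN, converting through the signed path, and undoing the bias in unsigned arithmetic. A sketch of that trick; note the biased addition costs precision for small inputs, one more reason the pure softfloat conversion is preferable:

#include <stdint.h>
#include <stdio.h>
#include <math.h>

static uint64_t double_to_u64(double a)
{
    int64_t v = llrint(a + (double)INT64_MIN);    /* shift into the signed range */
    return (uint64_t)v - (uint64_t)INT64_MIN;     /* undo the bias modulo 2^64 */
}

int main(void)
{
    printf("%llu\n", (unsigned long long)double_to_u64(9223372036854775808.0));   /* 2^63 */
    printf("%llu\n", (unsigned long long)double_to_u64(13835058055282163712.0));  /* 1.5 * 2^63 */
    return 0;
}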
-*----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM) -{ - return rintl(a); -} -floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM) -{ - return remainderl(a, b); -} -floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM) -{ - return sqrtl(a); -} -int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM ) -{ - if (a < b) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (a > b) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM ) -{ - if (isless(a, b)) { - return float_relation_less; - } else if (a == b) { - return float_relation_equal; - } else if (isgreater(a, b)) { - return float_relation_greater; - } else { - return float_relation_unordered; - } -} -int floatx80_is_signaling_nan( floatx80 a1) -{ - floatx80u u; - uint64_t aLow; - u.f = a1; - - aLow = u.i.low & ~ LIT64( 0x4000000000000000 ); - return - ( ( u.i.high & 0x7FFF ) == 0x7FFF ) - && (bits64) ( aLow<<1 ) - && ( u.i.low == aLow ); -} - -int floatx80_is_nan( floatx80 a1 ) -{ - floatx80u u; - u.f = a1; - return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( u.i.low<<1 ); -} - -#endif --- fpu/softfloat-native.h 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat-native.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,493 +0,0 @@ -/* Native implementation of soft float functions */ -#define __C99FEATURES__ -#include - -#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__) && !defined(__FreeBSD__)) \ - || defined(CONFIG_SOLARIS) /* VBox: Added __FreeBSD__ */ -#include -#define fabsf(f) ((float)fabs(f)) -#else -#include -#endif - -#if defined(__OpenBSD__) || defined(__NetBSD__) -#include -#endif - -/* - * Define some C99-7.12.3 classification macros and - * some C99-.12.4 for Solaris systems OS less than 10, - * or Solaris 10 systems running GCC 3.x or less. - * Solaris 10 with GCC4 does not need these macros as they - * are defined in with a compiler directive - */ -#if defined(CONFIG_SOLARIS) && \ - ((CONFIG_SOLARIS_VERSION <= 9 ) || \ - ((CONFIG_SOLARIS_VERSION == 10) && (__GNUC__ < 4))) \ - || (defined(__OpenBSD__) && (OpenBSD < 200811)) -/* - * C99 7.12.3 classification macros - * and - * C99 7.12.14 comparison macros - * - * ... do not work on Solaris 10 using GNU CC 3.4.x. - * Try to workaround the missing / broken C99 math macros. - */ -#if defined(__OpenBSD__) -#define unordered(x, y) (isnan(x) || isnan(y)) -#endif - -#ifdef __NetBSD__ -#ifndef isgreater -#define isgreater(x, y) __builtin_isgreater(x, y) -#endif -#ifndef isgreaterequal -#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) -#endif -#ifndef isless -#define isless(x, y) __builtin_isless(x, y) -#endif -#ifndef islessequal -#define islessequal(x, y) __builtin_islessequal(x, y) -#endif -#ifndef isunordered -#define isunordered(x, y) __builtin_isunordered(x, y) -#endif -#endif - - -#define isnormal(x) (fpclass(x) >= FP_NZERO) -#define isgreater(x, y) ((!unordered(x, y)) && ((x) > (y))) -#define isgreaterequal(x, y) ((!unordered(x, y)) && ((x) >= (y))) -#define isless(x, y) ((!unordered(x, y)) && ((x) < (y))) -#define islessequal(x, y) ((!unordered(x, y)) && ((x) <= (y))) -#define isunordered(x,y) unordered(x, y) -#endif - -#if defined(__sun__) && !defined(CONFIG_NEEDS_LIBSUNMATH) - -#ifndef isnan -# define isnan(x) \ - (sizeof (x) == sizeof (long double) ? isnan_ld (x) \ - : sizeof (x) == sizeof (double) ? 
isnan_d (x) \ - : isnan_f (x)) -static inline int isnan_f (float x) { return x != x; } -static inline int isnan_d (double x) { return x != x; } -static inline int isnan_ld (long double x) { return x != x; } -#endif - -#ifndef isinf -# define isinf(x) \ - (sizeof (x) == sizeof (long double) ? isinf_ld (x) \ - : sizeof (x) == sizeof (double) ? isinf_d (x) \ - : isinf_f (x)) -static inline int isinf_f (float x) { return isnan (x - x); } -static inline int isinf_d (double x) { return isnan (x - x); } -static inline int isinf_ld (long double x) { return isnan (x - x); } -#endif -#endif - -typedef float float32; -typedef double float64; -#ifdef FLOATX80 -typedef long double floatx80; -#endif - -typedef union { - float32 f; - uint32_t i; -} float32u; -typedef union { - float64 f; - uint64_t i; -} float64u; -#ifdef FLOATX80 -typedef union { - floatx80 f; - struct { - uint64_t low; - uint16_t high; - } i; -} floatx80u; -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point rounding mode. -*----------------------------------------------------------------------------*/ -#if (defined(CONFIG_BSD) && !defined(__APPLE__) && !defined(__GLIBC__)) \ - || defined(CONFIG_SOLARIS) -#if defined(__OpenBSD__) -#define FE_RM FP_RM -#define FE_RP FP_RP -#define FE_RZ FP_RZ -#endif -enum { - float_round_nearest_even = FP_RN, - float_round_down = FP_RM, - float_round_up = FP_RP, - float_round_to_zero = FP_RZ -}; -#else -enum { - float_round_nearest_even = FE_TONEAREST, - float_round_down = FE_DOWNWARD, - float_round_up = FE_UPWARD, - float_round_to_zero = FE_TOWARDZERO -}; -#endif - -typedef struct float_status { - int float_rounding_mode; -#ifdef FLOATX80 - int floatx80_rounding_precision; -#endif -} float_status; - -void set_float_rounding_mode(int val STATUS_PARAM); -#ifdef FLOATX80 -void set_floatx80_rounding_precision(int val STATUS_PARAM); -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. -*----------------------------------------------------------------------------*/ -float32 int32_to_float32( int STATUS_PARAM); -float32 uint32_to_float32( unsigned int STATUS_PARAM); -float64 int32_to_float64( int STATUS_PARAM); -float64 uint32_to_float64( unsigned int STATUS_PARAM); -#ifdef FLOATX80 -floatx80 int32_to_floatx80( int STATUS_PARAM); -#endif -#ifdef FLOAT128 -float128 int32_to_float128( int STATUS_PARAM); -#endif -float32 int64_to_float32( int64_t STATUS_PARAM); -float32 uint64_to_float32( uint64_t STATUS_PARAM); -float64 int64_to_float64( int64_t STATUS_PARAM); -float64 uint64_to_float64( uint64_t v STATUS_PARAM); -#ifdef FLOATX80 -floatx80 int64_to_floatx80( int64_t STATUS_PARAM); -#endif -#ifdef FLOAT128 -float128 int64_to_float128( int64_t STATUS_PARAM); -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. 
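The float32u/float64u/floatx80u unions above let the native code read raw encodings (sign bit, NaN payload) without aliasing trouble; float32_is_neg(), for instance, is just bit 31 of the punned integer. A compact illustration of that style, assuming the host float is IEEE binary32:

#include <stdint.h>
#include <stdio.h>
#include <math.h>

typedef union {
    float    f;
    uint32_t i;
} f32bits;

static int f32_sign(float a)
{
    f32bits u;
    u.f = a;
    return (int)(u.i >> 31);                    /* sign bit */
}

static int f32_is_inf(float a)
{
    f32bits u;
    u.f = a;
    return (u.i & 0x7fffffffu) == 0x7f800000u;  /* max exponent, zero mantissa */
}

int main(void)
{
    printf("%d %d\n", f32_sign(-1.5f), f32_sign(2.0f));   /* 1 0 */
    printf("%d\n", f32_is_inf(HUGE_VALF));                /* 1 */
    return 0;
}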
-*----------------------------------------------------------------------------*/ -int float32_to_int32( float32 STATUS_PARAM); -int float32_to_int32_round_to_zero( float32 STATUS_PARAM); -unsigned int float32_to_uint32( float32 a STATUS_PARAM); -unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM); -int64_t float32_to_int64( float32 STATUS_PARAM); -int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM); -float64 float32_to_float64( float32 STATUS_PARAM); -#ifdef FLOATX80 -floatx80 float32_to_floatx80( float32 STATUS_PARAM); -#endif -#ifdef FLOAT128 -float128 float32_to_float128( float32 STATUS_PARAM); -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. -*----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 STATUS_PARAM); -INLINE float32 float32_add( float32 a, float32 b STATUS_PARAM) -{ - return a + b; -} -INLINE float32 float32_sub( float32 a, float32 b STATUS_PARAM) -{ - return a - b; -} -INLINE float32 float32_mul( float32 a, float32 b STATUS_PARAM) -{ - return a * b; -} -INLINE float32 float32_div( float32 a, float32 b STATUS_PARAM) -{ - return a / b; -} -float32 float32_rem( float32, float32 STATUS_PARAM); -float32 float32_sqrt( float32 STATUS_PARAM); -INLINE int float32_eq( float32 a, float32 b STATUS_PARAM) -{ - return a == b; -} -INLINE int float32_le( float32 a, float32 b STATUS_PARAM) -{ - return a <= b; -} -INLINE int float32_lt( float32 a, float32 b STATUS_PARAM) -{ - return a < b; -} -INLINE int float32_eq_signaling( float32 a, float32 b STATUS_PARAM) -{ - return a <= b && a >= b; -} -INLINE int float32_le_quiet( float32 a, float32 b STATUS_PARAM) -{ - return islessequal(a, b); -} -INLINE int float32_lt_quiet( float32 a, float32 b STATUS_PARAM) -{ - return isless(a, b); -} -INLINE int float32_unordered( float32 a, float32 b STATUS_PARAM) -{ - return isunordered(a, b); - -} -int float32_compare( float32, float32 STATUS_PARAM ); -int float32_compare_quiet( float32, float32 STATUS_PARAM ); -int float32_is_signaling_nan( float32 ); -int float32_is_nan( float32 ); - -INLINE float32 float32_abs(float32 a) -{ - return fabsf(a); -} - -INLINE float32 float32_chs(float32 a) -{ - return -a; -} - -INLINE float32 float32_is_infinity(float32 a) -{ - return fpclassify(a) == FP_INFINITE; -} - -INLINE float32 float32_is_neg(float32 a) -{ - float32u u; - u.f = a; - return u.i >> 31; -} - -INLINE float32 float32_is_zero(float32 a) -{ - return fpclassify(a) == FP_ZERO; -} - -INLINE float32 float32_scalbn(float32 a, int n) -{ - return scalbnf(a, n); -} - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. 
-*----------------------------------------------------------------------------*/ -int float64_to_int32( float64 STATUS_PARAM ); -int float64_to_int32_round_to_zero( float64 STATUS_PARAM ); -unsigned int float64_to_uint32( float64 STATUS_PARAM ); -unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM ); -int64_t float64_to_int64( float64 STATUS_PARAM ); -int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM ); -uint64_t float64_to_uint64( float64 STATUS_PARAM ); -uint64_t float64_to_uint64_round_to_zero( float64 STATUS_PARAM ); -float32 float64_to_float32( float64 STATUS_PARAM ); -#ifdef FLOATX80 -floatx80 float64_to_floatx80( float64 STATUS_PARAM ); -#endif -#ifdef FLOAT128 -float128 float64_to_float128( float64 STATUS_PARAM ); -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. -*----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 STATUS_PARAM ); -float64 float64_trunc_to_int( float64 STATUS_PARAM ); -INLINE float64 float64_add( float64 a, float64 b STATUS_PARAM) -{ - return a + b; -} -INLINE float64 float64_sub( float64 a, float64 b STATUS_PARAM) -{ - return a - b; -} -INLINE float64 float64_mul( float64 a, float64 b STATUS_PARAM) -{ - return a * b; -} -INLINE float64 float64_div( float64 a, float64 b STATUS_PARAM) -{ - return a / b; -} -float64 float64_rem( float64, float64 STATUS_PARAM ); -float64 float64_sqrt( float64 STATUS_PARAM ); -INLINE int float64_eq( float64 a, float64 b STATUS_PARAM) -{ - return a == b; -} -INLINE int float64_le( float64 a, float64 b STATUS_PARAM) -{ - return a <= b; -} -INLINE int float64_lt( float64 a, float64 b STATUS_PARAM) -{ - return a < b; -} -INLINE int float64_eq_signaling( float64 a, float64 b STATUS_PARAM) -{ - return a <= b && a >= b; -} -INLINE int float64_le_quiet( float64 a, float64 b STATUS_PARAM) -{ - return islessequal(a, b); -} -INLINE int float64_lt_quiet( float64 a, float64 b STATUS_PARAM) -{ - return isless(a, b); - -} -INLINE int float64_unordered( float64 a, float64 b STATUS_PARAM) -{ - return isunordered(a, b); - -} -int float64_compare( float64, float64 STATUS_PARAM ); -int float64_compare_quiet( float64, float64 STATUS_PARAM ); -int float64_is_signaling_nan( float64 ); -int float64_is_nan( float64 ); - -INLINE float64 float64_abs(float64 a) -{ - return fabs(a); -} - -INLINE float64 float64_chs(float64 a) -{ - return -a; -} - -INLINE float64 float64_is_infinity(float64 a) -{ - return fpclassify(a) == FP_INFINITE; -} - -INLINE float64 float64_is_neg(float64 a) -{ - float64u u; - u.f = a; - return u.i >> 63; -} - -INLINE float64 float64_is_zero(float64 a) -{ - return fpclassify(a) == FP_ZERO; -} - -INLINE float64 float64_scalbn(float64 a, int n) -{ - return scalbn(a, n); -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision conversion routines. 
-*----------------------------------------------------------------------------*/ -int floatx80_to_int32( floatx80 STATUS_PARAM ); -int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM ); -int64_t floatx80_to_int64( floatx80 STATUS_PARAM); -int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM); -float32 floatx80_to_float32( floatx80 STATUS_PARAM ); -float64 floatx80_to_float64( floatx80 STATUS_PARAM ); -#ifdef FLOAT128 -float128 floatx80_to_float128( floatx80 STATUS_PARAM ); -#endif - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision operations. -*----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM ); -INLINE floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a + b; -} -INLINE floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a - b; -} -INLINE floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a * b; -} -INLINE floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a / b; -} -floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM ); -floatx80 floatx80_sqrt( floatx80 STATUS_PARAM ); -INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a == b; -} -INLINE int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a <= b; -} -INLINE int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a < b; -} -INLINE int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM) -{ - return a <= b && a >= b; -} -INLINE int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM) -{ - return islessequal(a, b); -} -INLINE int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM) -{ - return isless(a, b); - -} -INLINE int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM) -{ - return isunordered(a, b); - -} -int floatx80_compare( floatx80, floatx80 STATUS_PARAM ); -int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM ); -int floatx80_is_signaling_nan( floatx80 ); -int floatx80_is_nan( floatx80 ); - -INLINE floatx80 floatx80_abs(floatx80 a) -{ - return fabsl(a); -} - -INLINE floatx80 floatx80_chs(floatx80 a) -{ - return -a; -} - -INLINE floatx80 floatx80_is_infinity(floatx80 a) -{ - return fpclassify(a) == FP_INFINITE; -} - -INLINE floatx80 floatx80_is_neg(floatx80 a) -{ - floatx80u u; - u.f = a; - return u.i.high >> 15; -} - -INLINE floatx80 floatx80_is_zero(floatx80 a) -{ - return fpclassify(a) == FP_ZERO; -} - -INLINE floatx80 floatx80_scalbn(floatx80 a, int n) -{ - return scalbnl(a, n); -} - -#endif --- fpu/softfloat-specialize.h 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat-specialize.h 2014-01-14 15:21:31.000000000 -0500 @@ -1,3 +1,8 @@ +/* + * QEMU float support + * + * Derived from SoftFloat. + */ /*============================================================================ @@ -30,12 +35,6 @@ these four paragraphs for those parts of =============================================================================*/ -#if defined(TARGET_MIPS) || defined(TARGET_HPPA) -#define SNAN_BIT_IS_ONE 1 -#else -#define SNAN_BIT_IS_ONE 0 -#endif - /*---------------------------------------------------------------------------- | Raises the exceptions specified by `flags'. Floating-point traps can be | defined here if desired. 
It is currently not possible for such a trap @@ -53,36 +52,111 @@ void float_raise( int8 flags STATUS_PARA *----------------------------------------------------------------------------*/ typedef struct { flag sign; - bits64 high, low; + uint64_t high, low; } commonNaNT; /*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. +| Returns 1 if the half-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +int float16_is_quiet_nan(float16 a_) +{ + uint16_t a = float16_val(a_); +#if SNAN_BIT_IS_ONE + return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); +#else + return ((a & ~0x8000) >= 0x7c80); +#endif +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -#if defined(TARGET_SPARC) -#define float32_default_nan make_float32(0x7FFFFFFF) -#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) -#define float32_default_nan make_float32(0x7FC00000) -#elif defined(TARGET_HPPA) -#define float32_default_nan make_float32(0x7FA00000) -#elif SNAN_BIT_IS_ONE -#define float32_default_nan make_float32(0x7FBFFFFF) + +int float16_is_signaling_nan(float16 a_) +{ + uint16_t a = float16_val(a_); +#if SNAN_BIT_IS_ONE + return ((a & ~0x8000) >= 0x7c80); #else -#define float32_default_nan make_float32(0xFFC00000) + return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); #endif +} + +/*---------------------------------------------------------------------------- +| Returns a quiet NaN if the half-precision floating point value `a' is a +| signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ +float16 float16_maybe_silence_nan(float16 a_) +{ + if (float16_is_signaling_nan(a_)) { +#if SNAN_BIT_IS_ONE +# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) + return float16_default_nan; +# else +# error Rules for silencing a signaling NaN are target-specific +# endif +#else + uint16_t a = float16_val(a_); + a |= (1 << 9); + return make_float16(a); +#endif + } + return a_; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the half-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static commonNaNT float16ToCommonNaN( float16 a STATUS_PARAM ) +{ + commonNaNT z; + + if ( float16_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR ); + z.sign = float16_val(a) >> 15; + z.low = 0; + z.high = ((uint64_t) float16_val(a))<<54; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the half- +| precision floating-point format. 
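The new float16 helpers assume the IEEE binary16 layout: 1 sign bit, 5 exponent bits, 10 mantissa bits, with a mantissa bit marking quiet NaNs under the usual SNAN_BIT_IS_ONE == 0 convention. A few reference encodings and a generic is-NaN test (sketch for orientation, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

#define F16_POS_INF  0x7C00u
#define F16_QNAN     0x7E00u   /* exponent all ones, quiet bit (mantissa bit 9) set */
#define F16_SNAN     0x7D00u   /* exponent all ones, quiet bit clear, payload != 0 */

static int f16_is_any_nan(uint16_t a)
{
    return (a & 0x7FFFu) > 0x7C00u;    /* strictly above +inf once the sign is masked */
}

int main(void)
{
    printf("%d %d %d\n",
           f16_is_any_nan(F16_POS_INF),   /* 0 */
           f16_is_any_nan(F16_QNAN),      /* 1 */
           f16_is_any_nan(F16_SNAN));     /* 1 */
    return 0;
}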
+*----------------------------------------------------------------------------*/ + +static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM) +{ + uint16_t mantissa = a.high>>54; + + if (STATUS(default_nan_mode)) { + return float16_default_nan; + } + + if (mantissa) { + return make_float16(((((uint16_t) a.sign) << 15) + | (0x1F << 10) | mantissa)); + } else { + return float16_default_nan; + } +} /*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float32_is_nan( float32 a_ ) +int float32_is_quiet_nan( float32 a_ ) { uint32_t a = float32_val(a_); #if SNAN_BIT_IS_ONE return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); #else - return ( 0xFF800000 <= (bits32) ( a<<1 ) ); + return ( 0xFF800000 <= (uint32_t) ( a<<1 ) ); #endif } @@ -95,13 +169,36 @@ int float32_is_signaling_nan( float32 a_ { uint32_t a = float32_val(a_); #if SNAN_BIT_IS_ONE - return ( 0xFF800000 <= (bits32) ( a<<1 ) ); + return ( 0xFF800000 <= (uint32_t) ( a<<1 ) ); #else return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); #endif } /*---------------------------------------------------------------------------- +| Returns a quiet NaN if the single-precision floating point value `a' is a +| signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ + +float32 float32_maybe_silence_nan( float32 a_ ) +{ + if (float32_is_signaling_nan(a_)) { +#if SNAN_BIT_IS_ONE +# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) + return float32_default_nan; +# else +# error Rules for silencing a signaling NaN are target-specific +# endif +#else + uint32_t a = float32_val(a_); + a |= (1 << 22); + return make_float32(a); +#endif + } + return a_; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point NaN | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid | exception is raised. @@ -114,7 +211,7 @@ static commonNaNT float32ToCommonNaN( fl if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR ); z.sign = float32_val(a)>>31; z.low = 0; - z.high = ( (bits64) float32_val(a) )<<41; + z.high = ( (uint64_t) float32_val(a) )<<41; return z; } @@ -123,17 +220,134 @@ static commonNaNT float32ToCommonNaN( fl | precision floating-point format. *----------------------------------------------------------------------------*/ -static float32 commonNaNToFloat32( commonNaNT a ) +static float32 commonNaNToFloat32( commonNaNT a STATUS_PARAM) { - bits32 mantissa = a.high>>41; + uint32_t mantissa = a.high>>41; + + if ( STATUS(default_nan_mode) ) { + return float32_default_nan; + } + if ( mantissa ) return make_float32( - ( ( (bits32) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) ); + ( ( (uint32_t) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) ); else return float32_default_nan; } /*---------------------------------------------------------------------------- +| Select which NaN to propagate for a two-input operation. +| IEEE754 doesn't specify all the details of this, so the +| algorithm is target-specific. +| The routine is passed various bits of information about the +| two NaNs and should return 0 to select NaN a and 1 for NaN b. 
+| Note that signalling NaNs are always squashed to quiet NaNs +| by the caller, by calling floatXX_maybe_silence_nan() before +| returning them. +| +| aIsLargerSignificand is only valid if both a and b are NaNs +| of some kind, and is true if a has the larger significand, +| or if both a and b have the same significand but a is +| positive but b is negative. It is only needed for the x87 +| tie-break rule. +*----------------------------------------------------------------------------*/ + +#if defined(TARGET_ARM) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* ARM mandated NaN propagation rules: take the first of: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always quietened before returning it. + */ + if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } +} +#elif defined(TARGET_MIPS) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* According to MIPS specifications, if one of the two operands is + * a sNaN, a new qNaN has to be generated. This is done in + * floatXX_maybe_silence_nan(). For qNaN inputs the specifications + * says: "When possible, this QNaN result is one of the operand QNaN + * values." In practice it seems that most implementations choose + * the first operand if both operands are qNaN. In short this gives + * the following rules: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always silenced before returning it. + */ + if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } +} +#elif defined(TARGET_PPC) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* PowerPC propagation rules: + * 1. A if it sNaN or qNaN + * 2. B if it sNaN or qNaN + * A signaling NaN is always silenced before returning it. + */ + if (aIsSNaN || aIsQNaN) { + return 0; + } else { + return 1; + } +} +#else +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* This implements x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + if (aIsSNaN) { + if (bIsSNaN) { + return aIsLargerSignificand ? 0 : 1; + } + return bIsQNaN ? 1 : 0; + } + else if (aIsQNaN) { + if (bIsSNaN || !bIsQNaN) + return 0; + else { + return aIsLargerSignificand ? 0 : 1; + } + } else { + return 1; + } +} +#endif + +/*---------------------------------------------------------------------------- | Takes two single-precision floating-point values `a' and `b', one of which | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a | signaling NaN, the invalid exception is raised. 
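pickNaN() concentrates the target-specific propagation policy in one routine returning 0 (keep operand a) or 1 (keep operand b). A tiny driver for the default x87 branch shown above, copied stand-alone so the tie-break behaviour can be exercised directly:

#include <stdio.h>

/* Stand-alone copy of the x87 selection rule above. */
static int pick_x87(int aIsQNaN, int aIsSNaN, int bIsQNaN, int bIsSNaN,
                    int aIsLargerSignificand)
{
    if (aIsSNaN) {
        if (bIsSNaN) {
            return aIsLargerSignificand ? 0 : 1;
        }
        return bIsQNaN ? 1 : 0;
    } else if (aIsQNaN) {
        if (bIsSNaN || !bIsQNaN) {
            return 0;
        }
        return aIsLargerSignificand ? 0 : 1;
    }
    return 1;
}

int main(void)
{
    /* SNaN + QNaN: the quiet NaN (operand b) is chosen. */
    printf("%d\n", pick_x87(0, 1, 1, 0, 0));    /* 1 */
    /* two QNaNs: the one with the larger significand wins. */
    printf("%d\n", pick_x87(1, 0, 1, 0, 1));    /* 0 */
    return 0;
}

The selected operand is then passed through floatXX_maybe_silence_nan() by the caller, as the comment above notes.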
@@ -141,78 +355,52 @@ static float32 commonNaNToFloat32( commo static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - bits32 av, bv, res; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + uint32_t av, bv; - if ( STATUS(default_nan_mode) ) - return float32_default_nan; - - aIsNaN = float32_is_nan( a ); + aIsQuietNaN = float32_is_quiet_nan( a ); aIsSignalingNaN = float32_is_signaling_nan( a ); - bIsNaN = float32_is_nan( b ); + bIsQuietNaN = float32_is_quiet_nan( b ); bIsSignalingNaN = float32_is_signaling_nan( b ); av = float32_val(a); bv = float32_val(b); -#if SNAN_BIT_IS_ONE - av &= ~0x00400000; - bv &= ~0x00400000; -#else - av |= 0x00400000; - bv |= 0x00400000; -#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); - if ( aIsSignalingNaN ) { - if ( bIsSignalingNaN ) goto returnLargerSignificand; - res = bIsNaN ? bv : av; - } - else if ( aIsNaN ) { - if ( bIsSignalingNaN || ! bIsNaN ) - res = av; - else { - returnLargerSignificand: - if ( (bits32) ( av<<1 ) < (bits32) ( bv<<1 ) ) - res = bv; - else if ( (bits32) ( bv<<1 ) < (bits32) ( av<<1 ) ) - res = av; - else - res = ( av < bv ) ? av : bv; - } + + if ( STATUS(default_nan_mode) ) + return float32_default_nan; + + if ((uint32_t)(av<<1) < (uint32_t)(bv<<1)) { + aIsLargerSignificand = 0; + } else if ((uint32_t)(bv<<1) < (uint32_t)(av<<1)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (av < bv) ? 1 : 0; } - else { - res = bv; + + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float32_maybe_silence_nan(b); + } else { + return float32_maybe_silence_nan(a); } - return make_float32(res); } /*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. -*----------------------------------------------------------------------------*/ -#if defined(TARGET_SPARC) -#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF )) -#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) -#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 )) -#elif defined(TARGET_HPPA) -#define float64_default_nan make_float64(LIT64( 0x7FF4000000000000 )) -#elif SNAN_BIT_IS_ONE -#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF )) -#else -#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 )) -#endif - -/*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. 
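float32_maybe_silence_nan() (further up) quietens a signaling NaN by setting mantissa bit 22 under the SNAN_BIT_IS_ONE == 0 convention, and propagateFloat32NaN() now applies it to whichever operand pickNaN() selected. A minimal demonstration of the silencing step on raw float32 encodings:

#include <stdint.h>
#include <stdio.h>

/* Set the quiet bit (mantissa bit 22) on a signaling NaN; leave everything
 * else untouched.  Assumes the usual convention where bit 22 set == quiet. */
static uint32_t silence_f32(uint32_t a)
{
    int is_nan   = (uint32_t)(a << 1) > 0xFF000000u;  /* exponent ones, mantissa != 0 */
    int is_quiet = (a & 0x00400000u) != 0;

    if (is_nan && !is_quiet) {
        a |= 0x00400000u;
    }
    return a;
}

int main(void)
{
    printf("%#x\n", (unsigned)silence_f32(0x7F800001u));   /* sNaN -> 0x7fc00001 */
    printf("%#x\n", (unsigned)silence_f32(0x7FC00000u));   /* qNaN -> unchanged  */
    printf("%#x\n", (unsigned)silence_f32(0x3F800000u));   /* 1.0f -> unchanged  */
    return 0;
}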
*----------------------------------------------------------------------------*/ -int float64_is_nan( float64 a_ ) +int float64_is_quiet_nan( float64 a_ ) { - bits64 a = float64_val(a_); + uint64_t a = float64_val(a_); #if SNAN_BIT_IS_ONE return ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); #else - return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) ); + return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) ); #endif } @@ -223,9 +411,9 @@ int float64_is_nan( float64 a_ ) int float64_is_signaling_nan( float64 a_ ) { - bits64 a = float64_val(a_); + uint64_t a = float64_val(a_); #if SNAN_BIT_IS_ONE - return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) ); + return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) ); #else return ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) @@ -234,6 +422,29 @@ int float64_is_signaling_nan( float64 a_ } /*---------------------------------------------------------------------------- +| Returns a quiet NaN if the double-precision floating point value `a' is a +| signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ + +float64 float64_maybe_silence_nan( float64 a_ ) +{ + if (float64_is_signaling_nan(a_)) { +#if SNAN_BIT_IS_ONE +# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) + return float64_default_nan; +# else +# error Rules for silencing a signaling NaN are target-specific +# endif +#else + uint64_t a = float64_val(a_); + a |= LIT64( 0x0008000000000000 ); + return make_float64(a); +#endif + } + return a_; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point NaN | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid | exception is raised. @@ -255,13 +466,17 @@ static commonNaNT float64ToCommonNaN( fl | precision floating-point format. *----------------------------------------------------------------------------*/ -static float64 commonNaNToFloat64( commonNaNT a ) +static float64 commonNaNToFloat64( commonNaNT a STATUS_PARAM) { - bits64 mantissa = a.high>>12; + uint64_t mantissa = a.high>>12; + + if ( STATUS(default_nan_mode) ) { + return float64_default_nan; + } if ( mantissa ) return make_float64( - ( ( (bits64) a.sign )<<63 ) + ( ( (uint64_t) a.sign )<<63 ) | LIT64( 0x7FF0000000000000 ) | ( a.high>>12 )); else @@ -276,105 +491,106 @@ static float64 commonNaNToFloat64( commo static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - bits64 av, bv, res; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + uint64_t av, bv; - if ( STATUS(default_nan_mode) ) - return float64_default_nan; - - aIsNaN = float64_is_nan( a ); + aIsQuietNaN = float64_is_quiet_nan( a ); aIsSignalingNaN = float64_is_signaling_nan( a ); - bIsNaN = float64_is_nan( b ); + bIsQuietNaN = float64_is_quiet_nan( b ); bIsSignalingNaN = float64_is_signaling_nan( b ); av = float64_val(a); bv = float64_val(b); -#if SNAN_BIT_IS_ONE - av &= ~LIT64( 0x0008000000000000 ); - bv &= ~LIT64( 0x0008000000000000 ); -#else - av |= LIT64( 0x0008000000000000 ); - bv |= LIT64( 0x0008000000000000 ); -#endif + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); - if ( aIsSignalingNaN ) { - if ( bIsSignalingNaN ) goto returnLargerSignificand; - res = bIsNaN ? 
bv : av; - } - else if ( aIsNaN ) { - if ( bIsSignalingNaN || ! bIsNaN ) - res = av; - else { - returnLargerSignificand: - if ( (bits64) ( av<<1 ) < (bits64) ( bv<<1 ) ) - res = bv; - else if ( (bits64) ( bv<<1 ) < (bits64) ( av<<1 ) ) - res = av; - else - res = ( av < bv ) ? av : bv; - } + + if ( STATUS(default_nan_mode) ) + return float64_default_nan; + + if ((uint64_t)(av<<1) < (uint64_t)(bv<<1)) { + aIsLargerSignificand = 0; + } else if ((uint64_t)(bv<<1) < (uint64_t)(av<<1)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (av < bv) ? 1 : 0; } - else { - res = bv; + + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float64_maybe_silence_nan(b); + } else { + return float64_maybe_silence_nan(a); } - return make_float64(res); } -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated extended double-precision NaN. The -| `high' and `low' values hold the most- and least-significant bits, -| respectively. -*----------------------------------------------------------------------------*/ -#if SNAN_BIT_IS_ONE -#define floatx80_default_nan_high 0x7FFF -#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF ) -#else -#define floatx80_default_nan_high 0xFFFF -#define floatx80_default_nan_low LIT64( 0xC000000000000000 ) -#endif - /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is a -| quiet NaN; otherwise returns 0. +| quiet NaN; otherwise returns 0. This slightly differs from the same +| function for other types as floatx80 has an explicit bit. *----------------------------------------------------------------------------*/ -int floatx80_is_nan( floatx80 a ) +int floatx80_is_quiet_nan( floatx80 a ) { #if SNAN_BIT_IS_ONE - bits64 aLow; + uint64_t aLow; aLow = a.low & ~ LIT64( 0x4000000000000000 ); return ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (bits64) ( aLow<<1 ) + && (uint64_t) ( aLow<<1 ) && ( a.low == aLow ); #else - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + return ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 ))); #endif } /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. +| signaling NaN; otherwise returns 0. This slightly differs from the same +| function for other types as floatx80 has an explicit bit. *----------------------------------------------------------------------------*/ int floatx80_is_signaling_nan( floatx80 a ) { #if SNAN_BIT_IS_ONE - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + return ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 ))); #else - bits64 aLow; + uint64_t aLow; aLow = a.low & ~ LIT64( 0x4000000000000000 ); return ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (bits64) ( aLow<<1 ) + && (uint64_t) ( aLow<<1 ) && ( a.low == aLow ); #endif } /*---------------------------------------------------------------------------- +| Returns a quiet NaN if the extended double-precision floating point value +| `a' is a signaling NaN; otherwise returns `a'. 
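Unlike float32/float64, floatx80 carries an explicit integer bit (bit 63 of the 64-bit mantissa word), which is why the quiet/signaling tests above differ slightly from the other formats: a well-formed x87 NaN has the exponent all ones, bit 63 set, and bit 62 acting as the quiet bit. A few reference encodings under that standard layout, written as a sign+exponent word plus a mantissa word (sketch for orientation, not the softfloat struct definition):

#include <stdint.h>
#include <stdio.h>

struct x80 { uint16_t high; uint64_t low; };  /* high = sign + 15-bit exponent, low = mantissa */

static const struct x80 x80_pos_inf = { 0x7FFF, 0x8000000000000000ULL };
static const struct x80 x80_qnan    = { 0x7FFF, 0xC000000000000000ULL };  /* integer bit + quiet bit */
static const struct x80 x80_snan    = { 0x7FFF, 0x8000000000000001ULL };  /* quiet bit clear, payload != 0 */

static int x80_is_any_nan(struct x80 a)
{
    /* exponent all ones and at least one fraction bit below the integer bit */
    return ((a.high & 0x7FFF) == 0x7FFF) && (uint64_t)(a.low << 1) != 0;
}

int main(void)
{
    printf("%d %d %d\n",
           x80_is_any_nan(x80_pos_inf),   /* 0 */
           x80_is_any_nan(x80_qnan),      /* 1 */
           x80_is_any_nan(x80_snan));     /* 1 */
    return 0;
}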
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_maybe_silence_nan( floatx80 a ) +{ + if (floatx80_is_signaling_nan(a)) { +#if SNAN_BIT_IS_ONE +# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; +# else +# error Rules for silencing a signaling NaN are target-specific +# endif +#else + a.low |= LIT64( 0xC000000000000000 ); + return a; +#endif + } + return a; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the | invalid exception is raised. @@ -385,9 +601,15 @@ static commonNaNT floatx80ToCommonNaN( f commonNaNT z; if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR); - z.sign = a.high>>15; - z.low = 0; - z.high = a.low; + if ( a.low >> 63 ) { + z.sign = a.high >> 15; + z.low = 0; + z.high = a.low << 1; + } else { + z.sign = floatx80_default_nan_high >> 15; + z.low = 0; + z.high = floatx80_default_nan_low << 1; + } return z; } @@ -396,15 +618,24 @@ static commonNaNT floatx80ToCommonNaN( f | double-precision floating-point format. *----------------------------------------------------------------------------*/ -static floatx80 commonNaNToFloatx80( commonNaNT a ) +static floatx80 commonNaNToFloatx80( commonNaNT a STATUS_PARAM) { floatx80 z; - if (a.high) - z.low = a.high; - else + if ( STATUS(default_nan_mode) ) { + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + + if (a.high >> 1) { + z.low = LIT64( 0x8000000000000000 ) | a.high >> 1; + z.high = ( ( (uint16_t) a.sign )<<15 ) | 0x7FFF; + } else { z.low = floatx80_default_nan_low; - z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + z.high = floatx80_default_nan_high; + } + return z; } @@ -416,7 +647,15 @@ static floatx80 commonNaNToFloatx80( com static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + + aIsQuietNaN = floatx80_is_quiet_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsQuietNaN = floatx80_is_quiet_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); if ( STATUS(default_nan_mode) ) { a.low = floatx80_default_nan_low; @@ -424,56 +663,28 @@ static floatx80 propagateFloatx80NaN( fl return a; } - aIsNaN = floatx80_is_nan( a ); - aIsSignalingNaN = floatx80_is_signaling_nan( a ); - bIsNaN = floatx80_is_nan( b ); - bIsSignalingNaN = floatx80_is_signaling_nan( b ); -#if SNAN_BIT_IS_ONE - a.low &= ~LIT64( 0xC000000000000000 ); - b.low &= ~LIT64( 0xC000000000000000 ); -#else - a.low |= LIT64( 0xC000000000000000 ); - b.low |= LIT64( 0xC000000000000000 ); -#endif - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); - if ( aIsSignalingNaN ) { - if ( bIsSignalingNaN ) goto returnLargerSignificand; - return bIsNaN ? b : a; - } - else if ( aIsNaN ) { - if ( bIsSignalingNaN || ! bIsNaN ) return a; - returnLargerSignificand: - if ( a.low < b.low ) return b; - if ( b.low < a.low ) return a; - return ( a.high < b.high ) ? 
a : b; + if (a.low < b.low) { + aIsLargerSignificand = 0; + } else if (b.low < a.low) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (a.high < b.high) ? 1 : 0; } - else { - return b; + + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return floatx80_maybe_silence_nan(b); + } else { + return floatx80_maybe_silence_nan(a); } } -#endif - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated quadruple-precision NaN. The `high' and -| `low' values hold the most- and least-significant bits, respectively. -*----------------------------------------------------------------------------*/ -#if SNAN_BIT_IS_ONE -#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF ) -#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) -#else -#define float128_default_nan_high LIT64( 0xFFFF800000000000 ) -#define float128_default_nan_low LIT64( 0x0000000000000000 ) -#endif - /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_nan( float128 a ) +int float128_is_quiet_nan( float128 a ) { #if SNAN_BIT_IS_ONE return @@ -481,7 +692,7 @@ int float128_is_nan( float128 a ) && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); #else return - ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) ) && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); #endif } @@ -495,7 +706,7 @@ int float128_is_signaling_nan( float128 { #if SNAN_BIT_IS_ONE return - ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) ) && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); #else return @@ -505,6 +716,29 @@ int float128_is_signaling_nan( float128 } /*---------------------------------------------------------------------------- +| Returns a quiet NaN if the quadruple-precision floating point value `a' is +| a signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ + +float128 float128_maybe_silence_nan( float128 a ) +{ + if (float128_is_signaling_nan(a)) { +#if SNAN_BIT_IS_ONE +# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) + a.low = float128_default_nan_low; + a.high = float128_default_nan_high; +# else +# error Rules for silencing a signaling NaN are target-specific +# endif +#else + a.high |= LIT64( 0x0000800000000000 ); + return a; +#endif + } + return a; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point NaN | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid | exception is raised. @@ -525,12 +759,18 @@ static commonNaNT float128ToCommonNaN( f | precision floating-point format. 
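Both maybe_silence_nan helpers introduced above rely on the same convention: when the signaling-NaN bit is zero (the usual case), a signaling NaN is silenced simply by setting the most-significant fraction bit, which is what the 0x0000800000000000 (float128) and 0xC000000000000000 (floatx80) masks do. A minimal stand-alone sketch of the same idea on the binary64 bit pattern, assuming that convention:

#include <stdint.h>

static uint64_t silence_f64_snan_sketch(uint64_t bits)
{
    const uint64_t exp_mask  = 0x7FF0000000000000ULL;
    const uint64_t frac_mask = 0x000FFFFFFFFFFFFFULL;
    const uint64_t quiet_bit = 0x0008000000000000ULL;

    if ((bits & exp_mask) == exp_mask && (bits & frac_mask)
        && !(bits & quiet_bit)) {        /* signaling NaN? */
        bits |= quiet_bit;               /* make it quiet */
    }
    return bits;
}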
*----------------------------------------------------------------------------*/ -static float128 commonNaNToFloat128( commonNaNT a ) +static float128 commonNaNToFloat128( commonNaNT a STATUS_PARAM) { float128 z; + if ( STATUS(default_nan_mode) ) { + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + shift128Right( a.high, a.low, 16, &z.high, &z.low ); - z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 ); + z.high |= ( ( (uint64_t) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 ); return z; } @@ -542,7 +782,15 @@ static float128 commonNaNToFloat128( com static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + + aIsQuietNaN = float128_is_quiet_nan( a ); + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsQuietNaN = float128_is_quiet_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); + + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); if ( STATUS(default_nan_mode) ) { a.low = float128_default_nan_low; @@ -550,32 +798,19 @@ static float128 propagateFloat128NaN( fl return a; } - aIsNaN = float128_is_nan( a ); - aIsSignalingNaN = float128_is_signaling_nan( a ); - bIsNaN = float128_is_nan( b ); - bIsSignalingNaN = float128_is_signaling_nan( b ); -#if SNAN_BIT_IS_ONE - a.high &= ~LIT64( 0x0000800000000000 ); - b.high &= ~LIT64( 0x0000800000000000 ); -#else - a.high |= LIT64( 0x0000800000000000 ); - b.high |= LIT64( 0x0000800000000000 ); -#endif - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR); - if ( aIsSignalingNaN ) { - if ( bIsSignalingNaN ) goto returnLargerSignificand; - return bIsNaN ? b : a; - } - else if ( aIsNaN ) { - if ( bIsSignalingNaN || ! bIsNaN ) return a; - returnLargerSignificand: - if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b; - if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a; - return ( a.high < b.high ) ? a : b; + if (lt128(a.high<<1, a.low, b.high<<1, b.low)) { + aIsLargerSignificand = 0; + } else if (lt128(b.high<<1, b.low, a.high<<1, a.low)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (a.high < b.high) ? 1 : 0; } - else { - return b; + + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float128_maybe_silence_nan(b); + } else { + return float128_maybe_silence_nan(a); } } -#endif --- fpu/softfloat.c 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat.c 2014-01-14 15:21:31.000000000 -0500 @@ -1,3 +1,8 @@ +/* + * QEMU float support + * + * Derived from SoftFloat. + */ /*============================================================================ @@ -30,8 +35,6 @@ these four paragraphs for those parts of =============================================================================*/ -/* FIXME: Flush-To-Zero only effects results. Denormal inputs should also - be flushed to zero. 
*/ #include "softfloat.h" /*---------------------------------------------------------------------------- @@ -61,12 +64,37 @@ void set_float_exception_flags(int val S STATUS(float_exception_flags) = val; } -#ifdef FLOATX80 void set_floatx80_rounding_precision(int val STATUS_PARAM) { STATUS(floatx80_rounding_precision) = val; } -#endif + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the half-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE uint32_t extractFloat16Frac(float16 a) +{ + return float16_val(a) & 0x3ff; +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the half-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE int16 extractFloat16Exp(float16 a) +{ + return (float16_val(a) >> 10) & 0x1f; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +INLINE flag extractFloat16Sign(float16 a) +{ + return float16_val(a)>>15; +} /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 @@ -79,7 +107,7 @@ void set_floatx80_rounding_precision(int | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM) +static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM) { int8 roundingMode; flag roundNearestEven; @@ -110,7 +138,7 @@ static int32 roundAndPackInt32( flag zSi if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { float_raise( float_flag_invalid STATUS_VAR); - return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; } if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; return z; @@ -129,7 +157,7 @@ static int32 roundAndPackInt32( flag zSi | returned. *----------------------------------------------------------------------------*/ -static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM) +static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM) { int8 roundingMode; flag roundNearestEven, increment; @@ -137,7 +165,7 @@ static int64 roundAndPackInt64( flag zSi roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) absZ1 < 0 ); + increment = ( (int64_t) absZ1 < 0 ); if ( ! roundNearestEven ) { if ( roundingMode == float_round_to_zero ) { increment = 0; @@ -154,7 +182,7 @@ static int64 roundAndPackInt64( flag zSi if ( increment ) { ++absZ0; if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); } z = absZ0; if ( zSign ) z = - z; @@ -162,7 +190,7 @@ static int64 roundAndPackInt64( flag zSi overflow: float_raise( float_flag_invalid STATUS_VAR); return - zSign ? (sbits64) LIT64( 0x8000000000000000 ) + zSign ? 
(int64_t) LIT64( 0x8000000000000000 ) : LIT64( 0x7FFFFFFFFFFFFFFF ); } if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -174,7 +202,7 @@ static int64 roundAndPackInt64( flag zSi | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits32 extractFloat32Frac( float32 a ) +INLINE uint32_t extractFloat32Frac( float32 a ) { return float32_val(a) & 0x007FFFFF; @@ -204,6 +232,21 @@ INLINE flag extractFloat32Sign( float32 } /*---------------------------------------------------------------------------- +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. +*----------------------------------------------------------------------------*/ +static float32 float32_squash_input_denormal(float32 a STATUS_PARAM) +{ + if (STATUS(flush_inputs_to_zero)) { + if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) { + float_raise(float_flag_input_denormal STATUS_VAR); + return make_float32(float32_val(a) & 0x80000000); + } + } + return a; +} + +/*---------------------------------------------------------------------------- | Normalizes the subnormal single-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and @@ -211,7 +254,7 @@ INLINE flag extractFloat32Sign( float32 *----------------------------------------------------------------------------*/ static void - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) + normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr ) { int8 shiftCount; @@ -232,11 +275,11 @@ static void | significand. *----------------------------------------------------------------------------*/ -INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) +INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig ) { return make_float32( - ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig); + ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig); } @@ -262,7 +305,7 @@ INLINE float32 packFloat32( flag zSign, | Binary Floating-Point Arithmetic. 
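float32_squash_input_denormal above is the counterpart to the FIXME deleted from the file header: with flush_inputs_to_zero set, denormal operands are now replaced by a same-signed zero on input and the new input-denormal exception is raised, instead of only flushing results. A stand-alone sketch of the binary32 case (the flag handling here is illustrative, not the float_status API):

#include <stdint.h>

static uint32_t squash_input_denormal_sketch(uint32_t bits, int flush_inputs,
                                             int *input_denormal_flag)
{
    uint32_t exp  = (bits >> 23) & 0xFF;
    uint32_t frac = bits & 0x007FFFFF;

    if (flush_inputs && exp == 0 && frac != 0) {
        *input_denormal_flag = 1;
        return bits & 0x80000000;   /* keep only the sign bit */
    }
    return bits;
}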
*----------------------------------------------------------------------------*/ -static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) +static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM) { int8 roundingMode; flag roundNearestEven; @@ -287,16 +330,19 @@ static float32 roundAndPackFloat32( flag } } roundBits = zSig & 0x7F; - if ( 0xFD <= (bits16) zExp ) { + if ( 0xFD <= (uint16_t) zExp ) { if ( ( 0xFD < zExp ) || ( ( zExp == 0xFD ) - && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) + && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) ) { float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); } if ( zExp < 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 ); + if (STATUS(flush_to_zero)) { + float_raise(float_flag_output_denormal STATUS_VAR); + return packFloat32(zSign, 0, 0); + } isTiny = ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) || ( zExp < -1 ) @@ -325,7 +371,7 @@ static float32 roundAndPackFloat32( flag *----------------------------------------------------------------------------*/ static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM) { int8 shiftCount; @@ -338,7 +384,7 @@ static float32 | Returns the fraction bits of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits64 extractFloat64Frac( float64 a ) +INLINE uint64_t extractFloat64Frac( float64 a ) { return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF ); @@ -368,6 +414,21 @@ INLINE flag extractFloat64Sign( float64 } /*---------------------------------------------------------------------------- +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. +*----------------------------------------------------------------------------*/ +static float64 float64_squash_input_denormal(float64 a STATUS_PARAM) +{ + if (STATUS(flush_inputs_to_zero)) { + if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) { + float_raise(float_flag_input_denormal STATUS_VAR); + return make_float64(float64_val(a) & (1ULL << 63)); + } + } + return a; +} + +/*---------------------------------------------------------------------------- | Normalizes the subnormal double-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and @@ -375,7 +436,7 @@ INLINE flag extractFloat64Sign( float64 *----------------------------------------------------------------------------*/ static void - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) + normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr ) { int8 shiftCount; @@ -396,11 +457,11 @@ static void | significand. *----------------------------------------------------------------------------*/ -INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) +INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig ) { return make_float64( - ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig); + ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig); } @@ -426,7 +487,7 @@ INLINE float64 packFloat64( flag zSign, | Binary Floating-Point Arithmetic. 
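The roundAndPackFloat32 hunk above keeps seven round bits below the 23-bit fraction (roundBits = zSig & 0x7F) and adds a mode-dependent increment before truncating. The increment table itself is not visible in the context lines, so the sketch below reconstructs it from the surrounding logic: half an ulp (0x40) for round-to-nearest-even, nothing for round-toward-zero, and almost a full ulp (0x7F) when rounding away from zero in the chosen direction.

#include <stdint.h>

/* Rounding-mode names are illustrative, not the softfloat constants. */
enum { RM_NEAREST_EVEN, RM_TO_ZERO, RM_DOWN, RM_UP };

static uint32_t round_increment_sketch(int mode, int sign)
{
    switch (mode) {
    case RM_NEAREST_EVEN: return 0x40;              /* half of one ulp (0x80) */
    case RM_TO_ZERO:      return 0;
    case RM_DOWN:         return sign ? 0x7F : 0;   /* toward -infinity */
    case RM_UP:           return sign ? 0 : 0x7F;   /* toward +infinity */
    default:              return 0x40;
    }
}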
*----------------------------------------------------------------------------*/ -static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) +static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM) { int8 roundingMode; flag roundNearestEven; @@ -451,16 +512,19 @@ static float64 roundAndPackFloat64( flag } } roundBits = zSig & 0x3FF; - if ( 0x7FD <= (bits16) zExp ) { + if ( 0x7FD <= (uint16_t) zExp ) { if ( ( 0x7FD < zExp ) || ( ( zExp == 0x7FD ) - && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) + && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) ) { float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); } if ( zExp < 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 ); + if (STATUS(flush_to_zero)) { + float_raise(float_flag_output_denormal STATUS_VAR); + return packFloat64(zSign, 0, 0); + } isTiny = ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) || ( zExp < -1 ) @@ -489,7 +553,7 @@ static float64 roundAndPackFloat64( flag *----------------------------------------------------------------------------*/ static float64 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM) { int8 shiftCount; @@ -498,14 +562,12 @@ static float64 } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the fraction bits of the extended double-precision floating-point | value `a'. *----------------------------------------------------------------------------*/ -INLINE bits64 extractFloatx80Frac( floatx80 a ) +INLINE uint64_t extractFloatx80Frac( floatx80 a ) { return a.low; @@ -544,7 +606,7 @@ INLINE flag extractFloatx80Sign( floatx8 *----------------------------------------------------------------------------*/ static void - normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) + normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr ) { int8 shiftCount; @@ -559,12 +621,12 @@ static void | extended double-precision floating-point value, returning the result. 
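Several hunks above pass through the normalizeFloat*Subnormal helpers unchanged apart from the bits64 to uint64_t renaming. Their job is easy to state: shift a subnormal significand left until its leading bit reaches the hidden-bit position and compensate in the exponent. A portable sketch for binary32 (the real code uses the countLeadingZeros32 primitive instead of a loop):

#include <stdint.h>

static void normalize_f32_subnormal_sketch(uint32_t aSig,
                                           int16_t *zExp, uint32_t *zSig)
{
    int shiftCount = 0;

    if (aSig == 0) {                 /* callers never pass zero; guard anyway */
        *zSig = 0;
        *zExp = 0;
        return;
    }
    while (!(aSig & 0x00800000)) {   /* until bit 23 (the hidden bit) is set */
        aSig <<= 1;
        ++shiftCount;
    }
    *zSig = aSig;
    *zExp = (int16_t)(1 - shiftCount);
}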
*----------------------------------------------------------------------------*/ -INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig ) { floatx80 z; z.low = zSig; - z.high = ( ( (bits16) zSign )<<15 ) + zExp; + z.high = ( ( (uint16_t) zSign )<<15 ) + zExp; return z; } @@ -595,7 +657,7 @@ INLINE floatx80 packFloatx80( flag zSign static floatx80 roundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM) { int8 roundingMode; @@ -632,14 +694,17 @@ static floatx80 } } roundBits = zSig0 & roundMask; - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { if ( ( 0x7FFE < zExp ) || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) ) { goto overflow; } if ( zExp <= 0 ) { - if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 ); + if (STATUS(flush_to_zero)) { + float_raise(float_flag_output_denormal STATUS_VAR); + return packFloatx80(zSign, 0, 0); + } isTiny = ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) || ( zExp < 0 ) @@ -650,7 +715,7 @@ static floatx80 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; zSig0 += roundIncrement; - if ( (sbits64) zSig0 < 0 ) zExp = 1; + if ( (int64_t) zSig0 < 0 ) zExp = 1; roundIncrement = roundMask + 1; if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { roundMask |= roundIncrement; @@ -673,7 +738,7 @@ static floatx80 if ( zSig0 == 0 ) zExp = 0; return packFloatx80( zSign, zExp, zSig0 ); precision80: - increment = ( (sbits64) zSig1 < 0 ); + increment = ( (int64_t) zSig1 < 0 ); if ( ! roundNearestEven ) { if ( roundingMode == float_round_to_zero ) { increment = 0; @@ -687,7 +752,7 @@ static floatx80 } } } - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { if ( ( 0x7FFE < zExp ) || ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) @@ -716,7 +781,7 @@ static floatx80 if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR); if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; if ( roundNearestEven ) { - increment = ( (sbits64) zSig1 < 0 ); + increment = ( (int64_t) zSig1 < 0 ); } else { if ( zSign ) { @@ -729,8 +794,8 @@ static floatx80 if ( increment ) { ++zSig0; zSig0 &= - ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - if ( (sbits64) zSig0 < 0 ) zExp = 1; + ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if ( (int64_t) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); } @@ -743,7 +808,7 @@ static floatx80 zSig0 = LIT64( 0x8000000000000000 ); } else { - zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); } } else { @@ -764,7 +829,7 @@ static floatx80 static floatx80 normalizeRoundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM) { int8 shiftCount; @@ -782,16 +847,12 @@ static floatx80 } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the least-significant 64 fraction bits of the quadruple-precision | floating-point value `a'. 
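roundAndPackFloatx80 and the 128-bit routines above carry the bits shifted out of the significand in a separate 64-bit word (zSig1/zSig2) and repeatedly test its sign bit, e.g. `increment = ( (int64_t) zSig1 < 0 )`. That works because the top bit of the extra word is exactly the "round bits are at least half an ulp" condition, and an all-zero extra word after removing that bit marks the exact tie that round-to-nearest-even resolves toward an even result. A compact sketch of that pattern:

#include <stdint.h>

static int should_increment_sketch(uint64_t extra_bits, int nearest_even,
                                   uint64_t *sig)
{
    int increment = (int64_t)extra_bits < 0;          /* >= 0.5 ulp? */

    if (increment) {
        ++*sig;
        /* on an exact tie, force the result even by clearing bit 0 */
        if ((uint64_t)(extra_bits << 1) == 0 && nearest_even) {
            *sig &= ~(uint64_t)1;
        }
    }
    return increment;
}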
*----------------------------------------------------------------------------*/ -INLINE bits64 extractFloat128Frac1( float128 a ) +INLINE uint64_t extractFloat128Frac1( float128 a ) { return a.low; @@ -803,7 +864,7 @@ INLINE bits64 extractFloat128Frac1( floa | floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits64 extractFloat128Frac0( float128 a ) +INLINE uint64_t extractFloat128Frac0( float128 a ) { return a.high & LIT64( 0x0000FFFFFFFFFFFF ); @@ -845,11 +906,11 @@ INLINE flag extractFloat128Sign( float12 static void normalizeFloat128Subnormal( - bits64 aSig0, - bits64 aSig1, + uint64_t aSig0, + uint64_t aSig1, int32 *zExpPtr, - bits64 *zSig0Ptr, - bits64 *zSig1Ptr + uint64_t *zSig0Ptr, + uint64_t *zSig1Ptr ) { int8 shiftCount; @@ -888,12 +949,12 @@ static void *----------------------------------------------------------------------------*/ INLINE float128 - packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) + packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 ) { float128 z; z.low = zSig1; - z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; + z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0; return z; } @@ -921,14 +982,14 @@ INLINE float128 static float128 roundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM) + flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM) { int8 roundingMode; flag roundNearestEven, increment, isTiny; roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) zSig2 < 0 ); + increment = ( (int64_t) zSig2 < 0 ); if ( ! roundNearestEven ) { if ( roundingMode == float_round_to_zero ) { increment = 0; @@ -942,7 +1003,7 @@ static float128 } } } - if ( 0x7FFD <= (bits32) zExp ) { + if ( 0x7FFD <= (uint32_t) zExp ) { if ( ( 0x7FFD < zExp ) || ( ( zExp == 0x7FFD ) && eq128( @@ -970,7 +1031,10 @@ static float128 return packFloat128( zSign, 0x7FFF, 0, 0 ); } if ( zExp < 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 ); + if (STATUS(flush_to_zero)) { + float_raise(float_flag_output_denormal STATUS_VAR); + return packFloat128(zSign, 0, 0, 0); + } isTiny = ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) || ( zExp < -1 ) @@ -986,7 +1050,7 @@ static float128 zExp = 0; if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); if ( roundNearestEven ) { - increment = ( (sbits64) zSig2 < 0 ); + increment = ( (int64_t) zSig2 < 0 ); } else { if ( zSign ) { @@ -1022,10 +1086,10 @@ static float128 static float128 normalizeRoundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM) + flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM) { int8 shiftCount; - bits64 zSig2; + uint64_t zSig2; if ( zSig0 == 0 ) { zSig0 = zSig1; @@ -1046,8 +1110,6 @@ static float128 } -#endif - /*---------------------------------------------------------------------------- | Returns the result of converting the 32-bit two's complement integer `a' | to the single-precision floating-point format. 
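packFloat128 above shows how the quadruple-precision encoding is split across two 64-bit words: the high word holds the sign, the 15-bit exponent and the top 48 fraction bits, and the low word holds the remaining 64 fraction bits. A stand-alone sketch of the same packing (the struct is illustrative, not the softfloat float128 type):

#include <stdint.h>

struct f128_sketch {
    uint64_t high;
    uint64_t low;
};

static struct f128_sketch pack_f128_sketch(int sign, int32_t exp,
                                           uint64_t frac_hi48, uint64_t frac_lo)
{
    struct f128_sketch z;
    z.low  = frac_lo;
    z.high = ((uint64_t)(sign != 0) << 63)
           + ((uint64_t)exp << 48)
           + frac_hi48;
    return z;
}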
The conversion is performed @@ -1059,7 +1121,7 @@ float32 int32_to_float32( int32 a STATUS flag zSign; if ( a == 0 ) return float32_zero; - if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); zSign = ( a < 0 ); return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR ); @@ -1076,7 +1138,7 @@ float64 int32_to_float64( int32 a STATUS flag zSign; uint32 absA; int8 shiftCount; - bits64 zSig; + uint64_t zSig; if ( a == 0 ) return float64_zero; zSign = ( a < 0 ); @@ -1087,8 +1149,6 @@ float64 int32_to_float64( int32 a STATUS } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the 32-bit two's complement integer `a' | to the extended double-precision floating-point format. The conversion @@ -1101,7 +1161,7 @@ floatx80 int32_to_floatx80( int32 a STAT flag zSign; uint32 absA; int8 shiftCount; - bits64 zSig; + uint64_t zSig; if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); @@ -1112,10 +1172,6 @@ floatx80 int32_to_floatx80( int32 a STAT } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the result of converting the 32-bit two's complement integer `a' to | the quadruple-precision floating-point format. The conversion is performed @@ -1127,7 +1183,7 @@ float128 int32_to_float128( int32 a STAT flag zSign; uint32 absA; int8 shiftCount; - bits64 zSig0; + uint64_t zSig0; if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); zSign = ( a < 0 ); @@ -1138,8 +1194,6 @@ float128 int32_to_float128( int32 a STAT } -#endif - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' | to the single-precision floating-point format. The conversion is performed @@ -1204,7 +1258,7 @@ float64 int64_to_float64( int64 a STATUS flag zSign; if ( a == 0 ) return float64_zero; - if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { + if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) { return packFloat64( 1, 0x43E, 0 ); } zSign = ( a < 0 ); @@ -1219,8 +1273,6 @@ float64 uint64_to_float64( uint64 a STAT } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' | to the extended double-precision floating-point format. The conversion @@ -1242,10 +1294,6 @@ floatx80 int64_to_floatx80( int64 a STAT } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' to | the quadruple-precision floating-point format. The conversion is performed @@ -1258,7 +1306,7 @@ float128 int64_to_float128( int64 a STAT uint64 absA; int8 shiftCount; int32 zExp; - bits64 zSig0, zSig1; + uint64_t zSig0, zSig1; if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); zSign = ( a < 0 ); @@ -1279,8 +1327,6 @@ float128 int64_to_float128( int64 a STAT } -#endif - /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value | `a' to the 32-bit two's complement integer format. 
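int32_to_float32 above special-cases 0x80000000 before taking the absolute value, because negating INT32_MIN overflows in 32-bit two's complement; the routine instead returns the pre-built encoding of -2^31 directly via packFloat32(1, 0x9E, 0). The sketch below just spells out what those bits are:

#include <stdint.h>

static uint32_t int32_min_as_f32_bits_sketch(void)
{
    const uint32_t sign = 1u << 31;
    const uint32_t exp  = 0x9Eu << 23;   /* biased exponent 127 + 31 */
    return sign + exp;                   /* 0xCF000000 == -2147483648.0f */
}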
The conversion is @@ -1295,9 +1341,10 @@ int32 float32_to_int32( float32 a STATUS { flag aSign; int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64; + uint32_t aSig; + uint64_t aSig64; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); @@ -1325,8 +1372,9 @@ int32 float32_to_int32_round_to_zero( fl { flag aSign; int16 aExp, shiftCount; - bits32 aSig; + uint32_t aSig; int32 z; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1337,7 +1385,7 @@ int32 float32_to_int32_round_to_zero( fl float_raise( float_flag_invalid STATUS_VAR); if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; } - return (sbits32) 0x80000000; + return (int32_t) 0x80000000; } else if ( aExp <= 0x7E ) { if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -1345,7 +1393,7 @@ int32 float32_to_int32_round_to_zero( fl } aSig = ( aSig | 0x00800000 )<<8; z = aSig>>( - shiftCount ); - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } if ( aSign ) z = - z; @@ -1355,6 +1403,55 @@ int32 float32_to_int32_round_to_zero( fl /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value +| `a' to the 16-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM ) +{ + flag aSign; + int16 aExp, shiftCount; + uint32_t aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x8E; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xC7000000 ) { + float_raise( float_flag_invalid STATUS_VAR); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return 0x7FFF; + } + } + return (int32_t) 0xffff8000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return 0; + } + shiftCount -= 0x10; + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + if ( aSign ) { + z = - z; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value | `a' to the 64-bit two's complement integer format. The conversion is | performed according to the IEC/IEEE Standard for Binary Floating-Point | Arithmetic---which means in particular that the conversion is rounded @@ -1367,8 +1464,9 @@ int64 float32_to_int64( float32 a STATUS { flag aSign; int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64, aSigExtra; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1379,7 +1477,7 @@ int64 float32_to_int64( float32 a STATUS if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) { return LIT64( 0x7FFFFFFFFFFFFFFF ); } - return (sbits64) LIT64( 0x8000000000000000 ); + return (int64_t) LIT64( 0x8000000000000000 ); } if ( aExp ) aSig |= 0x00800000; aSig64 = aSig; @@ -1403,9 +1501,10 @@ int64 float32_to_int64_round_to_zero( fl { flag aSign; int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64; + uint32_t aSig; + uint64_t aSig64; int64 z; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1418,7 +1517,7 @@ int64 float32_to_int64_round_to_zero( fl return LIT64( 0x7FFFFFFFFFFFFFFF ); } } - return (sbits64) LIT64( 0x8000000000000000 ); + return (int64_t) LIT64( 0x8000000000000000 ); } else if ( aExp <= 0x7E ) { if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -1427,7 +1526,7 @@ int64 float32_to_int64_round_to_zero( fl aSig64 = aSig | 0x00800000; aSig64 <<= 40; z = aSig64>>( - shiftCount ); - if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { + if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } if ( aSign ) z = - z; @@ -1446,13 +1545,14 @@ float64 float32_to_float64( float32 a ST { flag aSign; int16 aExp; - bits32 aSig; + uint32_t aSig; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR )); + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloat64( aSign, 0x7FF, 0 ); } if ( aExp == 0 ) { @@ -1460,12 +1560,10 @@ float64 float32_to_float64( float32 a ST normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } - return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 ); } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value | `a' to the extended double-precision floating-point format. The conversion @@ -1477,13 +1575,14 @@ floatx80 float32_to_floatx80( float32 a { flag aSign; int16 aExp; - bits32 aSig; + uint32_t aSig; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) ); + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { @@ -1491,14 +1590,10 @@ floatx80 float32_to_floatx80( float32 a normalizeFloat32Subnormal( aSig, &aExp, &aSig ); } aSig |= 0x00800000; - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); + return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 ); } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value | `a' to the double-precision floating-point format. 
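float32_to_float64 above widens a finite, normal value by shifting the fraction from 23 to 52 bits and re-biasing the exponent by 0x380 (1023 - 127). A stand-alone sketch of that fast path, with the NaN, infinity and subnormal branches of the real routine deliberately left out:

#include <stdint.h>

static uint64_t f32_to_f64_bits_sketch(uint32_t f)
{
    uint64_t sign = (uint64_t)(f >> 31) << 63;
    uint64_t exp  = (uint64_t)((f >> 23) & 0xFF) + 0x380;  /* re-bias */
    uint64_t frac = (uint64_t)(f & 0x007FFFFF) << 29;      /* 23 -> 52 bits */
    return sign | (exp << 52) | frac;
}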
The conversion is @@ -1510,13 +1605,14 @@ float128 float32_to_float128( float32 a { flag aSign; int16 aExp; - bits32 aSig; + uint32_t aSig; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) ); + if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloat128( aSign, 0x7FFF, 0, 0 ); } if ( aExp == 0 ) { @@ -1524,12 +1620,10 @@ float128 float32_to_float128( float32 a normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } - return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); + return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 ); } -#endif - /*---------------------------------------------------------------------------- | Rounds the single-precision floating-point value `a' to an integer, and | returns the result as a single-precision floating-point value. The @@ -1541,9 +1635,10 @@ float32 float32_round_to_int( float32 a { flag aSign; int16 aExp; - bits32 lastBitMask, roundBitsMask; + uint32_t lastBitMask, roundBitsMask; int8 roundingMode; - bits32 z; + uint32_t z; + a = float32_squash_input_denormal(a STATUS_VAR); aExp = extractFloat32Exp( a ); if ( 0x96 <= aExp ) { @@ -1553,7 +1648,7 @@ float32 float32_round_to_int( float32 a return a; } if ( aExp <= 0x7E ) { - if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a; + if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a; STATUS(float_exception_flags) |= float_flag_inexact; aSign = extractFloat32Sign( a ); switch ( STATUS(float_rounding_mode) ) { @@ -1600,7 +1695,7 @@ float32 float32_round_to_int( float32 a static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM) { int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; + uint32_t aSig, bSig, zSig; int16 expDiff; aSig = extractFloat32Frac( a ); @@ -1644,7 +1739,12 @@ static float32 addFloat32Sigs( float32 a return a; } if ( aExp == 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 ); + if (STATUS(flush_to_zero)) { + if (aSig | bSig) { + float_raise(float_flag_output_denormal STATUS_VAR); + } + return packFloat32(zSign, 0, 0); + } return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); } zSig = 0x40000000 + aSig + bSig; @@ -1654,7 +1754,7 @@ static float32 addFloat32Sigs( float32 a aSig |= 0x20000000; zSig = ( aSig + bSig )<<1; --zExp; - if ( (sbits32) zSig < 0 ) { + if ( (int32_t) zSig < 0 ) { zSig = aSig + bSig; ++zExp; } @@ -1674,7 +1774,7 @@ static float32 addFloat32Sigs( float32 a static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM) { int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; + uint32_t aSig, bSig, zSig; int16 expDiff; aSig = extractFloat32Frac( a ); @@ -1747,6 +1847,8 @@ static float32 subFloat32Sigs( float32 a float32 float32_add( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); @@ -1768,6 +1870,8 @@ float32 float32_add( float32 a, float32 float32 float32_sub( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); @@ -1790,9 +1894,12 @@ float32 float32_mul( float32 a, float32 { flag aSign, bSign, zSign; 
int16 aExp, bExp, zExp; - bits32 aSig, bSig; - bits64 zSig64; - bits32 zSig; + uint32_t aSig, bSig; + uint64_t zSig64; + uint32_t zSig; + + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1830,9 +1937,9 @@ float32 float32_mul( float32 a, float32 zExp = aExp + bExp - 0x7F; aSig = ( aSig | 0x00800000 )<<7; bSig = ( bSig | 0x00800000 )<<8; - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 ); zSig = zSig64; - if ( 0 <= (sbits32) ( zSig<<1 ) ) { + if ( 0 <= (int32_t) ( zSig<<1 ) ) { zSig <<= 1; --zExp; } @@ -1850,7 +1957,9 @@ float32 float32_div( float32 a, float32 { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; + uint32_t aSig, bSig, zSig; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1894,9 +2003,9 @@ float32 float32_div( float32 a, float32 aSig >>= 1; ++zExp; } - zSig = ( ( (bits64) aSig )<<32 ) / bSig; + zSig = ( ( (uint64_t) aSig )<<32 ) / bSig; if ( ( zSig & 0x3F ) == 0 ) { - zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); + zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 ); } return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR ); @@ -1912,11 +2021,13 @@ float32 float32_rem( float32 a, float32 { flag aSign, zSign; int16 aExp, bExp, expDiff; - bits32 aSig, bSig; - bits32 q; - bits64 aSig64, bSig64, q64; - bits32 alternateASig; - sbits32 sigMean; + uint32_t aSig, bSig; + uint32_t q; + uint64_t aSig64, bSig64, q64; + uint32_t alternateASig; + int32_t sigMean; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -1958,7 +2069,7 @@ float32 float32_rem( float32 a, float32 q = ( bSig <= aSig ); if ( q ) aSig -= bSig; if ( 0 < expDiff ) { - q = ( ( (bits64) aSig )<<32 ) / bSig; + q = ( ( (uint64_t) aSig )<<32 ) / bSig; q >>= 32 - expDiff; bSig >>= 2; aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; @@ -1970,8 +2081,8 @@ float32 float32_rem( float32 a, float32 } else { if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (bits64) aSig )<<40; - bSig64 = ( (bits64) bSig )<<40; + aSig64 = ( (uint64_t) aSig )<<40; + bSig64 = ( (uint64_t) bSig )<<40; expDiff -= 64; while ( 0 < expDiff ) { q64 = estimateDiv128To64( aSig64, 0, bSig64 ); @@ -1990,12 +2101,12 @@ float32 float32_rem( float32 a, float32 alternateASig = aSig; ++q; aSig -= bSig; - } while ( 0 <= (sbits32) aSig ); + } while ( 0 <= (int32_t) aSig ); sigMean = aSig + alternateASig; if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { aSig = alternateASig; } - zSign = ( (sbits32) aSig < 0 ); + zSign = ( (int32_t) aSig < 0 ); if ( zSign ) aSig = - aSig; return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR ); @@ -2011,8 +2122,9 @@ float32 float32_sqrt( float32 a STATUS_P { flag aSign; int16 aExp, zExp; - bits32 aSig, zSig; - bits64 rem, term; + uint32_t aSig, zSig; + uint64_t rem, term; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -2041,11 +2153,11 @@ float32 float32_sqrt( float32 a STATUS_P goto roundAndPack; } aSig >>= aExp & 1; - term = ( (bits64) zSig ) * zSig; - rem = ( ( (bits64) aSig )<<32 ) - term; - while ( (sbits64) rem < 0 ) { + term = ( (uint64_t) zSig ) * 
zSig; + rem = ( ( (uint64_t) aSig )<<32 ) - term; + while ( (int64_t) rem < 0 ) { --zSig; - rem += ( ( (bits64) zSig )<<1 ) | 1; + rem += ( ( (uint64_t) zSig )<<1 ) | 1; } zSig |= ( rem != 0 ); } @@ -2075,30 +2187,31 @@ float32 float32_sqrt( float32 a STATUS_P static const float64 float32_exp2_coefficients[15] = { - make_float64( 0x3ff0000000000000ll ), /* 1 */ - make_float64( 0x3fe0000000000000ll ), /* 2 */ - make_float64( 0x3fc5555555555555ll ), /* 3 */ - make_float64( 0x3fa5555555555555ll ), /* 4 */ - make_float64( 0x3f81111111111111ll ), /* 5 */ - make_float64( 0x3f56c16c16c16c17ll ), /* 6 */ - make_float64( 0x3f2a01a01a01a01all ), /* 7 */ - make_float64( 0x3efa01a01a01a01all ), /* 8 */ - make_float64( 0x3ec71de3a556c734ll ), /* 9 */ - make_float64( 0x3e927e4fb7789f5cll ), /* 10 */ - make_float64( 0x3e5ae64567f544e4ll ), /* 11 */ - make_float64( 0x3e21eed8eff8d898ll ), /* 12 */ - make_float64( 0x3de6124613a86d09ll ), /* 13 */ - make_float64( 0x3da93974a8c07c9dll ), /* 14 */ - make_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ + const_float64( 0x3ff0000000000000ll ), /* 1 */ + const_float64( 0x3fe0000000000000ll ), /* 2 */ + const_float64( 0x3fc5555555555555ll ), /* 3 */ + const_float64( 0x3fa5555555555555ll ), /* 4 */ + const_float64( 0x3f81111111111111ll ), /* 5 */ + const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ + const_float64( 0x3f2a01a01a01a01all ), /* 7 */ + const_float64( 0x3efa01a01a01a01all ), /* 8 */ + const_float64( 0x3ec71de3a556c734ll ), /* 9 */ + const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ + const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ + const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ + const_float64( 0x3de6124613a86d09ll ), /* 13 */ + const_float64( 0x3da93974a8c07c9dll ), /* 14 */ + const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ }; float32 float32_exp2( float32 a STATUS_PARAM ) { flag aSign; int16 aExp; - bits32 aSig; + uint32_t aSig; float64 r, x, xn; int i; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); @@ -2143,8 +2256,9 @@ float32 float32_log2( float32 a STATUS_P { flag aSign, zSign; int16 aExp; - bits32 aSig, zSig, i; + uint32_t aSig, zSig, i; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); @@ -2168,7 +2282,7 @@ float32 float32_log2( float32 a STATUS_P zSig = aExp << 23; for (i = 1 << 22; i > 0; i >>= 1) { - aSig = ( (bits64)aSig * aSig ) >> 23; + aSig = ( (uint64_t)aSig * aSig ) >> 23; if ( aSig & 0x01000000 ) { aSig >>= 1; zSig |= i; @@ -2183,37 +2297,41 @@ float32 float32_log2( float32 a STATUS_P /*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
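float32_log2 above extracts the fraction bits of the logarithm one at a time: the significand is treated as a fixed-point value in [1,2), squared, and renormalised, and each time the square lands in [2,4) the corresponding result bit is set. A stand-alone sketch of that inner loop (sign, zero, NaN and final-rounding handling are omitted):

#include <stdint.h>

static uint32_t log2_frac_bits_sketch(uint32_t sig /* value in [1,2) as x * 2^23 */)
{
    uint32_t result = 0;
    uint32_t bit;

    for (bit = 1u << 22; bit > 0; bit >>= 1) {
        sig = (uint32_t)(((uint64_t)sig * sig) >> 23);
        if (sig & 0x01000000) {       /* square landed in [2,4) */
            sig >>= 1;
            result |= bit;
        }
    }
    return result;                    /* 23 fraction bits of log2 */
}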
*----------------------------------------------------------------------------*/ int float32_eq( float32 a, float32 b STATUS_PARAM ) { + uint32_t av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid STATUS_VAR); - } + float_raise( float_flag_invalid STATUS_VAR); return 0; } - return ( float32_val(a) == float32_val(b) ) || - ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); - + av = float32_val(a); + bv = float32_val(b); + return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); } /*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| or equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int float32_le( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; - bits32 av, bv; + uint32_t av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) @@ -2225,21 +2343,24 @@ int float32_le( float32 a, float32 b STA bSign = extractFloat32Sign( b ); av = float32_val(a); bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); return ( av == bv ) || ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
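The comparison hunks above keep the usual trick for ordered compares on raw IEEE bit patterns: with equal signs the encodings order like sign-magnitude integers (reversed for negatives), and with differing signs the negative operand is smaller unless both operands are zeros, which the `(av | bv) << 1 == 0` test detects. A sketch of the <= case with the NaN screening removed:

#include <stdint.h>

static int f32_le_bits_sketch(uint32_t av, uint32_t bv)
{
    int aSign = av >> 31;
    int bSign = bv >> 31;

    if (aSign != bSign) {
        /* negative <= positive, and -0 <= +0 */
        return aSign || (uint32_t)((av | bv) << 1) == 0;
    }
    /* same sign: magnitude order flips for negative numbers */
    return (av == bv) || (aSign ^ (av < bv));
}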
*----------------------------------------------------------------------------*/ int float32_lt( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; - bits32 av, bv; + uint32_t av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) @@ -2251,32 +2372,54 @@ int float32_lt( float32 a, float32 b STA bSign = extractFloat32Sign( b ); av = float32_val(a); bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 ); + if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); return ( av != bv ) && ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the single-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int float32_eq_signaling( float32 a, float32 b STATUS_PARAM ) +int float32_unordered( float32 a, float32 b STATUS_PARAM ) { - bits32 av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) ) { float_raise( float_flag_invalid STATUS_VAR); - return 0; + return 1; } - av = float32_val(a); - bv = float32_val(b); - return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + return 0; +} +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
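The net effect of the renaming above is two comparison families: float32_eq/le/lt/unordered raise the invalid exception for any NaN operand, while the _quiet variants raise it only for signaling NaNs. That matches the IEEE 754 split between signaling and quiet predicates, so a front end mapping C relational operators might wrap them roughly as below (the wrapper names are hypothetical, and this assumes the unit is built alongside softfloat.h):

#include "softfloat.h"

/* C's == is a quiet predicate, C's < is a signaling one. */
static int c_equal_op(float32 a, float32 b STATUS_PARAM)
{
    return float32_eq_quiet(a, b STATUS_VAR);
}

static int c_less_op(float32 a, float32 b STATUS_PARAM)
{
    return float32_lt(a, b STATUS_VAR);
}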
+*----------------------------------------------------------------------------*/ + +int float32_eq_quiet( float32 a, float32 b STATUS_PARAM ) +{ + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 0; + } + return ( float32_val(a) == float32_val(b) ) || + ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); } /*---------------------------------------------------------------------------- @@ -2289,7 +2432,9 @@ int float32_eq_signaling( float32 a, flo int float32_le_quiet( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; - bits32 av, bv; + uint32_t av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) @@ -2303,7 +2448,7 @@ int float32_le_quiet( float32 a, float32 bSign = extractFloat32Sign( b ); av = float32_val(a); bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 ); + if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); return ( av == bv ) || ( aSign ^ ( av < bv ) ); } @@ -2318,7 +2463,9 @@ int float32_le_quiet( float32 a, float32 int float32_lt_quiet( float32 a, float32 b STATUS_PARAM ) { flag aSign, bSign; - bits32 av, bv; + uint32_t av, bv; + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) @@ -2332,12 +2479,35 @@ int float32_lt_quiet( float32 a, float32 bSign = extractFloat32Sign( b ); av = float32_val(a); bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 ); + if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); return ( av != bv ) && ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM ) +{ + a = float32_squash_input_denormal(a STATUS_VAR); + b = float32_squash_input_denormal(b STATUS_VAR); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the 32-bit two's complement integer format. 
The conversion is | performed according to the IEC/IEEE Standard for Binary Floating-Point @@ -2351,7 +2521,8 @@ int32 float64_to_int32( float64 a STATUS { flag aSign; int16 aExp, shiftCount; - bits64 aSig; + uint64_t aSig; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -2378,8 +2549,9 @@ int32 float64_to_int32_round_to_zero( fl { flag aSign; int16 aExp, shiftCount; - bits64 aSig, savedASig; + uint64_t aSig, savedASig; int32 z; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -2401,7 +2573,7 @@ int32 float64_to_int32_round_to_zero( fl if ( ( z < 0 ) ^ aSign ) { invalid: float_raise( float_flag_invalid STATUS_VAR); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; } if ( ( aSig<>= shiftCount; + z = aSig; + if ( aSign ) { + z = - z; + } + if ( ( (int16_t)z < 0 ) ^ aSign ) { + invalid: + float_raise( float_flag_invalid STATUS_VAR); + return aSign ? (int32_t) 0xffff8000 : 0x7FFF; + } + if ( ( aSig<>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } } @@ -2515,14 +2740,15 @@ float32 float64_to_float32( float64 a ST { flag aSign; int16 aExp; - bits64 aSig; - bits32 zSig; + uint64_t aSig; + uint32_t zSig; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) ); + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloat32( aSign, 0xFF, 0 ); } shift64RightJamming( aSig, 22, &aSig ); @@ -2546,29 +2772,28 @@ float32 float64_to_float32( float64 a ST | than the desired result exponent whenever `zSig' is a complete, normalized | significand. *----------------------------------------------------------------------------*/ -static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig) +static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig) { - return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig; + return make_float16( + (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); } /* Half precision floats come in two formats: standard IEEE and "ARM" format. The latter gains extra exponent range by omitting the NaN/Inf encodings. */ -float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM ) +float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM) { flag aSign; int16 aExp; - bits32 aSig; + uint32_t aSig; - aSign = a >> 15; - aExp = (a >> 10) & 0x1f; - aSig = a & 0x3ff; + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); if (aExp == 0x1f && ieee) { if (aSig) { - /* Make sure correct exceptions are raised. 
*/ - float32ToCommonNaN(a STATUS_VAR); - aSig |= 0x200; + return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR); } return packFloat32(aSign, 0xff, aSig << 13); } @@ -2586,38 +2811,45 @@ float32 float16_to_float32( bits16 a, fl return packFloat32( aSign, aExp + 0x70, aSig << 13); } -bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM) +float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) { flag aSign; int16 aExp; - bits32 aSig; - bits32 mask; - bits32 increment; + uint32_t aSig; + uint32_t mask; + uint32_t increment; int8 roundingMode; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if (aSig) { - /* Make sure correct exceptions are raised. */ - float32ToCommonNaN(a STATUS_VAR); - aSig |= 0x00400000; + /* Input is a NaN */ + float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); + if (!ieee) { + return packFloat16(aSign, 0, 0); + } + return r; + } + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0x1f, 0x3ff); } - return packFloat16(aSign, 0x1f, aSig >> 13); + return packFloat16(aSign, 0x1f, 0); } - if (aExp == 0 && aSign == 0) { + if (aExp == 0 && aSig == 0) { return packFloat16(aSign, 0, 0); } /* Decimal point between bits 22 and 23. */ aSig |= 0x00800000; aExp -= 0x7f; if (aExp < -14) { - mask = 0x007fffff; - if (aExp < -24) { - aExp = -25; - } else { - mask >>= 24 + aExp; + mask = 0x00ffffff; + if (aExp >= -24) { + mask >>= 25 + aExp; } } else { mask = 0x00001fff; @@ -2659,7 +2891,7 @@ bits16 float32_to_float16( float32 a, fl } } else { if (aExp > 16) { - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); + float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); return packFloat16(aSign, 0x1f, 0x3ff); } } @@ -2673,8 +2905,6 @@ bits16 float32_to_float16( float32 a, fl return packFloat16(aSign, aExp + 14, aSig >> 13); } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. The conversion @@ -2686,13 +2916,14 @@ floatx80 float64_to_floatx80( float64 a { flag aSign; int16 aExp; - bits64 aSig; + uint64_t aSig; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) ); + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { @@ -2705,10 +2936,6 @@ floatx80 float64_to_floatx80( float64 a } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the quadruple-precision floating-point format. 
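The float16 hunks above, together with the new extractFloat16* helpers earlier in the patch, work on the 1-5-10 half-precision layout; the `ieee' flag distinguishes standard IEEE half from the ARM alternative format, which reuses the all-ones exponent for ordinary large values instead of infinities and NaNs, hence the extra `!ieee' branches in float32_to_float16. A sketch of the field split:

#include <stdint.h>

static void unpack_f16_sketch(uint16_t h, int *sign, int *exp, uint32_t *frac)
{
    *sign = h >> 15;            /* 1 sign bit */
    *exp  = (h >> 10) & 0x1F;   /* 5 exponent bits; 0x1F is Inf/NaN only if ieee */
    *frac = h & 0x3FF;          /* 10 fraction bits */
}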
The conversion is @@ -2720,13 +2947,14 @@ float128 float64_to_float128( float64 a { flag aSign; int16 aExp; - bits64 aSig, zSig0, zSig1; + uint64_t aSig, zSig0, zSig1; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) ); + if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); return packFloat128( aSign, 0x7FFF, 0, 0 ); } if ( aExp == 0 ) { @@ -2739,8 +2967,6 @@ float128 float64_to_float128( float64 a } -#endif - /*---------------------------------------------------------------------------- | Rounds the double-precision floating-point value `a' to an integer, and | returns the result as a double-precision floating-point value. The @@ -2752,9 +2978,10 @@ float64 float64_round_to_int( float64 a { flag aSign; int16 aExp; - bits64 lastBitMask, roundBitsMask; + uint64_t lastBitMask, roundBitsMask; int8 roundingMode; - bits64 z; + uint64_t z; + a = float64_squash_input_denormal(a STATUS_VAR); aExp = extractFloat64Exp( a ); if ( 0x433 <= aExp ) { @@ -2764,7 +2991,7 @@ float64 float64_round_to_int( float64 a return a; } if ( aExp < 0x3FF ) { - if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a; + if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a; STATUS(float_exception_flags) |= float_flag_inexact; aSign = extractFloat64Sign( a ); switch ( STATUS(float_rounding_mode) ) { @@ -2824,7 +3051,7 @@ float64 float64_trunc_to_int( float64 a static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM ) { int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; + uint64_t aSig, bSig, zSig; int16 expDiff; aSig = extractFloat64Frac( a ); @@ -2868,7 +3095,12 @@ static float64 addFloat64Sigs( float64 a return a; } if ( aExp == 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 ); + if (STATUS(flush_to_zero)) { + if (aSig | bSig) { + float_raise(float_flag_output_denormal STATUS_VAR); + } + return packFloat64(zSign, 0, 0); + } return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); } zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; @@ -2878,7 +3110,7 @@ static float64 addFloat64Sigs( float64 a aSig |= LIT64( 0x2000000000000000 ); zSig = ( aSig + bSig )<<1; --zExp; - if ( (sbits64) zSig < 0 ) { + if ( (int64_t) zSig < 0 ) { zSig = aSig + bSig; ++zExp; } @@ -2898,7 +3130,7 @@ static float64 addFloat64Sigs( float64 a static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM ) { int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; + uint64_t aSig, bSig, zSig; int16 expDiff; aSig = extractFloat64Frac( a ); @@ -2971,6 +3203,8 @@ static float64 subFloat64Sigs( float64 a float64 float64_add( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); @@ -2992,6 +3226,8 @@ float64 float64_add( float64 a, float64 float64 float64_sub( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); @@ -3014,7 +3250,10 @@ float64 float64_mul( float64 a, float64 { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; + uint64_t aSig, bSig, zSig0, zSig1; + + a = float64_squash_input_denormal(a 
STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -3054,7 +3293,7 @@ float64 float64_mul( float64 a, float64 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; mul64To128( aSig, bSig, &zSig0, &zSig1 ); zSig0 |= ( zSig1 != 0 ); - if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + if ( 0 <= (int64_t) ( zSig0<<1 ) ) { zSig0 <<= 1; --zExp; } @@ -3072,9 +3311,11 @@ float64 float64_div( float64 a, float64 { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - bits64 rem0, rem1; - bits64 term0, term1; + uint64_t aSig, bSig, zSig; + uint64_t rem0, rem1; + uint64_t term0, term1; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -3122,7 +3363,7 @@ float64 float64_div( float64 a, float64 if ( ( zSig & 0x1FF ) <= 2 ) { mul64To128( bSig, zSig, &term0, &term1 ); sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig; add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); } @@ -3142,10 +3383,12 @@ float64 float64_rem( float64 a, float64 { flag aSign, zSign; int16 aExp, bExp, expDiff; - bits64 aSig, bSig; - bits64 q, alternateASig; - sbits64 sigMean; + uint64_t aSig, bSig; + uint64_t q, alternateASig; + int64_t sigMean; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); @@ -3205,12 +3448,12 @@ float64 float64_rem( float64 a, float64 alternateASig = aSig; ++q; aSig -= bSig; - } while ( 0 <= (sbits64) aSig ); + } while ( 0 <= (int64_t) aSig ); sigMean = aSig + alternateASig; if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { aSig = alternateASig; } - zSign = ( (sbits64) aSig < 0 ); + zSign = ( (int64_t) aSig < 0 ); if ( zSign ) aSig = - aSig; return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR ); @@ -3226,8 +3469,9 @@ float64 float64_sqrt( float64 a STATUS_P { flag aSign; int16 aExp, zExp; - bits64 aSig, zSig, doubleZSig; - bits64 rem0, rem1, term0, term1; + uint64_t aSig, zSig, doubleZSig; + uint64_t rem0, rem1, term0, term1; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -3256,7 +3500,7 @@ float64 float64_sqrt( float64 a STATUS_P doubleZSig = zSig<<1; mul64To128( zSig, zSig, &term0, &term1 ); sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig; doubleZSig -= 2; add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); @@ -3276,7 +3520,8 @@ float64 float64_log2( float64 a STATUS_P { flag aSign, zSign; int16 aExp; - bits64 aSig, aSig0, aSig1, zSig, i; + uint64_t aSig, aSig0, aSig1, zSig, i; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); @@ -3298,7 +3543,7 @@ float64 float64_log2( float64 a STATUS_P aExp -= 0x3FF; aSig |= LIT64( 0x0010000000000000 ); zSign = aExp < 0; - zSig = (bits64)aExp << 52; + zSig = (uint64_t)aExp << 52; for (i = 1LL << 51; i > 0; i >>= 1) { mul64To128( aSig, aSig, &aSig0, &aSig1 ); aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); @@ -3315,39 +3560,42 @@ float64 float64_log2( float64 a STATUS_P /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is equal to the 
-| corresponding value `b', and 0 otherwise. The comparison is performed +| corresponding value `b', and 0 otherwise. The invalid exception is raised +| if either operand is a NaN. Otherwise, the comparison is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int float64_eq( float64 a, float64 b STATUS_PARAM ) { - bits64 av, bv; + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid STATUS_VAR); - } + float_raise( float_flag_invalid STATUS_VAR); return 0; } av = float64_val(a); bv = float64_val(b); - return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int float64_le( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; - bits64 av, bv; + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) @@ -3359,22 +3607,25 @@ int float64_le( float64 a, float64 b STA bSign = extractFloat64Sign( b ); av = float64_val(a); bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); return ( av == bv ) || ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
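Every float64 entry point above now begins with float64_squash_input_denormal(): when flush_inputs_to_zero is set in the float_status (the field is added in the header changes further down), a non-zero operand whose exponent field is all zeroes is replaced by a same-signed zero and float_flag_input_denormal is raised. A standalone sketch of the classification that decision rests on (hypothetical f64_is_denormal, assuming IEEE-754 doubles on the host):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* A double is denormal (subnormal) when its exponent field is zero
     * but its fraction is not. */
    static int f64_is_denormal(double x)
    {
        uint64_t bits;
        memcpy(&bits, &x, sizeof bits);
        uint64_t exp  = (bits >> 52) & 0x7ff;
        uint64_t frac = bits & 0x000fffffffffffffULL;
        return exp == 0 && frac != 0;
    }

    int main(void)
    {
        printf("%d\n", f64_is_denormal(5e-324));   /* smallest subnormal: 1 */
        printf("%d\n", f64_is_denormal(1.0));      /* normal value: 0 */
        return 0;
    }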
*----------------------------------------------------------------------------*/ int float64_lt( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; - bits64 av, bv; + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) ) { @@ -3385,31 +3636,56 @@ int float64_lt( float64 a, float64 b STA bSign = extractFloat64Sign( b ); av = float64_val(a); bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 ); + if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); return ( av != bv ) && ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the double-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int float64_eq_signaling( float64 a, float64 b STATUS_PARAM ) +int float64_unordered( float64 a, float64 b STATUS_PARAM ) { - bits64 av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) ) { float_raise( float_flag_invalid STATUS_VAR); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is equal to the +| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception.The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
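The comparison rework visible here splits each predicate into a signaling form (float64_eq, float64_le, float64_lt, float64_unordered), which raises the invalid exception for any NaN operand, and a *_quiet form further down, which raises it only for signaling NaNs. A usage sketch of the difference, assuming the file is compiled and linked against the patched softfloat.c and softfloat.h; compare_demo and the chosen bit patterns are illustrative only:

    #include "softfloat.h"                 /* header as patched below */

    static void compare_demo(void)
    {
        float_status st = { 0 };
        float64 one  = make_float64(0x3ff0000000000000ULL);   /* 1.0       */
        float64 qnan = make_float64(0x7ff8000000000000ULL);   /* quiet NaN */

        st.float_exception_flags = 0;
        (void)float64_eq(one, qnan, &st);        /* signaling form: raises invalid */
        int invalid_raised = st.float_exception_flags & float_flag_invalid;

        st.float_exception_flags = 0;
        (void)float64_eq_quiet(one, qnan, &st);  /* quiet NaN: no exception */
        int still_clean = (st.float_exception_flags == 0);

        /* float64_unordered(one, qnan, &st) returns 1 for this pair. */
        (void)invalid_raised; (void)still_clean;
    }

Callers that relied on the old, quiet behaviour of float64_eq are expected to move to float64_eq_quiet, which carries those semantics forward.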
+*----------------------------------------------------------------------------*/ + +int float64_eq_quiet( float64 a, float64 b STATUS_PARAM ) +{ + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } return 0; } av = float64_val(a); bv = float64_val(b); - return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); } @@ -3423,7 +3699,9 @@ int float64_eq_signaling( float64 a, flo int float64_le_quiet( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; - bits64 av, bv; + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) @@ -3437,7 +3715,7 @@ int float64_le_quiet( float64 a, float64 bSign = extractFloat64Sign( b ); av = float64_val(a); bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 ); + if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); return ( av == bv ) || ( aSign ^ ( av < bv ) ); } @@ -3452,7 +3730,9 @@ int float64_le_quiet( float64 a, float64 int float64_lt_quiet( float64 a, float64 b STATUS_PARAM ) { flag aSign, bSign; - bits64 av, bv; + uint64_t av, bv; + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) @@ -3466,12 +3746,33 @@ int float64_lt_quiet( float64 a, float64 bSign = extractFloat64Sign( b ); av = float64_val(a); bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 ); + if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); return ( av != bv ) && ( aSign ^ ( av < bv ) ); } -#ifdef FLOATX80 +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
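One detail that recurs in all of these predicates: equality is tested as "av == bv || ((av | bv) << 1) == 0". Shifting the sign bit out leaves zero only when both operands are zeros of either sign, so +0 and -0 compare equal without a special case. A tiny standalone illustration (hypothetical f64_bits_equal):

    #include <stdint.h>

    /* Bitwise equality test used by float64_eq and friends above. */
    static int f64_bits_equal(uint64_t av, uint64_t bv)
    {
        return (av == bv) || ((uint64_t)((av | bv) << 1) == 0);
    }
    /* f64_bits_equal(0x0000000000000000ULL, 0x8000000000000000ULL) == 1: +0 == -0 */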
+*----------------------------------------------------------------------------*/ + +int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM ) +{ + a = float64_squash_input_denormal(a STATUS_VAR); + b = float64_squash_input_denormal(b STATUS_VAR); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 1; + } + return 0; +} /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- @@ -3487,12 +3788,12 @@ int32 floatx80_to_int32( floatx80 a STAT { flag aSign; int32 aExp, shiftCount; - bits64 aSig; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; shiftCount = 0x4037 - aExp; if ( shiftCount <= 0 ) shiftCount = 1; shift64RightJamming( aSig, shiftCount, &aSig ); @@ -3514,14 +3815,14 @@ int32 floatx80_to_int32_round_to_zero( f { flag aSign; int32 aExp, shiftCount; - bits64 aSig, savedASig; + uint64_t aSig, savedASig; int32 z; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; goto invalid; } else if ( aExp < 0x3FFF ) { @@ -3536,7 +3837,7 @@ int32 floatx80_to_int32_round_to_zero( f if ( ( z < 0 ) ^ aSign ) { invalid: float_raise( float_flag_invalid STATUS_VAR); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; } if ( ( aSig<>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { + if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } if ( aSign ) z = - z; @@ -3640,14 +3941,14 @@ float32 floatx80_to_float32( floatx80 a { flag aSign; int32 aExp; - bits64 aSig; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) ); + if ( (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } return packFloat32( aSign, 0xFF, 0 ); } @@ -3668,14 +3969,14 @@ float64 floatx80_to_float64( floatx80 a { flag aSign; int32 aExp; - bits64 aSig, zSig; + uint64_t aSig, zSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) ); + if ( (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } return packFloat64( aSign, 0x7FF, 0 ); } @@ -3685,8 +3986,6 @@ float64 floatx80_to_float64( floatx80 a } -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point value `a' to the quadruple-precision floating-point format. 
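The floatx80 helpers that follow rely on the 80-bit layout declared in the patched header: a 64-bit significand with an explicit integer bit in 'low', and the sign plus a 15-bit exponent (bias 0x3FFF) in 'high'. Because the integer bit is explicit, "(uint64_t)(aSig << 1)" discards it and leaves only the fraction bits, which is why that expression serves as the NaN-fraction test throughout. A standalone sketch of the layout with hypothetical accessors:

    #include <stdint.h>

    typedef struct {
        uint64_t low;    /* significand; bit 63 is the explicit integer bit */
        uint16_t high;   /* sign(1) | biased exponent(15) */
    } x80_t;

    static uint64_t x80_frac(x80_t a) { return a.low; }
    static int32_t  x80_exp (x80_t a) { return a.high & 0x7fff; }
    static int      x80_sign(x80_t a) { return a.high >> 15; }

    /* NaN check mirroring the code above: maximum exponent, non-zero fraction. */
    static int x80_is_nan(x80_t a)
    {
        return x80_exp(a) == 0x7fff && (uint64_t)(x80_frac(a) << 1) != 0;
    }

    static const x80_t x80_one = { 0x8000000000000000ULL, 0x3fff };  /* 1.0 */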
The @@ -3698,21 +3997,19 @@ float128 floatx80_to_float128( floatx80 { flag aSign; int16 aExp; - bits64 aSig, zSig0, zSig1; + uint64_t aSig, zSig0, zSig1; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); return packFloat128( aSign, aExp, zSig0, zSig1 ); } -#endif - /*---------------------------------------------------------------------------- | Rounds the extended double-precision floating-point value `a' to an integer, | and returns the result as an extended quadruple-precision floating-point @@ -3724,27 +4021,27 @@ floatx80 floatx80_round_to_int( floatx80 { flag aSign; int32 aExp; - bits64 lastBitMask, roundBitsMask; + uint64_t lastBitMask, roundBitsMask; int8 roundingMode; floatx80 z; aExp = extractFloatx80Exp( a ); if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { return propagateFloatx80NaN( a, a STATUS_VAR ); } return a; } if ( aExp < 0x3FFF ) { if ( ( aExp == 0 ) - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { return a; } STATUS(float_exception_flags) |= float_flag_inexact; aSign = extractFloatx80Sign( a ); switch ( STATUS(float_rounding_mode) ) { case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) + if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); @@ -3797,7 +4094,7 @@ floatx80 floatx80_round_to_int( floatx80 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM) { int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; + uint64_t aSig, bSig, zSig0, zSig1; int32 expDiff; aSig = extractFloatx80Frac( a ); @@ -3807,7 +4104,7 @@ static floatx80 addFloatx80Sigs( floatx8 expDiff = aExp - bExp; if ( 0 < expDiff ) { if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); return a; } if ( bExp == 0 ) --expDiff; @@ -3816,7 +4113,7 @@ static floatx80 addFloatx80Sigs( floatx8 } else if ( expDiff < 0 ) { if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) ++expDiff; @@ -3825,7 +4122,7 @@ static floatx80 addFloatx80Sigs( floatx8 } else { if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { return propagateFloatx80NaN( a, b STATUS_VAR ); } return a; @@ -3840,7 +4137,7 @@ static floatx80 addFloatx80Sigs( floatx8 goto shiftRight1; } zSig0 = aSig + bSig; - if ( (sbits64) zSig0 < 0 ) goto roundAndPack; + if ( (int64_t) zSig0 < 0 ) goto roundAndPack; shiftRight1: shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); zSig0 |= LIT64( 0x8000000000000000 ); @@ -3863,7 +4160,7 @@ static floatx80 addFloatx80Sigs( floatx8 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign 
STATUS_PARAM ) { int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; + uint64_t aSig, bSig, zSig0, zSig1; int32 expDiff; floatx80 z; @@ -3875,7 +4172,7 @@ static floatx80 subFloatx80Sigs( floatx8 if ( 0 < expDiff ) goto aExpBigger; if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { return propagateFloatx80NaN( a, b STATUS_VAR ); } float_raise( float_flag_invalid STATUS_VAR); @@ -3893,7 +4190,7 @@ static floatx80 subFloatx80Sigs( floatx8 return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 ); bExpBigger: if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) ++expDiff; @@ -3905,7 +4202,7 @@ static floatx80 subFloatx80Sigs( floatx8 goto normalizeRoundAndPack; aExpBigger: if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); return a; } if ( bExp == 0 ) --expDiff; @@ -3972,7 +4269,7 @@ floatx80 floatx80_mul( floatx80 a, float { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; + uint64_t aSig, bSig, zSig0, zSig1; floatx80 z; aSig = extractFloatx80Frac( a ); @@ -3983,15 +4280,15 @@ floatx80 floatx80_mul( floatx80 a, float bSign = extractFloatx80Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { return propagateFloatx80NaN( a, b STATUS_VAR ); } if ( ( bExp | bSig ) == 0 ) goto invalid; return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); if ( ( aExp | aSig ) == 0 ) { invalid: float_raise( float_flag_invalid STATUS_VAR); @@ -4011,7 +4308,7 @@ floatx80 floatx80_mul( floatx80 a, float } zExp = aExp + bExp - 0x3FFE; mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (sbits64) zSig0 ) { + if ( 0 < (int64_t) zSig0 ) { shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); --zExp; } @@ -4031,8 +4328,8 @@ floatx80 floatx80_div( floatx80 a, float { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - bits64 rem0, rem1, rem2, term0, term1, term2; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; floatx80 z; aSig = extractFloatx80Frac( a ); @@ -4043,15 +4340,15 @@ floatx80 floatx80_div( floatx80 a, float bSign = extractFloatx80Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); goto invalid; } return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b 
STATUS_VAR ); return packFloatx80( zSign, 0, 0 ); } if ( bExp == 0 ) { @@ -4081,15 +4378,15 @@ floatx80 floatx80_div( floatx80 a, float zSig0 = estimateDiv128To64( aSig, rem1, bSig ); mul64To128( bSig, zSig0, &term0, &term1 ); sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig0; add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); } zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (bits64) ( zSig1<<1 ) <= 8 ) { + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { mul64To128( bSig, zSig1, &term1, &term2 ); sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (sbits64) rem1 < 0 ) { + while ( (int64_t) rem1 < 0 ) { --zSig1; add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); } @@ -4111,8 +4408,8 @@ floatx80 floatx80_rem( floatx80 a, float { flag aSign, zSign; int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig; - bits64 q, term0, term1, alternateASig0, alternateASig1; + uint64_t aSig0, aSig1, bSig; + uint64_t q, term0, term1, alternateASig0, alternateASig1; floatx80 z; aSig0 = extractFloatx80Frac( a ); @@ -4121,14 +4418,14 @@ floatx80 floatx80_rem( floatx80 a, float bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { return propagateFloatx80NaN( a, b STATUS_VAR ); } goto invalid; } if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR ); return a; } if ( bExp == 0 ) { @@ -4142,7 +4439,7 @@ floatx80 floatx80_rem( floatx80 a, float normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (bits64) ( aSig0<<1 ) == 0 ) return a; + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } bSig |= LIT64( 0x8000000000000000 ); @@ -4207,15 +4504,15 @@ floatx80 floatx80_sqrt( floatx80 a STATU { flag aSign; int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; floatx80 z; aSig0 = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR ); + if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR ); if ( ! 
aSign ) return a; goto invalid; } @@ -4238,7 +4535,7 @@ floatx80 floatx80_sqrt( floatx80 a STATU doubleZSig0 = zSig0<<1; mul64To128( zSig0, zSig0, &term0, &term1 ); sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig0; doubleZSig0 -= 2; add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); @@ -4250,7 +4547,7 @@ floatx80 floatx80_sqrt( floatx80 a STATU sub128( rem1, 0, term1, term2, &rem1, &rem2 ); mul64To128( zSig1, zSig1, &term2, &term3 ); sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { + while ( (int64_t) rem1 < 0 ) { --zSig1; shortShift128Left( 0, zSig1, 1, &term2, &term3 ); term3 |= 1; @@ -4268,31 +4565,28 @@ floatx80 floatx80_sqrt( floatx80 a STATU } /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Returns 1 if the extended double-precision floating-point value `a' is equal +| to the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM ) { if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid STATUS_VAR); - } + float_raise( float_flag_invalid STATUS_VAR); return 0; } return ( a.low == b.low ) && ( ( a.high == b.high ) || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) ); } @@ -4300,8 +4594,9 @@ int floatx80_eq( floatx80 a, floatx80 b /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is | less than or equal to the corresponding value `b', and 0 otherwise. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| invalid exception is raised if either operand is a NaN. The comparison is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
*----------------------------------------------------------------------------*/ int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM ) @@ -4309,9 +4604,9 @@ int floatx80_le( floatx80 a, floatx80 b flag aSign, bSign; if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { float_raise( float_flag_invalid STATUS_VAR); return 0; @@ -4321,7 +4616,7 @@ int floatx80_le( floatx80 a, floatx80 b if ( aSign != bSign ) { return aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 ); } return @@ -4332,9 +4627,9 @@ int floatx80_le( floatx80 a, floatx80 b /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| less than the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM ) @@ -4342,9 +4637,9 @@ int floatx80_lt( floatx80 a, floatx80 b flag aSign, bSign; if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { float_raise( float_flag_invalid STATUS_VAR); return 0; @@ -4354,7 +4649,7 @@ int floatx80_lt( floatx80 a, floatx80 b if ( aSign != bSign ) { return aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 ); } return @@ -4364,28 +4659,50 @@ int floatx80_lt( floatx80 a, floatx80 b } /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is equal -| to the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the extended double-precision floating-point values `a' and `b' +| cannot be compared, and 0 otherwise. The invalid exception is raised if +| either operand is a NaN. The comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
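The ordering predicates, both the float64 ones earlier and the floatx80 ones here, lean on a property of the IEEE encodings: once NaNs have been rejected, bit patterns of the same sign order like unsigned integers, and the order simply reverses for negative values, hence the "aSign ^ (av < bv)" idiom. A standalone float64 sketch of the same logic (hypothetical f64_bits_lt):

    #include <stdint.h>

    static int f64_bits_lt(uint64_t av, uint64_t bv)
    {
        int aSign = (int)(av >> 63);
        int bSign = (int)(bv >> 63);
        if (aSign != bSign) {
            /* differing signs: a < b iff a is negative, unless both are zeros */
            return aSign && ((uint64_t)((av | bv) << 1) != 0);
        }
        /* same sign: unsigned order, reversed when both are negative */
        return (av != bv) && (aSign ^ (av < bv));
    }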
+*----------------------------------------------------------------------------*/ +int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM ) +{ + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is +| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +| cause an exception. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM ) +int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM ) { if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { - float_raise( float_flag_invalid STATUS_VAR); + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } return 0; } return ( a.low == b.low ) && ( ( a.high == b.high ) || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) ); } @@ -4402,9 +4719,9 @@ int floatx80_le_quiet( floatx80 a, float flag aSign, bSign; if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { if ( floatx80_is_signaling_nan( a ) || floatx80_is_signaling_nan( b ) ) { @@ -4417,7 +4734,7 @@ int floatx80_le_quiet( floatx80 a, float if ( aSign != bSign ) { return aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 ); } return @@ -4438,9 +4755,9 @@ int floatx80_lt_quiet( floatx80 a, float flag aSign, bSign; if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { if ( floatx80_is_signaling_nan( a ) || floatx80_is_signaling_nan( b ) ) { @@ -4453,7 +4770,7 @@ int floatx80_lt_quiet( floatx80 a, float if ( aSign != bSign ) { return aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 ); } return @@ -4462,9 +4779,27 @@ int floatx80_lt_quiet( floatx80 a, float } -#endif - -#ifdef FLOAT128 +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point values `a' and `b' +| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. +| The comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ +int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM ) +{ + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 1; + } + return 0; +} /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point @@ -4480,7 +4815,7 @@ int32 float128_to_int32( float128 a STAT { flag aSign; int32 aExp, shiftCount; - bits64 aSig0, aSig1; + uint64_t aSig0, aSig1; aSig1 = extractFloat128Frac1( a ); aSig0 = extractFloat128Frac0( a ); @@ -4509,7 +4844,7 @@ int32 float128_to_int32_round_to_zero( f { flag aSign; int32 aExp, shiftCount; - bits64 aSig0, aSig1, savedASig; + uint64_t aSig0, aSig1, savedASig; int32 z; aSig1 = extractFloat128Frac1( a ); @@ -4534,7 +4869,7 @@ int32 float128_to_int32_round_to_zero( f if ( ( z < 0 ) ^ aSign ) { invalid: float_raise( float_flag_invalid STATUS_VAR); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; } if ( ( aSig0<>( ( - shiftCount ) & 63 ) ); - if ( (bits64) ( aSig1<>( - shiftCount ); if ( aSig1 - || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { + || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } } @@ -4658,8 +4993,8 @@ float32 float128_to_float32( float128 a { flag aSign; int32 aExp; - bits64 aSig0, aSig1; - bits32 zSig; + uint64_t aSig0, aSig1; + uint32_t zSig; aSig1 = extractFloat128Frac1( a ); aSig0 = extractFloat128Frac0( a ); @@ -4667,7 +5002,7 @@ float32 float128_to_float32( float128 a aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) ); + return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } return packFloat32( aSign, 0xFF, 0 ); } @@ -4693,7 +5028,7 @@ float64 float128_to_float64( float128 a { flag aSign; int32 aExp; - bits64 aSig0, aSig1; + uint64_t aSig0, aSig1; aSig1 = extractFloat128Frac1( a ); aSig0 = extractFloat128Frac0( a ); @@ -4701,7 +5036,7 @@ float64 float128_to_float64( float128 a aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) ); + return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } return packFloat64( aSign, 0x7FF, 0 ); } @@ -4715,8 +5050,6 @@ float64 float128_to_float64( float128 a } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the extended double-precision floating-point format. 
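The float128 routines below use the same extraction pattern for the binary128 layout: 1 sign bit, a 15-bit exponent (bias 0x3FFF) and a 112-bit fraction split across the two 64-bit words, 48 bits in 'high' and 64 bits in 'low'. A standalone sketch with hypothetical accessors mirroring extractFloat128Sign/Exp/Frac0/Frac1:

    #include <stdint.h>

    typedef struct { uint64_t high, low; } f128_t;

    static uint64_t f128_frac0(f128_t a) { return a.high & 0x0000ffffffffffffULL; }
    static uint64_t f128_frac1(f128_t a) { return a.low; }
    static int32_t  f128_exp  (f128_t a) { return (int32_t)((a.high >> 48) & 0x7fff); }
    static int      f128_sign (f128_t a) { return (int)(a.high >> 63); }

    /* NaN test used throughout: maximum exponent and a non-zero fraction. */
    static int f128_is_nan(f128_t a)
    {
        return f128_exp(a) == 0x7fff && ((f128_frac0(a) | f128_frac1(a)) != 0);
    }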
The @@ -4728,7 +5061,7 @@ floatx80 float128_to_floatx80( float128 { flag aSign; int32 aExp; - bits64 aSig0, aSig1; + uint64_t aSig0, aSig1; aSig1 = extractFloat128Frac1( a ); aSig0 = extractFloat128Frac0( a ); @@ -4736,7 +5069,7 @@ floatx80 float128_to_floatx80( float128 aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) ); + return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); } return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } @@ -4752,8 +5085,6 @@ floatx80 float128_to_floatx80( float128 } -#endif - /*---------------------------------------------------------------------------- | Rounds the quadruple-precision floating-point value `a' to an integer, and | returns the result as a quadruple-precision floating-point value. The @@ -4765,7 +5096,7 @@ float128 float128_round_to_int( float128 { flag aSign; int32 aExp; - bits64 lastBitMask, roundBitsMask; + uint64_t lastBitMask, roundBitsMask; int8 roundingMode; float128 z; @@ -4790,9 +5121,9 @@ float128 float128_round_to_int( float128 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; } else { - if ( (sbits64) z.low < 0 ) { + if ( (int64_t) z.low < 0 ) { ++z.high; - if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; + if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; } } } @@ -4806,7 +5137,7 @@ float128 float128_round_to_int( float128 } else { if ( aExp < 0x3FFF ) { - if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a; STATUS(float_exception_flags) |= float_flag_inexact; aSign = extractFloat128Sign( a ); switch ( STATUS(float_rounding_mode) ) { @@ -4868,7 +5199,7 @@ float128 float128_round_to_int( float128 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM) { int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; int32 expDiff; aSig1 = extractFloat128Frac1( a ); @@ -4917,7 +5248,12 @@ static float128 addFloat128Sigs( float12 } add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); if ( aExp == 0 ) { - if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 ); + if (STATUS(flush_to_zero)) { + if (zSig0 | zSig1) { + float_raise(float_flag_output_denormal STATUS_VAR); + } + return packFloat128(zSign, 0, 0, 0); + } return packFloat128( zSign, 0, zSig0, zSig1 ); } zSig2 = 0; @@ -4949,7 +5285,7 @@ static float128 addFloat128Sigs( float12 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM) { int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; int32 expDiff; float128 z; @@ -5074,7 +5410,7 @@ float128 float128_mul( float128 a, float { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; float128 z; aSig1 = extractFloat128Frac1( a ); @@ -5138,8 +5474,8 @@ float128 float128_div( float128 a, float { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; float128 z; aSig1 = extractFloat128Frac1( a ); @@ -5193,7 +5529,7 @@ float128 float128_div( float128 
a, float zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig0; add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); } @@ -5201,7 +5537,7 @@ float128 float128_div( float128 a, float if ( ( zSig1 & 0x3FFF ) <= 4 ) { mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { + while ( (int64_t) rem1 < 0 ) { --zSig1; add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); } @@ -5222,9 +5558,9 @@ float128 float128_rem( float128 a, float { flag aSign, zSign; int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - bits64 allZero, alternateASig0, alternateASig1, sigMean1; - sbits64 sigMean0; + uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + uint64_t allZero, alternateASig0, alternateASig1, sigMean1; + int64_t sigMean0; float128 z; aSig1 = extractFloat128Frac1( a ); @@ -5306,15 +5642,15 @@ float128 float128_rem( float128 a, float alternateASig1 = aSig1; ++q; sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (sbits64) aSig0 ); + } while ( 0 <= (int64_t) aSig0 ); add128( - aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); + aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 ); if ( ( sigMean0 < 0 ) || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { aSig0 = alternateASig0; aSig1 = alternateASig1; } - zSign = ( (sbits64) aSig0 < 0 ); + zSign = ( (int64_t) aSig0 < 0 ); if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); return normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR ); @@ -5331,8 +5667,8 @@ float128 float128_sqrt( float128 a STATU { flag aSign; int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; float128 z; aSig1 = extractFloat128Frac1( a ); @@ -5364,7 +5700,7 @@ float128 float128_sqrt( float128 a STATU doubleZSig0 = zSig0<<1; mul64To128( zSig0, zSig0, &term0, &term1 ); sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { + while ( (int64_t) rem0 < 0 ) { --zSig0; doubleZSig0 -= 2; add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); @@ -5376,7 +5712,7 @@ float128 float128_sqrt( float128 a STATU sub128( rem1, 0, term1, term2, &rem1, &rem2 ); mul64To128( zSig1, zSig1, &term2, &term3 ); sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { + while ( (int64_t) rem1 < 0 ) { --zSig1; shortShift128Left( 0, zSig1, 1, &term2, &term3 ); term3 |= 1; @@ -5392,7 +5728,8 @@ float128 float128_sqrt( float128 a STATU /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
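floatx80_div and float128_div above share one shape: estimateDiv128To64() produces a quotient digit that is never too low but may be a couple of steps too high, and the "while ((int64_t) rem0 < 0)" loop walks it back down. A simplified standalone sketch of that correction idiom (hypothetical corrected_quotient; it assumes the estimate is at most a few steps above floor(a/b) and that b fits in 63 bits, so reinterpreting the remainder as signed is meaningful, just as the code above assumes):

    #include <stdint.h>

    static uint64_t corrected_quotient(uint64_t a, uint64_t b, uint64_t estimate)
    {
        uint64_t q = estimate;              /* assumed >= floor(a / b), and close */
        int64_t rem = (int64_t)(a - q * b); /* negative iff the estimate is high */
        while (rem < 0) {                   /* same loop shape as the code above */
            --q;
            rem += (int64_t)b;              /* add the divisor back */
        }
        return q;                           /* now exactly floor(a / b) */
    }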
*----------------------------------------------------------------------------*/ @@ -5404,26 +5741,23 @@ int float128_eq( float128 a, float128 b || ( ( extractFloat128Exp( b ) == 0x7FFF ) && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid STATUS_VAR); - } + float_raise( float_flag_invalid STATUS_VAR); return 0; } return ( a.low == b.low ) && ( ( a.high == b.high ) || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) ); } /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| or equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int float128_le( float128 a, float128 b STATUS_PARAM ) @@ -5443,7 +5777,7 @@ int float128_le( float128 a, float128 b if ( aSign != bSign ) { return aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 ); } return @@ -5454,8 +5788,9 @@ int float128_le( float128 a, float128 b /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ int float128_lt( float128 a, float128 b STATUS_PARAM ) @@ -5475,7 +5810,7 @@ int float128_lt( float128 a, float128 b if ( aSign != bSign ) { return aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 ); } return @@ -5485,13 +5820,33 @@ int float128_lt( float128 a, float128 b } /*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_unordered( float128 a, float128 b STATUS_PARAM ) +{ + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid STATUS_VAR); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int float128_eq_signaling( float128 a, float128 b STATUS_PARAM ) +int float128_eq_quiet( float128 a, float128 b STATUS_PARAM ) { if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) @@ -5499,14 +5854,17 @@ int float128_eq_signaling( float128 a, f || ( ( extractFloat128Exp( b ) == 0x7FFF ) && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) ) { - float_raise( float_flag_invalid STATUS_VAR); + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } return 0; } return ( a.low == b.low ) && ( ( a.high == b.high ) || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) ); } @@ -5538,7 +5896,7 @@ int float128_le_quiet( float128 a, float if ( aSign != bSign ) { return aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 ); } return @@ -5574,7 +5932,7 @@ int float128_lt_quiet( float128 a, float if ( aSign != bSign ) { return aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 ); } return @@ -5583,7 +5941,28 @@ int float128_lt_quiet( float128 a, float } -#endif +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM ) +{ + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return 1; + } + return 0; +} /* misc functions */ float32 uint32_to_float32( unsigned int a STATUS_PARAM ) @@ -5632,6 +6011,24 @@ unsigned int float32_to_uint32_round_to_ return res; } +unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float32_to_int64_round_to_zero(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffff) { + res = 0xffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + unsigned int float64_to_uint32( float64 a STATUS_PARAM ) { int64_t v; @@ -5668,6 +6065,24 @@ unsigned int float64_to_uint32_round_to_ return res; } +unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM ) +{ + int64_t v; + unsigned int res; + + v = float64_to_int64_round_to_zero(a STATUS_VAR); + if (v < 0) { + res = 0; + float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffff) { + res = 0xffff; + float_raise( float_flag_invalid STATUS_VAR); + } else { + res = v; + } + return res; +} + /* FIXME: This looks broken. */ uint64_t float64_to_uint64 (float64 a STATUS_PARAM) { @@ -5696,7 +6111,9 @@ INLINE int float ## s ## _compare_intern int is_quiet STATUS_PARAM ) \ { \ flag aSign, bSign; \ - bits ## s av, bv; \ + uint ## s ## _t av, bv; \ + a = float ## s ## _squash_input_denormal(a STATUS_VAR); \ + b = float ## s ## _squash_input_denormal(b STATUS_VAR); \ \ if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \ extractFloat ## s ## Frac( a ) ) || \ @@ -5714,7 +6131,7 @@ INLINE int float ## s ## _compare_intern av = float ## s ## _val(a); \ bv = float ## s ## _val(b); \ if ( aSign != bSign ) { \ - if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) { \ + if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \ /* zero case */ \ return float_relation_equal; \ } else { \ @@ -5742,6 +6159,52 @@ int float ## s ## _compare_quiet( float COMPARE(32, 0xff) COMPARE(64, 0x7ff) +INLINE int floatx80_compare_internal( floatx80 a, floatx80 b, + int is_quiet STATUS_PARAM ) +{ + flag aSign, bSign; + + if (( ( extractFloatx80Exp( a ) == 0x7fff ) && + ( extractFloatx80Frac( a )<<1 ) ) || + ( ( extractFloatx80Exp( b ) == 0x7fff ) && + ( extractFloatx80Frac( b )<<1 ) )) { + if (!is_quiet || + floatx80_is_signaling_nan( a ) || + floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid STATUS_VAR); + } + return float_relation_unordered; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + + if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) && + ( ( a.low | b.low ) == 0 ) ) { + /* zero case */ + return float_relation_equal; + } else { + return 1 - (2 * aSign); + } + } else { + if (a.low == b.low && a.high == b.high) { + return float_relation_equal; + } else { + return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); + } + } +} + +int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM ) +{ + return floatx80_compare_internal(a, b, 0 STATUS_VAR); +} + +int 
floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM ) +{ + return floatx80_compare_internal(a, b, 1 STATUS_VAR); +} + INLINE int float128_compare_internal( float128 a, float128 b, int is_quiet STATUS_PARAM ) { @@ -5786,18 +6249,71 @@ int float128_compare_quiet( float128 a, return float128_compare_internal(a, b, 1 STATUS_VAR); } +/* min() and max() functions. These can't be implemented as + * 'compare and pick one input' because that would mishandle + * NaNs and +0 vs -0. + */ +#define MINMAX(s, nan_exp) \ +INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ + int ismin STATUS_PARAM ) \ +{ \ + flag aSign, bSign; \ + uint ## s ## _t av, bv; \ + a = float ## s ## _squash_input_denormal(a STATUS_VAR); \ + b = float ## s ## _squash_input_denormal(b STATUS_VAR); \ + if (float ## s ## _is_any_nan(a) || \ + float ## s ## _is_any_nan(b)) { \ + return propagateFloat ## s ## NaN(a, b STATUS_VAR); \ + } \ + aSign = extractFloat ## s ## Sign(a); \ + bSign = extractFloat ## s ## Sign(b); \ + av = float ## s ## _val(a); \ + bv = float ## s ## _val(b); \ + if (aSign != bSign) { \ + if (ismin) { \ + return aSign ? a : b; \ + } else { \ + return aSign ? b : a; \ + } \ + } else { \ + if (ismin) { \ + return (aSign ^ (av < bv)) ? a : b; \ + } else { \ + return (aSign ^ (av < bv)) ? b : a; \ + } \ + } \ +} \ + \ +float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \ +{ \ + return float ## s ## _minmax(a, b, 1 STATUS_VAR); \ +} \ + \ +float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \ +{ \ + return float ## s ## _minmax(a, b, 0 STATUS_VAR); \ +} + +MINMAX(32, 0xff) +MINMAX(64, 0x7ff) + + /* Multiply A by 2 raised to the power N. */ float32 float32_scalbn( float32 a, int n STATUS_PARAM ) { flag aSign; - int16 aExp; - bits32 aSig; + int16_t aExp; + uint32_t aSig; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { + if ( aSig ) { + return propagateFloat32NaN( a, a STATUS_VAR ); + } return a; } if ( aExp != 0 ) @@ -5805,6 +6321,12 @@ float32 float32_scalbn( float32 a, int n else if ( aSig == 0 ) return a; + if (n > 0x200) { + n = 0x200; + } else if (n < -0x200) { + n = -0x200; + } + aExp += n - 1; aSig <<= 7; return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR ); @@ -5813,14 +6335,18 @@ float32 float32_scalbn( float32 a, int n float64 float64_scalbn( float64 a, int n STATUS_PARAM ) { flag aSign; - int16 aExp; - bits64 aSig; + int16_t aExp; + uint64_t aSig; + a = float64_squash_input_denormal(a STATUS_VAR); aSig = extractFloat64Frac( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { + if ( aSig ) { + return propagateFloat64NaN( a, a STATUS_VAR ); + } return a; } if ( aExp != 0 ) @@ -5828,46 +6354,62 @@ float64 float64_scalbn( float64 a, int n else if ( aSig == 0 ) return a; + if (n > 0x1000) { + n = 0x1000; + } else if (n < -0x1000) { + n = -0x1000; + } + aExp += n - 1; aSig <<= 10; return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR ); } -#ifdef FLOATX80 floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) { flag aSign; - int16 aExp; - bits64 aSig; + int32_t aExp; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FF ) { + if ( aExp == 0x7FFF ) { + if ( aSig<<1 ) { + return propagateFloatx80NaN( a, a STATUS_VAR ); + } return a; } + if (aExp == 0 && aSig == 0) return a; + if (n 
> 0x10000) { + n = 0x10000; + } else if (n < -0x10000) { + n = -0x10000; + } + aExp += n; return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision), aSign, aExp, aSig, 0 STATUS_VAR ); } -#endif -#ifdef FLOAT128 float128 float128_scalbn( float128 a, int n STATUS_PARAM ) { flag aSign; - int32 aExp; - bits64 aSig0, aSig1; + int32_t aExp; + uint64_t aSig0, aSig1; aSig1 = extractFloat128Frac1( a ); aSig0 = extractFloat128Frac0( a ); aExp = extractFloat128Exp( a ); aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return propagateFloat128NaN( a, a STATUS_VAR ); + } return a; } if ( aExp != 0 ) @@ -5875,9 +6417,14 @@ float128 float128_scalbn( float128 a, in else if ( aSig0 == 0 && aSig1 == 0 ) return a; + if (n > 0x10000) { + n = 0x10000; + } else if (n < -0x10000) { + n = -0x10000; + } + aExp += n - 1; return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 STATUS_VAR ); } -#endif --- fpu/softfloat.h 2013-12-18 11:11:37.000000000 -0500 +++ fpu/softfloat.h 2014-01-14 15:21:31.000000000 -0500 @@ -1,3 +1,9 @@ +/* + * QEMU float support + * + * Derived from SoftFloat. + */ + /*============================================================================ This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic @@ -63,44 +69,13 @@ typedef signed int int32; typedef uint64_t uint64; typedef int64_t int64; -/*---------------------------------------------------------------------------- -| Each of the following `typedef's defines a type that holds integers -| of _exactly_ the number of bits specified. For instance, for most -| implementation of C, `bits16' and `sbits16' should be `typedef'ed to -| `unsigned short int' and `signed short int' (or `short int'), respectively. -*----------------------------------------------------------------------------*/ -typedef uint8_t bits8; -typedef int8_t sbits8; -typedef uint16_t bits16; -typedef int16_t sbits16; -typedef uint32_t bits32; -typedef int32_t sbits32; -typedef uint64_t bits64; -typedef int64_t sbits64; - #define LIT64( a ) a##LL #define INLINE static inline -/*---------------------------------------------------------------------------- -| The macro `FLOATX80' must be defined to enable the extended double-precision -| floating-point format `floatx80'. If this macro is not defined, the -| `floatx80' type will not be defined, and none of the functions that either -| input or output the `floatx80' type will be defined. The same applies to -| the `FLOAT128' macro and the quadruple-precision format `float128'. -*----------------------------------------------------------------------------*/ -#ifdef CONFIG_SOFTFLOAT -/* bit exact soft float support */ -#define FLOATX80 -#define FLOAT128 +#if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32) +#define SNAN_BIT_IS_ONE 1 #else -/* native float support */ -#if (defined(__i386__) || defined(__x86_64__)) && (!defined(CONFIG_BSD) || defined(VBOX)) /** @todo VBOX: not correct on windows */ -#define FLOATX80 -#endif -#endif /* !CONFIG_SOFTFLOAT */ - -#if defined(VBOX) && (!defined(FLOATX80) || defined(CONFIG_SOFTFLOAT)) -# error misconfigured +#define SNAN_BIT_IS_ONE 0 #endif #define STATUS_PARAM , float_status *status @@ -117,7 +92,6 @@ enum { float_relation_unordered = 2 }; -#ifdef CONFIG_SOFTFLOAT /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point types. 
*----------------------------------------------------------------------------*/ @@ -128,31 +102,43 @@ enum { //#define USE_SOFTFLOAT_STRUCT_TYPES #ifdef USE_SOFTFLOAT_STRUCT_TYPES typedef struct { + uint16_t v; +} float16; +#define float16_val(x) (((float16)(x)).v) +#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; }) +#define const_float16(x) { x } +typedef struct { uint32_t v; } float32; /* The cast ensures an error if the wrong type is passed. */ #define float32_val(x) (((float32)(x)).v) #define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; }) +#define const_float32(x) { x } typedef struct { uint64_t v; } float64; #define float64_val(x) (((float64)(x)).v) #define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; }) +#define const_float64(x) { x } #else +typedef uint16_t float16; typedef uint32_t float32; typedef uint64_t float64; +#define float16_val(x) (x) #define float32_val(x) (x) #define float64_val(x) (x) +#define make_float16(x) (x) #define make_float32(x) (x) #define make_float64(x) (x) +#define const_float16(x) (x) +#define const_float32(x) (x) +#define const_float64(x) (x) #endif -#ifdef FLOATX80 typedef struct { uint64_t low; uint16_t high; } floatx80; -#endif -#ifdef FLOAT128 +#define make_floatx80(exp, mant) ((floatx80) { mant, exp }) typedef struct { #ifdef HOST_WORDS_BIGENDIAN uint64_t high, low; @@ -160,7 +146,6 @@ typedef struct { uint64_t low, high; #endif } float128; -#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point underflow tininess-detection mode. @@ -188,26 +173,37 @@ enum { float_flag_divbyzero = 4, float_flag_overflow = 8, float_flag_underflow = 16, - float_flag_inexact = 32 + float_flag_inexact = 32, + float_flag_input_denormal = 64, + float_flag_output_denormal = 128 }; typedef struct float_status { signed char float_detect_tininess; signed char float_rounding_mode; signed char float_exception_flags; -#ifdef FLOATX80 signed char floatx80_rounding_precision; -#endif + /* should denormalised results go to zero and set the inexact flag? */ flag flush_to_zero; + /* should denormalised inputs go to zero and set the input_denormal flag? */ + flag flush_inputs_to_zero; flag default_nan_mode; } float_status; void set_float_rounding_mode(int val STATUS_PARAM); void set_float_exception_flags(int val STATUS_PARAM); +INLINE void set_float_detect_tininess(int val STATUS_PARAM) +{ + STATUS(float_detect_tininess) = val; +} INLINE void set_flush_to_zero(flag val STATUS_PARAM) { STATUS(flush_to_zero) = val; } +INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM) +{ + STATUS(flush_inputs_to_zero) = val; +} INLINE void set_default_nan_mode(flag val STATUS_PARAM) { STATUS(default_nan_mode) = val; @@ -216,9 +212,7 @@ INLINE int get_float_exception_flags(flo { return STATUS(float_exception_flags); } -#ifdef FLOATX80 void set_floatx80_rounding_precision(int val STATUS_PARAM); -#endif /*---------------------------------------------------------------------------- | Routine to raise any or all of the software IEC/IEEE floating-point @@ -229,49 +223,57 @@ void float_raise( int8 flags STATUS_PARA /*---------------------------------------------------------------------------- | Software IEC/IEEE integer-to-floating-point conversion routines. 
*----------------------------------------------------------------------------*/ -float32 int32_to_float32( int STATUS_PARAM ); -float64 int32_to_float64( int STATUS_PARAM ); +float32 int32_to_float32( int32 STATUS_PARAM ); +float64 int32_to_float64( int32 STATUS_PARAM ); float32 uint32_to_float32( unsigned int STATUS_PARAM ); float64 uint32_to_float64( unsigned int STATUS_PARAM ); -#ifdef FLOATX80 -floatx80 int32_to_floatx80( int STATUS_PARAM ); -#endif -#ifdef FLOAT128 -float128 int32_to_float128( int STATUS_PARAM ); -#endif -float32 int64_to_float32( int64_t STATUS_PARAM ); -float32 uint64_to_float32( uint64_t STATUS_PARAM ); -float64 int64_to_float64( int64_t STATUS_PARAM ); -float64 uint64_to_float64( uint64_t STATUS_PARAM ); -#ifdef FLOATX80 -floatx80 int64_to_floatx80( int64_t STATUS_PARAM ); -#endif -#ifdef FLOAT128 -float128 int64_to_float128( int64_t STATUS_PARAM ); -#endif +floatx80 int32_to_floatx80( int32 STATUS_PARAM ); +float128 int32_to_float128( int32 STATUS_PARAM ); +float32 int64_to_float32( int64 STATUS_PARAM ); +float32 uint64_to_float32( uint64 STATUS_PARAM ); +float64 int64_to_float64( int64 STATUS_PARAM ); +float64 uint64_to_float64( uint64 STATUS_PARAM ); +floatx80 int64_to_floatx80( int64 STATUS_PARAM ); +float128 int64_to_float128( int64 STATUS_PARAM ); /*---------------------------------------------------------------------------- | Software half-precision conversion routines. *----------------------------------------------------------------------------*/ -bits16 float32_to_float16( float32, flag STATUS_PARAM ); -float32 float16_to_float32( bits16, flag STATUS_PARAM ); +float16 float32_to_float16( float32, flag STATUS_PARAM ); +float32 float16_to_float32( float16, flag STATUS_PARAM ); + +/*---------------------------------------------------------------------------- +| Software half-precision operations. +*----------------------------------------------------------------------------*/ +int float16_is_quiet_nan( float16 ); +int float16_is_signaling_nan( float16 ); +float16 float16_maybe_silence_nan( float16 ); + +/*---------------------------------------------------------------------------- +| The pattern for a default generated half-precision NaN. +*----------------------------------------------------------------------------*/ +#if defined(TARGET_ARM) +#define float16_default_nan make_float16(0x7E00) +#elif SNAN_BIT_IS_ONE +#define float16_default_nan make_float16(0x7DFF) +#else +#define float16_default_nan make_float16(0xFE00) +#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE single-precision conversion routines. 
*----------------------------------------------------------------------------*/ -int float32_to_int32( float32 STATUS_PARAM ); -int float32_to_int32_round_to_zero( float32 STATUS_PARAM ); -unsigned int float32_to_uint32( float32 STATUS_PARAM ); -unsigned int float32_to_uint32_round_to_zero( float32 STATUS_PARAM ); -int64_t float32_to_int64( float32 STATUS_PARAM ); -int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM ); +int16 float32_to_int16_round_to_zero( float32 STATUS_PARAM ); +unsigned int float32_to_uint16_round_to_zero( float32 STATUS_PARAM ); +int32 float32_to_int32( float32 STATUS_PARAM ); +int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM ); +uint32 float32_to_uint32( float32 STATUS_PARAM ); +uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM ); +int64 float32_to_int64( float32 STATUS_PARAM ); +int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM ); float64 float32_to_float64( float32 STATUS_PARAM ); -#ifdef FLOATX80 floatx80 float32_to_floatx80( float32 STATUS_PARAM ); -#endif -#ifdef FLOAT128 float128 float32_to_float128( float32 STATUS_PARAM ); -#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE single-precision operations. @@ -288,22 +290,33 @@ float32 float32_log2( float32 STATUS_PAR int float32_eq( float32, float32 STATUS_PARAM ); int float32_le( float32, float32 STATUS_PARAM ); int float32_lt( float32, float32 STATUS_PARAM ); -int float32_eq_signaling( float32, float32 STATUS_PARAM ); +int float32_unordered( float32, float32 STATUS_PARAM ); +int float32_eq_quiet( float32, float32 STATUS_PARAM ); int float32_le_quiet( float32, float32 STATUS_PARAM ); int float32_lt_quiet( float32, float32 STATUS_PARAM ); +int float32_unordered_quiet( float32, float32 STATUS_PARAM ); int float32_compare( float32, float32 STATUS_PARAM ); int float32_compare_quiet( float32, float32 STATUS_PARAM ); -int float32_is_nan( float32 ); +float32 float32_min(float32, float32 STATUS_PARAM); +float32 float32_max(float32, float32 STATUS_PARAM); +int float32_is_quiet_nan( float32 ); int float32_is_signaling_nan( float32 ); +float32 float32_maybe_silence_nan( float32 ); float32 float32_scalbn( float32, int STATUS_PARAM ); INLINE float32 float32_abs(float32 a) { + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ return make_float32(float32_val(a) & 0x7fffffff); } INLINE float32 float32_chs(float32 a) { + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ return make_float32(float32_val(a) ^ 0x80000000); } @@ -322,28 +335,58 @@ INLINE int float32_is_zero(float32 a) return (float32_val(a) & 0x7fffffff) == 0; } +INLINE int float32_is_any_nan(float32 a) +{ + return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); +} + +INLINE int float32_is_zero_or_denormal(float32 a) +{ + return (float32_val(a) & 0x7f800000) == 0; +} + +INLINE float32 float32_set_sign(float32 a, int sign) +{ + return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); +} + #define float32_zero make_float32(0) #define float32_one make_float32(0x3f800000) #define float32_ln2 make_float32(0x3f317218) +#define float32_pi make_float32(0x40490fdb) +#define float32_half make_float32(0x3f000000) +#define float32_infinity make_float32(0x7f800000) + + +/*---------------------------------------------------------------------------- +| The pattern for a default generated single-precision NaN. 
+*----------------------------------------------------------------------------*/ +#if defined(TARGET_SPARC) +#define float32_default_nan make_float32(0x7FFFFFFF) +#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) +#define float32_default_nan make_float32(0x7FC00000) +#elif SNAN_BIT_IS_ONE +#define float32_default_nan make_float32(0x7FBFFFFF) +#else +#define float32_default_nan make_float32(0xFFC00000) +#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE double-precision conversion routines. *----------------------------------------------------------------------------*/ -int float64_to_int32( float64 STATUS_PARAM ); -int float64_to_int32_round_to_zero( float64 STATUS_PARAM ); -unsigned int float64_to_uint32( float64 STATUS_PARAM ); -unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM ); -int64_t float64_to_int64( float64 STATUS_PARAM ); -int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM ); -uint64_t float64_to_uint64 (float64 a STATUS_PARAM); -uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM); +int16 float64_to_int16_round_to_zero( float64 STATUS_PARAM ); +unsigned int float64_to_uint16_round_to_zero( float64 STATUS_PARAM ); +int32 float64_to_int32( float64 STATUS_PARAM ); +int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM ); +uint32 float64_to_uint32( float64 STATUS_PARAM ); +uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM ); +int64 float64_to_int64( float64 STATUS_PARAM ); +int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM ); +uint64 float64_to_uint64 (float64 a STATUS_PARAM); +uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM); float32 float64_to_float32( float64 STATUS_PARAM ); -#ifdef FLOATX80 floatx80 float64_to_floatx80( float64 STATUS_PARAM ); -#endif -#ifdef FLOAT128 float128 float64_to_float128( float64 STATUS_PARAM ); -#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE double-precision operations. @@ -360,22 +403,33 @@ float64 float64_log2( float64 STATUS_PAR int float64_eq( float64, float64 STATUS_PARAM ); int float64_le( float64, float64 STATUS_PARAM ); int float64_lt( float64, float64 STATUS_PARAM ); -int float64_eq_signaling( float64, float64 STATUS_PARAM ); +int float64_unordered( float64, float64 STATUS_PARAM ); +int float64_eq_quiet( float64, float64 STATUS_PARAM ); int float64_le_quiet( float64, float64 STATUS_PARAM ); int float64_lt_quiet( float64, float64 STATUS_PARAM ); +int float64_unordered_quiet( float64, float64 STATUS_PARAM ); int float64_compare( float64, float64 STATUS_PARAM ); int float64_compare_quiet( float64, float64 STATUS_PARAM ); -int float64_is_nan( float64 a ); +float64 float64_min(float64, float64 STATUS_PARAM); +float64 float64_max(float64, float64 STATUS_PARAM); +int float64_is_quiet_nan( float64 a ); int float64_is_signaling_nan( float64 ); +float64 float64_maybe_silence_nan( float64 ); float64 float64_scalbn( float64, int STATUS_PARAM ); INLINE float64 float64_abs(float64 a) { + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ return make_float64(float64_val(a) & 0x7fffffffffffffffLL); } INLINE float64 float64_chs(float64 a) { + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. 
+ */ return make_float64(float64_val(a) ^ 0x8000000000000000LL); } @@ -394,24 +448,52 @@ INLINE int float64_is_zero(float64 a) return (float64_val(a) & 0x7fffffffffffffffLL) == 0; } +INLINE int float64_is_any_nan(float64 a) +{ + return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); +} + +INLINE int float64_is_zero_or_denormal(float64 a) +{ + return (float64_val(a) & 0x7ff0000000000000LL) == 0; +} + +INLINE float64 float64_set_sign(float64 a, int sign) +{ + return make_float64((float64_val(a) & 0x7fffffffffffffffULL) + | ((int64_t)sign << 63)); +} + #define float64_zero make_float64(0) #define float64_one make_float64(0x3ff0000000000000LL) #define float64_ln2 make_float64(0x3fe62e42fefa39efLL) +#define float64_pi make_float64(0x400921fb54442d18LL) +#define float64_half make_float64(0x3fe0000000000000LL) +#define float64_infinity make_float64(0x7ff0000000000000LL) -#ifdef FLOATX80 +/*---------------------------------------------------------------------------- +| The pattern for a default generated double-precision NaN. +*----------------------------------------------------------------------------*/ +#if defined(TARGET_SPARC) +#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF )) +#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) +#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 )) +#elif SNAN_BIT_IS_ONE +#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF )) +#else +#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 )) +#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision conversion routines. *----------------------------------------------------------------------------*/ -int floatx80_to_int32( floatx80 STATUS_PARAM ); -int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM ); -int64_t floatx80_to_int64( floatx80 STATUS_PARAM ); -int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM ); +int32 floatx80_to_int32( floatx80 STATUS_PARAM ); +int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM ); +int64 floatx80_to_int64( floatx80 STATUS_PARAM ); +int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM ); float32 floatx80_to_float32( floatx80 STATUS_PARAM ); float64 floatx80_to_float64( floatx80 STATUS_PARAM ); -#ifdef FLOAT128 float128 floatx80_to_float128( floatx80 STATUS_PARAM ); -#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision operations. 
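A minimal standalone sketch (not part of the patch; the local helper simply mirrors the float64_is_any_nan() added above) of why masking the sign bit and comparing against the +infinity bit pattern classifies both quiet and signaling NaNs without touching the host FPU:

#include <stdint.h>
#include <stdio.h>

/* Same test as the new float64_is_any_nan(): clear the sign bit and
 * check for "exponent all ones, mantissa non-zero", i.e. any pattern
 * strictly above +infinity (0x7ff0000000000000). */
static int is_any_nan64(uint64_t v)
{
    return (v & ~(1ULL << 63)) > 0x7ff0000000000000ULL;
}

int main(void)
{
    uint64_t qnan = 0x7ff8000000000000ULL; /* default quiet NaN, SNAN_BIT_IS_ONE == 0 */
    uint64_t snan = 0x7ff0000000000001ULL; /* signaling NaN: quiet bit clear, payload set */
    uint64_t inf  = 0x7ff0000000000000ULL; /* +infinity */

    printf("qnan %d snan %d inf %d\n",
           is_any_nan64(qnan), is_any_nan64(snan), is_any_nan64(inf));
    /* expected output: qnan 1 snan 1 inf 0 */
    return 0;
}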
@@ -426,11 +508,16 @@ floatx80 floatx80_sqrt( floatx80 STATUS_ int floatx80_eq( floatx80, floatx80 STATUS_PARAM ); int floatx80_le( floatx80, floatx80 STATUS_PARAM ); int floatx80_lt( floatx80, floatx80 STATUS_PARAM ); -int floatx80_eq_signaling( floatx80, floatx80 STATUS_PARAM ); +int floatx80_unordered( floatx80, floatx80 STATUS_PARAM ); +int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM ); int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM ); int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM ); -int floatx80_is_nan( floatx80 ); +int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM ); +int floatx80_compare( floatx80, floatx80 STATUS_PARAM ); +int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM ); +int floatx80_is_quiet_nan( floatx80 ); int floatx80_is_signaling_nan( floatx80 ); +floatx80 floatx80_maybe_silence_nan( floatx80 ); floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM ); INLINE floatx80 floatx80_abs(floatx80 a) @@ -447,7 +534,7 @@ INLINE floatx80 floatx80_chs(floatx80 a) INLINE int floatx80_is_infinity(floatx80 a) { - return (a.high & 0x7fff) == 0x7fff && a.low == 0; + return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL; } INLINE int floatx80_is_neg(floatx80 a) @@ -460,22 +547,46 @@ INLINE int floatx80_is_zero(floatx80 a) return (a.high & 0x7fff) == 0 && a.low == 0; } -#endif +INLINE int floatx80_is_zero_or_denormal(floatx80 a) +{ + return (a.high & 0x7fff) == 0; +} + +INLINE int floatx80_is_any_nan(floatx80 a) +{ + return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); +} + +#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) +#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) +#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) +#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL) +#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL) -#ifdef FLOAT128 +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. The +| `high' and `low' values hold the most- and least-significant bits, +| respectively. +*----------------------------------------------------------------------------*/ +#if SNAN_BIT_IS_ONE +#define floatx80_default_nan_high 0x7FFF +#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF ) +#else +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xC000000000000000 ) +#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE quadruple-precision conversion routines. *----------------------------------------------------------------------------*/ -int float128_to_int32( float128 STATUS_PARAM ); -int float128_to_int32_round_to_zero( float128 STATUS_PARAM ); -int64_t float128_to_int64( float128 STATUS_PARAM ); -int64_t float128_to_int64_round_to_zero( float128 STATUS_PARAM ); +int32 float128_to_int32( float128 STATUS_PARAM ); +int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM ); +int64 float128_to_int64( float128 STATUS_PARAM ); +int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM ); float32 float128_to_float32( float128 STATUS_PARAM ); float64 float128_to_float64( float128 STATUS_PARAM ); -#ifdef FLOATX80 floatx80 float128_to_floatx80( float128 STATUS_PARAM ); -#endif /*---------------------------------------------------------------------------- | Software IEC/IEEE quadruple-precision operations. 
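A small illustration (again outside the patch, using local stand-ins for the softfloat.h definitions above) of why floatx80_is_infinity() now requires low == 0x8000000000000000 rather than low == 0: the 80-bit format stores an explicit integer bit, so a genuine infinity has mantissa 1.000...0, while exponent 0x7fff with an all-zero mantissa is a pseudo-infinity that modern x87 hardware treats as an unsupported encoding.

#include <stdint.h>
#include <stdio.h>

/* Local copies of the patched header's floatx80 layout and checks,
 * for illustration only; names mirror softfloat.h. */
typedef struct { uint64_t low; uint16_t high; } floatx80;
#define make_floatx80(exp, mant) ((floatx80) { mant, exp })

static int floatx80_is_infinity(floatx80 a)
{
    /* Exponent all ones and mantissa exactly 1.000...0 (explicit
     * integer bit set, fraction zero). */
    return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000ULL;
}

static int floatx80_is_any_nan(floatx80 a)
{
    /* Shifting out the integer bit leaves only the NaN payload bits. */
    return ((a.high & 0x7fff) == 0x7fff) && (a.low << 1);
}

int main(void)
{
    floatx80 inf        = make_floatx80(0x7fff, 0x8000000000000000ULL);
    floatx80 pseudo_inf = make_floatx80(0x7fff, 0x0000000000000000ULL);
    floatx80 one        = make_floatx80(0x3fff, 0x8000000000000000ULL); /* floatx80_one */

    printf("inf %d pseudo %d one %d nan %d\n",
           floatx80_is_infinity(inf), floatx80_is_infinity(pseudo_inf),
           floatx80_is_infinity(one), floatx80_is_any_nan(pseudo_inf));
    /* expected output: inf 1 pseudo 0 one 0 nan 0 */
    return 0;
}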
@@ -490,13 +601,16 @@ float128 float128_sqrt( float128 STATUS_ int float128_eq( float128, float128 STATUS_PARAM ); int float128_le( float128, float128 STATUS_PARAM ); int float128_lt( float128, float128 STATUS_PARAM ); -int float128_eq_signaling( float128, float128 STATUS_PARAM ); +int float128_unordered( float128, float128 STATUS_PARAM ); +int float128_eq_quiet( float128, float128 STATUS_PARAM ); int float128_le_quiet( float128, float128 STATUS_PARAM ); int float128_lt_quiet( float128, float128 STATUS_PARAM ); +int float128_unordered_quiet( float128, float128 STATUS_PARAM ); int float128_compare( float128, float128 STATUS_PARAM ); int float128_compare_quiet( float128, float128 STATUS_PARAM ); -int float128_is_nan( float128 ); +int float128_is_quiet_nan( float128 ); int float128_is_signaling_nan( float128 ); +float128 float128_maybe_silence_nan( float128 ); float128 float128_scalbn( float128, int STATUS_PARAM ); INLINE float128 float128_abs(float128 a) @@ -526,12 +640,27 @@ INLINE int float128_is_zero(float128 a) return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; } -#endif - -#else /* CONFIG_SOFTFLOAT */ +INLINE int float128_is_zero_or_denormal(float128 a) +{ + return (a.high & 0x7fff000000000000LL) == 0; +} -#include "softfloat-native.h" +INLINE int float128_is_any_nan(float128 a) +{ + return ((a.high >> 48) & 0x7fff) == 0x7fff && + ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); +} -#endif /* !CONFIG_SOFTFLOAT */ +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. The `high' and +| `low' values hold the most- and least-significant bits, respectively. +*----------------------------------------------------------------------------*/ +#if SNAN_BIT_IS_ONE +#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) +#else +#define float128_default_nan_high LIT64( 0xFFFF800000000000 ) +#define float128_default_nan_low LIT64( 0x0000000000000000 ) +#endif #endif /* !SOFTFLOAT_H */ --- gen-icount.h 2013-12-18 11:11:37.000000000 -0500 +++ gen-icount.h 2014-01-14 15:21:31.000000000 -0500 @@ -29,7 +29,7 @@ static void gen_icount_end(TranslationBl if (use_icount) { *icount_arg = num_insns; gen_set_label(icount_label); - tcg_gen_exit_tb((uintptr_t)(tb + 2)); + tcg_gen_exit_tb((tcg_target_long)tb + 2); } } --- hostregs_helper.h 2013-12-18 11:11:38.000000000 -0500 +++ hostregs_helper.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,70 +0,0 @@ -/* - * Save/restore host registers. - * - * Copyright (c) 2007 CodeSourcery - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -/* - * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice - * other than GPL or LGPL is available it will apply instead, Oracle elects to use only - * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where - * a choice of LGPL license versions is made available with the language indicating - * that LGPLv2 or any later version may be used, or where a choice of which version - * of the LGPL is applied is otherwise unspecified. - */ - -/* The GCC global register variable extension is used to reserve some - host registers for use by generated code. However only the core parts of - the translation engine are compiled with these settings. We must manually - save/restore these registers when called from regular code. - It is not sufficient to save/restore T0 et. al. as these may be declared - with a datatype smaller than the actual register. */ - -#if defined(DECLARE_HOST_REGS) - -#define DO_REG(REG) \ - register host_reg_t reg_AREG##REG asm(AREG##REG); \ - volatile host_reg_t saved_AREG##REG; - -#elif defined(SAVE_HOST_REGS) - -#define DO_REG(REG) \ - __asm__ __volatile__ ("" : "=r" (reg_AREG##REG)); \ - saved_AREG##REG = reg_AREG##REG; - -#else - -#define DO_REG(REG) \ - reg_AREG##REG = saved_AREG##REG; \ - __asm__ __volatile__ ("" : : "r" (reg_AREG##REG)); - -#endif - -#ifdef AREG0 -DO_REG(0) -#endif - -#ifdef AREG1 -DO_REG(1) -#endif - -#ifdef AREG2 -DO_REG(2) -#endif - -#undef SAVE_HOST_REGS -#undef DECLARE_HOST_REGS -#undef DO_REG --- ioport.h 2013-12-18 11:11:38.000000000 -0500 +++ ioport.h 2014-01-14 15:21:31.000000000 -0500 @@ -34,6 +34,7 @@ #define IOPORT_H #include "qemu-common.h" +#include "iorange.h" typedef uint32_t pio_addr_t; #define FMT_pioaddr PRIx32 @@ -45,12 +46,13 @@ typedef uint32_t pio_addr_t; typedef void (IOPortWriteFunc)(void *opaque, uint32_t address, uint32_t data); typedef uint32_t (IOPortReadFunc)(void *opaque, uint32_t address); +void ioport_register(IORange *iorange); int register_ioport_read(pio_addr_t start, int length, int size, IOPortReadFunc *func, void *opaque); int register_ioport_write(pio_addr_t start, int length, int size, IOPortWriteFunc *func, void *opaque); void isa_unassign_ioport(pio_addr_t start, int length); - +bool isa_is_ioport_assigned(pio_addr_t start); #ifndef VBOX void cpu_outb(pio_addr_t addr, uint8_t val); --- iorange.h 1969-12-31 19:00:00.000000000 -0500 +++ iorange.h 2014-01-14 15:21:31.000000000 -0500 @@ -0,0 +1,30 @@ +#ifndef IORANGE_H +#define IORANGE_H + +#include + +typedef struct IORange IORange; +typedef struct IORangeOps IORangeOps; + +struct IORangeOps { + void (*read)(IORange *iorange, uint64_t offset, unsigned width, + uint64_t *data); + void (*write)(IORange *iorange, uint64_t offset, unsigned width, + uint64_t data); +}; + +struct IORange { + const IORangeOps *ops; + uint64_t base; + uint64_t len; +}; + +static inline void iorange_init(IORange *iorange, const IORangeOps *ops, + uint64_t base, uint64_t len) +{ + iorange->ops = ops; + iorange->base = base; + iorange->len = len; +} + +#endif --- osdep.h 2013-12-18 11:11:38.000000000 -0500 +++ osdep.h 2014-01-14 15:21:31.000000000 -0500 @@ -55,9 +55,7 @@ #endif #ifndef VBOX -#ifndef _WIN32 #include -#endif #endif /* !VBOX */ #ifndef glue @@ -105,6 +103,10 @@ #define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) #endif +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -139,24 +141,72 @@ #endif #ifndef VBOX +int qemu_daemon(int nochdir, int noclose); void *qemu_memalign(size_t alignment, size_t size); void *qemu_vmalloc(size_t size); void qemu_vfree(void *ptr); +#define QEMU_MADV_INVALID -1 + +#if defined(CONFIG_MADVISE) + +#define QEMU_MADV_WILLNEED MADV_WILLNEED +#define QEMU_MADV_DONTNEED MADV_DONTNEED +#ifdef MADV_DONTFORK +#define QEMU_MADV_DONTFORK MADV_DONTFORK +#else +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#endif +#ifdef MADV_MERGEABLE +#define QEMU_MADV_MERGEABLE MADV_MERGEABLE +#else +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID +#endif + +#elif defined(CONFIG_POSIX_MADVISE) + +#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED +#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID + +#else /* no-op */ + +#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID +#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID +#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID +#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID + +#endif + +int qemu_madvise(void *addr, size_t len, int advice); + +#if defined(__HAIKU__) && defined(__i386__) +#define FMT_pid "%ld" +#else +#define FMT_pid "%d" +#endif + int qemu_create_pidfile(const char *filename); +int qemu_get_thread_id(void); #ifdef _WIN32 -int ffs(int i); +static inline void qemu_timersub(const struct timeval *val1, + const struct timeval *val2, + struct timeval *res) +{ + res->tv_sec = val1->tv_sec - val2->tv_sec; + if (val1->tv_usec < val2->tv_usec) { + res->tv_sec--; + res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000; + } else { + res->tv_usec = val1->tv_usec - val2->tv_usec; + } +} +#else +#define qemu_timersub timersub +#endif -typedef struct { - long tv_sec; - long tv_usec; -} qemu_timeval; -int qemu_gettimeofday(qemu_timeval *tp); -#else -typedef struct timeval qemu_timeval; -#define qemu_gettimeofday(tp) gettimeofday(tp, NULL); -#endif /* !_WIN32 */ #else /* VBOX */ # define qemu_memalign(alignment, size) ( (alignment) <= PAGE_SIZE ? RTMemPageAlloc((size)) : NULL ) # define qemu_vfree(pv) RTMemPageFree(pv, missing_size_parameter) --- qemu-common.h 2013-12-18 11:11:38.000000000 -0500 +++ qemu-common.h 2014-01-14 15:21:31.000000000 -0500 @@ -2,6 +2,7 @@ #ifndef QEMU_COMMON_H #define QEMU_COMMON_H +#include "compiler.h" #include "config-host.h" #ifdef VBOX @@ -32,17 +33,10 @@ char *pstrcat(char *buf, int buf_size, c # define qemu_init_vcpu(env) do { } while (0) /* we don't need this :-) */ -# define QEMU_NORETURN __attribute__((__noreturn__)) -# ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT -# define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) -# else -# define QEMU_WARN_UNUSED_RESULT -# endif -#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1]; - #include -#include "cpu.h" +typedef int (*fprintf_function)(FILE *f, const char *fmt, ...) 
+ GCC_FMT_ATTR(2, 3); #else /* !VBOX */ #ifdef _WIN32 @@ -51,25 +45,15 @@ char *pstrcat(char *buf, int buf_size, c #include #endif -#define QEMU_NORETURN __attribute__ ((__noreturn__)) -#ifdef CONFIG_GCC_ATTRIBUTE_WARN_UNUSED_RESULT -#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) -#else -#define QEMU_WARN_UNUSED_RESULT -#endif - -#define QEMU_BUILD_BUG_ON(x) typedef char __build_bug_on__##__LINE__[(x)?-1:1]; +#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR) typedef struct QEMUTimer QEMUTimer; typedef struct QEMUFile QEMUFile; typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; - -/* Hack around the mess dyngen-exec.h causes: We need QEMU_NORETURN in files that - cannot include the following headers without conflicts. This condition has - to be removed once dyngen is gone. */ -#ifndef __DYNGEN_EXEC_H__ +struct Monitor; +typedef struct Monitor Monitor; /* we put basic includes here to avoid repeating them in device drivers */ #include @@ -86,7 +70,17 @@ typedef struct DeviceState DeviceState; #include #include #include +#include #include +#include + +#ifdef _WIN32 +#include "qemu-os-win32.h" +#endif + +#ifdef CONFIG_POSIX +#include "qemu-os-posix.h" +#endif #ifndef O_LARGEFILE #define O_LARGEFILE 0 @@ -103,6 +97,9 @@ typedef struct DeviceState DeviceState; #if !defined(ENOTSUP) #define ENOTSUP 4096 #endif +#ifndef TIME_MAX +#define TIME_MAX LONG_MAX +#endif #ifndef CONFIG_IOVEC #define CONFIG_IOVEC @@ -118,10 +115,13 @@ struct iovec { #include #endif +typedef int (*fprintf_function)(FILE *f, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + #ifdef _WIN32 #define fsync _commit #define lseek _lseeki64 -extern int qemu_ftruncate64(int, int64_t); +int qemu_ftruncate64(int, int64_t); #define ftruncate qemu_ftruncate64 static inline char *realpath(const char *path, char *resolved_path) @@ -129,17 +129,11 @@ static inline char *realpath(const char _fullpath(resolved_path, path, _MAX_PATH); return resolved_path; } - -#define PRId64 "I64d" -#define PRIx64 "I64x" -#define PRIu64 "I64u" -#define PRIo64 "I64o" #endif /* FIXME: Remove NEED_CPU_H. */ #ifndef NEED_CPU_H -#include #include "osdep.h" #include "bswap.h" @@ -149,6 +143,11 @@ static inline char *realpath(const char #endif /* !defined(NEED_CPU_H) */ +/* main function, renamed */ +#if defined(CONFIG_COCOA) +int qemu_main(int argc, char **argv, char **envp); +#endif + /* bottom halves */ typedef void QEMUBHFunc(void *opaque); @@ -170,8 +169,6 @@ void qemu_bh_delete(QEMUBH *bh); int qemu_bh_poll(void); void qemu_bh_update_timeout(int *timeout); -uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c); - void qemu_get_timedate(struct tm *tm, int offset); int qemu_timedate_diff(struct tm *tm); @@ -186,6 +183,21 @@ int qemu_fls(int i); int qemu_fdatasync(int fd); int fcntl_setfl(int fd, int flag); +/* + * strtosz() suffixes used to specify the default treatment of an + * argument passed to strtosz() without an explicit suffix. + * These should be defined using upper case characters in the range + * A-Z, as strtosz() will use qemu_toupper() on the given argument + * prior to comparison. 
+ */ +#define STRTOSZ_DEFSUFFIX_TB 'T' +#define STRTOSZ_DEFSUFFIX_GB 'G' +#define STRTOSZ_DEFSUFFIX_MB 'M' +#define STRTOSZ_DEFSUFFIX_KB 'K' +#define STRTOSZ_DEFSUFFIX_B 'B' +int64_t strtosz(const char *nptr, char **end); +int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix); + /* path.c */ void init_paths(const char *prefix); const char *path(const char *pathname); @@ -206,6 +218,7 @@ const char *path(const char *pathname); #define qemu_isascii(c) isascii((unsigned char)(c)) #define qemu_toascii(c) toascii((unsigned char)(c)) +void *qemu_oom_check(void *ptr); void *qemu_malloc(size_t size); void *qemu_realloc(void *ptr, size_t size); void *qemu_mallocz(size_t size); @@ -222,20 +235,29 @@ ssize_t qemu_write_full(int fd, const vo void qemu_set_cloexec(int fd); #ifndef _WIN32 +int qemu_add_child_watch(pid_t pid); int qemu_eventfd(int pipefd[2]); int qemu_pipe(int pipefd[2]); #endif +#ifdef _WIN32 +#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags) +#else +#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags) +#endif + /* Error handling. */ -void QEMU_NORETURN hw_error(const char *fmt, ...) - __attribute__ ((__format__ (__printf__, 1, 2))); +void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); /* IO callbacks. */ typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); typedef int IOCanReadHandler(void *opaque); typedef void IOHandler(void *opaque); +void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds); +void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc); + struct ParallelIOArg { void *buffer; int count; @@ -249,6 +271,7 @@ typedef struct NICInfo NICInfo; typedef struct HCIInfo HCIInfo; typedef struct AudioState AudioState; typedef struct BlockDriverState BlockDriverState; +typedef struct DriveInfo DriveInfo; typedef struct DisplayState DisplayState; typedef struct DisplayChangeListener DisplayChangeListener; typedef struct DisplaySurface DisplaySurface; @@ -267,6 +290,13 @@ typedef struct PCIHostState PCIHostState typedef struct PCIExpressHost PCIExpressHost; typedef struct PCIBus PCIBus; typedef struct PCIDevice PCIDevice; +typedef struct PCIExpressDevice PCIExpressDevice; +typedef struct PCIBridge PCIBridge; +typedef struct PCIEAERMsg PCIEAERMsg; +typedef struct PCIEAERLog PCIEAERLog; +typedef struct PCIEAERErr PCIEAERErr; +typedef struct PCIEPort PCIEPort; +typedef struct PCIESlot PCIESlot; typedef struct SerialState SerialState; typedef struct IRQState *qemu_irq; typedef struct PCMCIACardState PCMCIACardState; @@ -293,7 +323,9 @@ void qemu_notify_event(void); /* Unblock cpu */ void qemu_cpu_kick(void *env); -int qemu_cpu_self(void *env); +void qemu_cpu_kick_self(void); +int qemu_cpu_is_self(void *env); +bool all_cpu_threads_idle(void); /* work queue */ struct qemu_work_item { @@ -319,14 +351,30 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); +void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, + size_t size); void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); - 
-struct Monitor; -typedef struct Monitor Monitor; +void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); +void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, + size_t skip); + +void qemu_progress_init(int enabled, float min_skip); +void qemu_progress_end(void); +void qemu_progress_print(float delta, int max); + +#define QEMU_FILE_TYPE_BIOS 0 +#define QEMU_FILE_TYPE_KEYMAP 1 +char *qemu_find_file(int type, const char *name); + +/* OS specific functions */ +void os_setup_early_signal_handling(void); +char *os_find_datadir(const char *argv0); +void os_parse_cmd_args(int index, const char *optarg); +void os_pidfile_error(void); /* Convert a byte between binary and BCD. */ static inline uint8_t to_bcd(uint8_t val) @@ -339,9 +387,31 @@ static inline uint8_t from_bcd(uint8_t v return ((val >> 4) * 10) + (val & 0x0f); } -#include "module.h" +/* compute with 96 bit intermediate result: (a*b)/c */ +static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) +{ + union { + uint64_t ll; + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t high, low; +#else + uint32_t low, high; +#endif + } l; + } u, res; + uint64_t rl, rh; + + u.ll = a; + rl = (uint64_t)u.l.low * (uint64_t)b; + rh = (uint64_t)u.l.high * (uint64_t)b; + rh += (rl >> 32); + res.l.high = rh / c; + res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; + return res.ll; +} -#endif /* dyngen-exec.h hack */ +#include "module.h" #endif /* !VBOX */ --- qemu-lock.h 2013-12-18 11:11:38.000000000 -0500 +++ qemu-lock.h 2014-01-14 15:21:31.000000000 -0500 @@ -24,15 +24,11 @@ * of the LGPL is applied is otherwise unspecified. */ -/* Locking primitives. Most of this code should be redundant - - system emulation doesn't need/use locking, NPTL userspace uses - pthread mutexes, and non-NPTL userspace isn't threadsafe anyway. - In either case a spinlock is probably the wrong kind of lock. - Spinlocks are only good if you know annother CPU has the lock and is - likely to release it soon. In environments where you have more threads - than physical CPUs (the extreme case being a single CPU host) a spinlock - simply wastes CPU until the OS decides to preempt it. */ -#if defined(CONFIG_USE_NPTL) +/* configure guarantees us that we have pthreads on any host except + * mingw32, which doesn't support any of the user-only targets. + * So we can simply assume we have pthread mutexes here. + */ +#if defined(CONFIG_USER_ONLY) #include #define spin_lock pthread_mutex_lock @@ -42,208 +38,15 @@ #else -#if defined(__hppa__) - -typedef int spinlock_t[4]; - -#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 } - -static inline void resetlock (spinlock_t *p) -{ - (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1; -} - -#else - +/* Empty implementations, on the theory that system mode emulation + * is single-threaded. This means that these functions should only + * be used from code run in the TCG cpu thread, and cannot protect + * data structures which might also be accessed from the IO thread + * or from signal handlers. + */ typedef int spinlock_t; - #define SPIN_LOCK_UNLOCKED 0 -static inline void resetlock (spinlock_t *p) -{ - *p = SPIN_LOCK_UNLOCKED; -} - -#endif - -#ifdef VBOX -DECLINLINE(int) testandset (int *p) -{ - return ASMAtomicCmpXchgU32((volatile uint32_t *)p, 1, 0) ? 0 : 1; -} -#elif defined(_ARCH_PPC) -static inline int testandset (int *p) -{ - int ret; - __asm__ __volatile__ ( - " lwarx %0,0,%1\n" - " xor. %0,%3,%0\n" - " bne $+12\n" - " stwcx. 
%2,0,%1\n" - " bne- $-16\n" - : "=&r" (ret) - : "r" (p), "r" (1), "r" (0) - : "cr0", "memory"); - return ret; -} -#elif defined(__i386__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__x86_64__) -static inline int testandset (int *p) -{ - long int64_t readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__s390__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" - " jl 0b" - : "=&d" (ret) - : "r" (1), "a" (p), "0" (*p) - : "cc", "memory" ); - return ret; -} -#elif defined(__alpha__) -static inline int testandset (int *p) -{ - int ret; - unsigned long one; - - __asm__ __volatile__ ("0: mov 1,%2\n" - " ldl_l %0,%1\n" - " stl_c %2,%1\n" - " beq %2,1f\n" - ".subsection 2\n" - "1: br 0b\n" - ".previous" - : "=r" (ret), "=m" (*p), "=r" (one) - : "m" (*p)); - return ret; -} -#elif defined(__sparc__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__("ldstub [%1], %0" - : "=r" (ret) - : "r" (p) - : "memory"); - - return (ret ? 1 : 0); -} -#elif defined(__arm__) -static inline int testandset (int *spinlock) -{ - register unsigned int ret; - __asm__ __volatile__("swp %0, %1, [%2]" - : "=r"(ret) - : "0"(1), "r"(spinlock)); - - return ret; -} -#elif defined(__mc68000) -static inline int testandset (int *p) -{ - char ret; - __asm__ __volatile__("tas %1; sne %0" - : "=r" (ret) - : "m" (p) - : "cc","memory"); - return ret; -} -#elif defined(__hppa__) - -/* Because malloc only guarantees 8-byte alignment for malloc'd data, - and GCC only guarantees 8-byte alignment for stack locals, we can't - be assured of 16-byte alignment for atomic lock data even if we - specify "__attribute ((aligned(16)))" in the type declaration. So, - we use a struct containing an array of four ints for the atomic lock - type and dynamically select the 16-byte aligned int from the array - for the semaphore. 
*/ -#define __PA_LDCW_ALIGNMENT 16 -static inline void *ldcw_align (void *p) { - unsigned long a = (unsigned long)p; - a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1); - return (void *)a; -} - -static inline int testandset (spinlock_t *p) -{ - unsigned int ret; - p = ldcw_align(p); - __asm__ __volatile__("ldcw 0(%1),%0" - : "=r" (ret) - : "r" (p) - : "memory" ); - return !ret; -} - -#elif defined(__ia64) - -#include - -static inline int testandset (int *p) -{ - return __sync_lock_test_and_set (p, 1); -} -#elif defined(__mips__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ( - " .set push \n" - " .set noat \n" - " .set mips2 \n" - "1: li $1, 1 \n" - " ll %0, %1 \n" - " sc $1, %1 \n" - " beqz $1, 1b \n" - " .set pop " - : "=r" (ret), "+R" (*p) - : - : "memory"); - - return ret; -} -#else -#error unimplemented CPU support -#endif - -#if defined(CONFIG_USER_ONLY) -static inline void spin_lock(spinlock_t *lock) -{ - while (testandset(lock)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - resetlock(lock); -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return !testandset(lock); -} -#else static inline void spin_lock(spinlock_t *lock) { } @@ -252,10 +55,4 @@ static inline void spin_unlock(spinlock_ { } -static inline int spin_trylock(spinlock_t *lock) -{ - return 1; -} -#endif - #endif --- qemu-timer.h 2013-12-18 11:11:38.000000000 -0500 +++ qemu-timer.h 2014-01-15 15:30:05.000000000 -0500 @@ -2,9 +2,26 @@ #define QEMU_TIMER_H #include "qemu-common.h" +#ifndef VBOX +#include "notify.h" +#include +#include + +#ifdef _WIN32 +#include +#endif +#endif /* timers */ -#ifndef VBOX +#ifdef VBOX + +#define use_icount 0 + +#else + +#define SCALE_MS 1000000 +#define SCALE_US 1000 +#define SCALE_NS 1 typedef struct QEMUClock QEMUClock; typedef void QEMUTimerCB(void *opaque); @@ -27,11 +44,16 @@ extern QEMUClock *vm_clock; the virtual clock. 
*/ extern QEMUClock *host_clock; -int64_t qemu_get_clock(QEMUClock *clock); int64_t qemu_get_clock_ns(QEMUClock *clock); void qemu_clock_enable(QEMUClock *clock, int enabled); +void qemu_clock_warp(QEMUClock *clock); -QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque); +void qemu_register_clock_reset_notifier(QEMUClock *clock, Notifier *notifier); +void qemu_unregister_clock_reset_notifier(QEMUClock *clock, + Notifier *notifier); + +QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque); void qemu_free_timer(QEMUTimer *ts); void qemu_del_timer(QEMUTimer *ts); void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time); @@ -40,7 +62,7 @@ int qemu_timer_expired(QEMUTimer *timer_ void qemu_run_all_timers(void); int qemu_alarm_pending(void); -int64_t qemu_next_deadline(void); +int64_t qemu_next_icount_deadline(void); void configure_alarms(char const *opt); void configure_icount(const char *option); int qemu_calculate_timeout(void); @@ -48,11 +70,75 @@ void init_clocks(void); int init_timer_alarm(void); void quit_timers(void); +int64_t cpu_get_ticks(void); +void cpu_enable_ticks(void); +void cpu_disable_ticks(void); + +static inline QEMUTimer *qemu_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque) +{ + return qemu_new_timer(clock, SCALE_NS, cb, opaque); +} + +static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, + void *opaque) +{ + return qemu_new_timer(clock, SCALE_MS, cb, opaque); +} + +static inline int64_t qemu_get_clock_ms(QEMUClock *clock) +{ + return qemu_get_clock_ns(clock) / SCALE_MS; +} + static inline int64_t get_ticks_per_sec(void) { return 1000000000LL; } +/* real time host monotonic timer */ +static inline int64_t get_clock_realtime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000); +} + +/* Warning: don't insert tracepoints into these functions, they are + also used by simpletrace backend and tracepoints would cause + an infinite recursion! */ +#ifdef _WIN32 +extern int64_t clock_freq; + +static inline int64_t get_clock(void) +{ + LARGE_INTEGER ti; + QueryPerformanceCounter(&ti); + return muldiv64(ti.QuadPart, get_ticks_per_sec(), clock_freq); +} + +#else + +extern int use_rt_clock; + +static inline int64_t get_clock(void) +{ +#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \ + || defined(__DragonFly__) || defined(__FreeBSD_kernel__) + if (use_rt_clock) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; + } else +#endif + { + /* XXX: using gettimeofday leads to problems if the date + changes, so it should be avoided. 
*/ + return get_clock_realtime(); + } +} +#endif void qemu_get_timer(QEMUFile *f, QEMUTimer *ts); void qemu_put_timer(QEMUFile *f, QEMUTimer *ts); @@ -69,15 +155,11 @@ uint64_t ptimer_get_count(ptimer_state * void ptimer_set_count(ptimer_state *s, uint64_t count); void ptimer_run(ptimer_state *s, int oneshot); void ptimer_stop(ptimer_state *s); -void qemu_put_ptimer(QEMUFile *f, ptimer_state *s); -void qemu_get_ptimer(QEMUFile *f, ptimer_state *s); /* icount */ int64_t qemu_icount_round(int64_t count); extern int64_t qemu_icount; -#endif /* !VBOX */ extern int use_icount; -#ifndef VBOX extern int icount_time_shift; extern int64_t qemu_icount_bias; int64_t cpu_get_icount(void); @@ -236,8 +318,6 @@ static inline int64_t cpu_get_real_ticks } #endif -#endif /* !VBOX */ - #ifdef NEED_CPU_H /* Deterministic execution requires that IO only be performed on the last instruction of a TB so that interrupts take effect immediately. */ @@ -254,8 +334,6 @@ static inline int can_do_io(CPUState *en } #endif -#ifndef VBOX - #ifdef CONFIG_PROFILER static inline int64_t profile_getclock(void) { --- softmmu_template.h 2013-12-18 11:11:38.000000000 -0500 +++ softmmu_template.h 2014-01-15 15:37:03.000000000 -0500 @@ -82,10 +82,12 @@ static inline DATA_TYPE glue(io_read, SU index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; env->mem_io_pc = (uintptr_t)retaddr; +#ifndef VBOX if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT) && !can_do_io(env)) { cpu_io_recompile(env, retaddr); } +#endif env->mem_io_vaddr = addr; #if SHIFT <= 2 @@ -233,10 +235,12 @@ static inline void glue(io_write, SUFFIX int index; index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1); physaddr = (physaddr & TARGET_PAGE_MASK) + addr; +#ifndef VBOX if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT) && !can_do_io(env)) { cpu_io_recompile(env, retaddr); } +#endif env->mem_io_vaddr = addr; env->mem_io_pc = (uintptr_t)retaddr; --- target-i386/cpu.h 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/cpu.h 2014-01-14 15:21:31.000000000 -0500 @@ -30,6 +30,7 @@ #define CPU_I386_H #include "config.h" +#include "qemu-common.h" #ifdef TARGET_X86_64 #define TARGET_LONG_BITS 64 @@ -275,16 +276,32 @@ #define PG_ERROR_RSVD_MASK 0x08 #define PG_ERROR_I_D_MASK 0x10 -#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ -#define MCE_CAP_DEF MCG_CTL_P +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) #define MCE_BANKS_DEF 10 +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. 
valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ + +/* MISC register defines */ +#define MCM_ADDR_SEGOFF 0 /* segment offset */ +#define MCM_ADDR_LINEAR 1 /* linear address */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCM_ADDR_MEM 3 /* memory address */ +#define MCM_ADDR_GENERIC 7 /* generic */ #define MSR_IA32_TSC 0x10 #define MSR_IA32_APICBASE 0x1b @@ -435,6 +452,17 @@ #define CPUID_EXT3_IBS (1 << 10) #define CPUID_EXT3_SKINIT (1 << 12) +#define CPUID_SVM_NPT (1 << 0) +#define CPUID_SVM_LBRV (1 << 1) +#define CPUID_SVM_SVMLOCK (1 << 2) +#define CPUID_SVM_NRIPSAVE (1 << 3) +#define CPUID_SVM_TSCSCALE (1 << 4) +#define CPUID_SVM_VMCBCLEAN (1 << 5) +#define CPUID_SVM_FLUSHASID (1 << 6) +#define CPUID_SVM_DECODEASSIST (1 << 7) +#define CPUID_SVM_PAUSEFILTER (1 << 10) +#define CPUID_SVM_PFTHRESHOLD (1 << 12) + #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */ #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */ @@ -443,6 +471,10 @@ #define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ #define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ +#define CPUID_VENDOR_VIA_1 0x746e6543 /* "Cent" */ +#define CPUID_VENDOR_VIA_2 0x48727561 /* "aurH" */ +#define CPUID_VENDOR_VIA_3 0x736c7561 /* "auls" */ + #define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */ @@ -468,6 +500,15 @@ #define EXCP_SYSCALL 0x100 /* only happens in user only emulation for syscall instruction */ +/* i386-specific interrupt pending bits. */ +#define CPU_INTERRUPT_SMI CPU_INTERRUPT_TGT_EXT_2 +#define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3 +#define CPU_INTERRUPT_MCE CPU_INTERRUPT_TGT_EXT_4 +#define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_INT_0 +#define CPU_INTERRUPT_INIT CPU_INTERRUPT_TGT_INT_1 +#define CPU_INTERRUPT_SIPI CPU_INTERRUPT_TGT_INT_2 + + enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ @@ -525,16 +566,6 @@ enum { CC_OP_NB, }; -#ifdef FLOATX80 -#define USE_X86LDOUBLE -#endif - -#ifdef USE_X86LDOUBLE -typedef floatx80 CPU86_LDouble; -#else -typedef float64 CPU86_LDouble; -#endif - typedef struct SegmentCache { uint32_t selector; #ifdef VBOX @@ -592,11 +623,7 @@ typedef union { #define MMX_Q(n) q typedef union { -#ifdef USE_X86LDOUBLE - CPU86_LDouble d __attribute__((aligned(16))); -#else - CPU86_LDouble d; -#endif + floatx80 d __attribute__((aligned(16))); MMXReg mmx; } FPReg; @@ -649,13 +676,17 @@ typedef struct CPUX86State { uint16_t fpuc; uint8_t fptags[8]; /* 0 = valid, 1 = empty */ FPReg fpregs[8]; + /* KVM-only so far */ + uint16_t fpop; + uint64_t fpip; + uint64_t fpdp; /* emulator internal variables */ float_status fp_status; #ifdef VBOX uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */ #endif - CPU86_LDouble ft0; + floatx80 ft0; #if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) uint32_t alignment4; /* long double is 12 byte, pad it to 16. 
*/ #endif @@ -697,10 +728,11 @@ typedef struct CPUX86State { #endif uint64_t system_time_msr; uint64_t wall_clock_msr; + uint64_t async_pf_en_msr; uint64_t tsc; - uint64_t pat; + uint64_t mcg_status; /* exception/interrupt handling */ int error_code; @@ -714,8 +746,14 @@ typedef struct CPUX86State { uint32_t smbase; int old_exception; /* exception in flight */ + /* KVM states, automatically cleared on reset */ + uint8_t nmi_injected; + uint8_t nmi_pending; + CPU_COMMON + uint64_t pat; + #ifdef VBOX /** cpu state flags. (see defines below) */ uint32_t state; @@ -748,6 +786,9 @@ typedef struct CPUX86State { uint32_t cpuid_apic_id; #ifndef VBOX int cpuid_vendor_override; + /* Store the results of Centaur's CPUID instructions */ + uint32_t cpuid_xlevel2; + uint32_t cpuid_ext4_features; /* MTRRs */ uint64_t mtrr_fixed[11]; @@ -759,21 +800,19 @@ typedef struct CPUX86State { int32_t exception_injected; int32_t interrupt_injected; uint8_t soft_interrupt; - uint8_t nmi_injected; - uint8_t nmi_pending; uint8_t has_error_code; uint32_t sipi_vector; - uint32_t cpuid_kvm_features; + uint32_t cpuid_svm_features; + bool tsc_valid; /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; - uint64 mcg_cap; - uint64 mcg_status; - uint64 mcg_ctl; - uint64 mce_banks[MCE_BANKS_DEF*4]; + uint64_t mcg_cap; + uint64_t mcg_ctl; + uint64_t mce_banks[MCE_BANKS_DEF*4]; uint64_t tsc_aux; @@ -856,11 +895,7 @@ typedef struct CPUX86State_Ver16 { unsigned int fpuc; uint8_t fptags[8]; /* 0 = valid, 1 = empty */ union { -# ifdef USE_X86LDOUBLE - CPU86_LDouble d __attribute__((aligned(16))); -# else - CPU86_LDouble d; -# endif + floatx80 d __attribute__((aligned(16))); MMXReg mmx; } fpregs[8]; @@ -869,7 +904,7 @@ typedef struct CPUX86State_Ver16 { # ifdef VBOX uint32_t alignment3[3]; /* force the long double to start a 16 byte line. */ # endif - CPU86_LDouble ft0; + floatx80 ft0; # if defined(VBOX) && defined(RT_ARCH_X86) && !defined(RT_OS_DARWIN) uint32_t alignment4; /* long double is 12 byte, pad it to 16. */ # endif @@ -925,9 +960,9 @@ CPUX86State *cpu_x86_init(const char *cp #endif /* !VBOX */ int cpu_x86_exec(CPUX86State *s); void cpu_x86_close(CPUX86State *s); -void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...), - const char *optarg); +void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg); void x86_cpudef_setup(void); +int cpu_x86_support_mca_broadcast(CPUState *env); int cpu_get_pic_interrupt(CPUX86State *s); /* MSDOS compatibility mode FPU exception support */ @@ -1048,8 +1083,8 @@ static inline void cpu_x86_set_cpl(CPUX8 /* op_helper.c */ /* used for debug or cpu save/restore */ -void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f); -CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper); +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f); +floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper); /* cpu-exec.c */ /* the following helpers are only usable in user mode simulation as @@ -1070,6 +1105,8 @@ void cpu_x86_cpuid(CPUX86State *env, uin uint32_t *ecx, uint32_t *edx); int cpu_x86_register (CPUX86State *env, const char *cpu_model); void cpu_clear_apic_feature(CPUX86State *env); +void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); /* helper.c */ int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr, @@ -1161,6 +1198,36 @@ static inline int cpu_mmu_index (CPUStat return (env->hflags & HF_CPL_MASK) == 3 ? 
1 : 0; } +#undef EAX +#define EAX (env->regs[R_EAX]) +#undef ECX +#define ECX (env->regs[R_ECX]) +#undef EDX +#define EDX (env->regs[R_EDX]) +#undef EBX +#define EBX (env->regs[R_EBX]) +#undef ESP +#define ESP (env->regs[R_ESP]) +#undef EBP +#define EBP (env->regs[R_EBP]) +#undef ESI +#define ESI (env->regs[R_ESI]) +#undef EDI +#define EDI (env->regs[R_EDI]) +#undef EIP +#define EIP (env->eip) +#define DF (env->df) + +#define CC_SRC (env->cc_src) +#define CC_DST (env->cc_dst) +#define CC_OP (env->cc_op) + +/* float macros */ +#define FT0 (env->ft0) +#define ST0 (env->fpregs[env->fpstt].d) +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) +#define ST1 ST(1) + /* translate.c */ void optimize_flags_init(void); @@ -1191,6 +1258,23 @@ extern uint8_t cpu_get_apic_tpr(CPUX86S extern uint64_t cpu_get_apic_base(CPUX86State *env); #endif /* VBOX */ +static inline bool cpu_has_work(CPUState *env) +{ + return ((env->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (env->interrupt_request & (CPU_INTERRUPT_NMI | + CPU_INTERRUPT_INIT | + CPU_INTERRUPT_SIPI | + CPU_INTERRUPT_MCE)); +} + +#include "exec-all.h" + +static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) +{ + env->eip = tb->pc - tb->cs_base; +} + static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc, target_ulong *cs_base, int *flags) { @@ -1205,5 +1289,23 @@ void apic_init_reset(CPUState *env); void apic_sipi(CPUState *env); void do_cpu_init(CPUState *env); void do_cpu_sipi(CPUState *env); + +#define MCE_INJECT_BROADCAST 1 +#define MCE_INJECT_UNCOND_AO 2 + +void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, + uint64_t status, uint64_t mcg_status, uint64_t addr, + uint64_t misc, int flags); #endif /* !VBOX */ + +/* op_helper.c */ +void do_interrupt(CPUState *env); +void do_interrupt_x86_hardirq(CPUState *env, int intno, int is_hw); + +void do_smm_enter(CPUState *env1); + +void svm_check_intercept(CPUState *env1, uint32_t type); + +uint32_t cpu_cc_compute_all(CPUState *env1, int op); + #endif /* CPU_I386_H */ --- target-i386/exec.h 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/exec.h 2014-01-14 15:21:31.000000000 -0500 @@ -43,48 +43,12 @@ register struct CPUX86State *env asm(ARE #include "qemu-common.h" #include "qemu-log.h" -#undef EAX -#define EAX (env->regs[R_EAX]) -#undef ECX -#define ECX (env->regs[R_ECX]) -#undef EDX -#define EDX (env->regs[R_EDX]) -#undef EBX -#define EBX (env->regs[R_EBX]) -#undef ESP -#define ESP (env->regs[R_ESP]) -#undef EBP -#define EBP (env->regs[R_EBP]) -#undef ESI -#define ESI (env->regs[R_ESI]) -#undef EDI -#define EDI (env->regs[R_EDI]) -#undef EIP -#define EIP (env->eip) -#define DF (env->df) - -#define CC_SRC (env->cc_src) -#define CC_DST (env->cc_dst) -#define CC_OP (env->cc_op) - -/* float macros */ -#define FT0 (env->ft0) -#define ST0 (env->fpregs[env->fpstt].d) -#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) -#define ST1 ST(1) - #include "cpu.h" -#include "exec-all.h" /* op_helper.c */ -void do_interrupt(int intno, int is_int, int error_code, - target_ulong next_eip, int is_hw); -void do_interrupt_user(int intno, int is_int, int error_code, - target_ulong next_eip); void QEMU_NORETURN raise_exception_err(int exception_index, int error_code); void QEMU_NORETURN raise_exception(int exception_index); void QEMU_NORETURN raise_exception_env(int exception_index, CPUState *nenv); -void do_smm_enter(void); /* n must be a constant to be efficient */ static inline target_long lshift(target_long x, int n) @@ -97,52 +61,12 @@ 
static inline target_long lshift(target_ #include "helper.h" -static inline void svm_check_intercept(uint32_t type) -{ - helper_svm_check_intercept_param(type, 0); -} - #if !defined(CONFIG_USER_ONLY) #include "softmmu_exec.h" #endif /* !defined(CONFIG_USER_ONLY) */ -#ifdef USE_X86LDOUBLE -/* use long double functions */ -#define floatx_to_int32 floatx80_to_int32 -#define floatx_to_int64 floatx80_to_int64 -#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero -#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero -#define int32_to_floatx int32_to_floatx80 -#define int64_to_floatx int64_to_floatx80 -#define float32_to_floatx float32_to_floatx80 -#define float64_to_floatx float64_to_floatx80 -#define floatx_to_float32 floatx80_to_float32 -#define floatx_to_float64 floatx80_to_float64 -#define floatx_abs floatx80_abs -#define floatx_chs floatx80_chs -#define floatx_round_to_int floatx80_round_to_int -#define floatx_compare floatx80_compare -#define floatx_compare_quiet floatx80_compare_quiet -#else -#define floatx_to_int32 float64_to_int32 -#define floatx_to_int64 float64_to_int64 -#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero -#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero -#define int32_to_floatx int32_to_float64 -#define int64_to_floatx int64_to_float64 -#define float32_to_floatx float32_to_float64 -#define float64_to_floatx(x, e) (x) -#define floatx_to_float32 float64_to_float32 -#define floatx_to_float64(x, e) (x) -#define floatx_abs float64_abs -#define floatx_chs float64_chs -#define floatx_round_to_int float64_round_to_int -#define floatx_compare float64_compare -#define floatx_compare_quiet float64_compare_quiet -#endif - #ifdef VBOX # ifdef IPRT_NO_CRT # undef sin @@ -176,17 +100,6 @@ static inline void svm_check_intercept(u #define MAXTAN 9223372036854775808.0 -#ifdef USE_X86LDOUBLE - -/* only for x86 */ -typedef union { - long double d; - struct { - unsigned long long lower; - unsigned short upper; - } l; -} CPU86_LDoubleU; - /* the following deal with x86 long double-precision numbers */ #define MAXEXPD 0x7fff #define EXPBIAS 16383 @@ -195,40 +108,6 @@ typedef union { #define MANTD(fp) (fp.l.lower) #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS -#else - -/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */ -typedef union { - double d; -#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__) - struct { - uint32_t lower; - int32_t upper; - } l; -#else - struct { - int32_t upper; - uint32_t lower; - } l; -#endif -#ifndef __arm__ - int64_t ll; -#endif -} CPU86_LDoubleU; - -/* the following deal with IEEE double-precision numbers */ -#define MAXEXPD 0x7ff -#define EXPBIAS 1023 -#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF) -#define SIGND(fp) ((fp.l.upper) & 0x80000000) -#ifdef __arm__ -#define MANTD(fp) (fp.l.lower | ((uint64_t)(fp.l.upper & ((1 << 20) - 1)) << 32)) -#else -#define MANTD(fp) (fp.ll & ((1LL << 52) - 1)) -#endif -#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7ff << 20)) | (EXPBIAS << 20) -#endif - static inline void fpush(void) { env->fpstt = (env->fpstt - 1) & 7; @@ -241,65 +120,24 @@ static inline void fpop(void) env->fpstt = (env->fpstt + 1) & 7; } -#ifndef USE_X86LDOUBLE -static inline CPU86_LDouble helper_fldt(target_ulong ptr) -{ - CPU86_LDoubleU temp; - int upper, e; - uint64_t ll; - - /* mantissa */ - upper = lduw(ptr + 8); - /* XXX: handle overflow ? 
*/ - e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ - e |= (upper >> 4) & 0x800; /* sign */ - ll = (ldq(ptr) >> 11) & ((1LL << 52) - 1); -#ifdef __arm__ - temp.l.upper = (e << 20) | (ll >> 32); - temp.l.lower = ll; -#else - temp.ll = ll | ((uint64_t)e << 52); -#endif - return temp.d; -} - -static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) -{ - CPU86_LDoubleU temp; - int e; - - temp.d = f; - /* mantissa */ - stq(ptr, (MANTD(temp) << 11) | (1LL << 63)); - /* exponent + sign */ - e = EXPD(temp) - EXPBIAS + 16383; - e |= SIGND(temp) >> 16; - stw(ptr + 8, e); -} -#else - -/* we use memory access macros */ - -static inline CPU86_LDouble helper_fldt(target_ulong ptr) +static inline floatx80 helper_fldt(target_ulong ptr) { - CPU86_LDoubleU temp; + CPU_LDoubleU temp; temp.l.lower = ldq(ptr); temp.l.upper = lduw(ptr + 8); return temp.d; } -static inline void helper_fstt(CPU86_LDouble f, target_ulong ptr) +static inline void helper_fstt(floatx80 f, target_ulong ptr) { - CPU86_LDoubleU temp; + CPU_LDoubleU temp; temp.d = f; stq(ptr, temp.l.lower); stw(ptr + 8, temp.l.upper); } -#endif /* USE_X86LDOUBLE */ - #define FPUS_IE (1 << 0) #define FPUS_DE (1 << 1) #define FPUS_ZE (1 << 2) @@ -326,31 +164,6 @@ static inline void load_eflags(int eflag (eflags & update_mask) | 0x2; } -static inline int cpu_has_work(CPUState *env) -{ - int work; - - work = (env->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK); - work |= env->interrupt_request & CPU_INTERRUPT_NMI; - work |= env->interrupt_request & CPU_INTERRUPT_INIT; - work |= env->interrupt_request & CPU_INTERRUPT_SIPI; - - return work; -} - -static inline int cpu_halted(CPUState *env) { - /* handle exit of HALTED state */ - if (!env->halted) - return 0; - /* disable halt condition */ - if (cpu_has_work(env)) { - env->halted = 0; - return 0; - } - return EXCP_HALTED; -} - /* load efer and update the corresponding hflags. XXX: do consistency checks with cpuid bits ? 
*/ static inline void cpu_load_efer(CPUState *env, uint64_t val) @@ -362,9 +175,3 @@ static inline void cpu_load_efer(CPUStat if (env->efer & MSR_EFER_SVME) env->hflags |= HF_SVME_MASK; } - -static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb) -{ - env->eip = tb->pc - tb->cs_base; -} - --- target-i386/helper.c 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/helper.c 2014-01-14 15:21:31.000000000 -0500 @@ -32,13 +32,17 @@ #include #ifndef VBOX #include -#include #endif /* !VBOX */ #include "cpu.h" -#include "exec-all.h" #include "qemu-common.h" #include "kvm.h" +#ifndef CONFIG_USER_ONLY +#ifndef VBOX +#include "sysemu.h" +#include "monitor.h" +#endif /* !VBOX */ +#endif //#define DEBUG_MMU @@ -109,22 +113,50 @@ void cpu_reset(CPUX86State *env) env->mxcsr = 0x1f80; + env->pat = 0x0007040600070406ULL; + memset(env->dr, 0, sizeof(env->dr)); env->dr[6] = DR6_FIXED_1; env->dr[7] = DR7_FIXED_1; cpu_breakpoint_remove_all(env, BP_CPU); cpu_watchpoint_remove_all(env, BP_CPU); +} +void cpu_x86_close(CPUX86State *env) +{ #ifndef VBOX - env->mcg_status = 0; + qemu_free(env); #endif } -void cpu_x86_close(CPUX86State *env) +static void cpu_x86_version(CPUState *env, int *family, int *model) { #ifndef VBOX - qemu_free(env); + int cpuver = env->cpuid_version; + + if (family == NULL || model == NULL) { + return; + } + + *family = (cpuver >> 8) & 0x0f; + *model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0x0f); +#endif +} + +/* Broadcast MCA signal for processor version 06H_EH and above */ +int cpu_x86_support_mca_broadcast(CPUState *env) +{ +#ifndef VBOX + int family = 0; + int model = 0; + + cpu_x86_version(env, &family, &model); + if ((family == 6 && model >= 14) || family > 6) { + return 1; + } #endif + + return 0; } /***********************************************************/ @@ -186,8 +218,7 @@ static const char *cc_op_str[] = { }; static void -cpu_x86_dump_seg_cache(CPUState *env, FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...), +cpu_x86_dump_seg_cache(CPUState *env, FILE *f, fprintf_function cpu_fprintf, const char *name, struct SegmentCache *sc) { #ifdef VBOX @@ -196,12 +227,12 @@ cpu_x86_dump_seg_cache(CPUState *env, FI #ifdef TARGET_X86_64 if (env->hflags & HF_CS64_MASK) { cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name, - sc->selector, sc->base, sc->limit, sc->flags); + sc->selector, sc->base, sc->limit, sc->flags & 0x00ffff00); } else #endif { cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector, - (uint32_t)sc->base, sc->limit, sc->flags); + (uint32_t)sc->base, sc->limit, sc->flags & 0x00ffff00); } if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK)) @@ -247,8 +278,10 @@ done: #endif } -void cpu_dump_state(CPUState *env, FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...), +#define DUMP_CODE_BYTES_TOTAL 50 +#define DUMP_CODE_BYTES_BACKWARD 20 + +void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { int eflags, i, nb; @@ -361,9 +394,11 @@ void cpu_dump_state(CPUState *env, FILE (uint32_t)env->cr[2], (uint32_t)env->cr[3], (uint32_t)env->cr[4]); - for(i = 0; i < 4; i++) - cpu_fprintf(f, "DR%d=%08x ", i, env->dr[i]); - cpu_fprintf(f, "\nDR6=%08x DR7=%08x\n", env->dr[6], env->dr[7]); + for(i = 0; i < 4; i++) { + cpu_fprintf(f, "DR%d=" TARGET_FMT_lx " ", i, env->dr[i]); + } + cpu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n", + env->dr[6], env->dr[7]); } if (flags & X86_DUMP_CCOP) { if ((unsigned)env->cc_op < CC_OP_NB) @@ -397,21 +432,10 @@ void cpu_dump_state(CPUState *env, FILE 
fptag, env->mxcsr); for(i=0;i<8;i++) { -#if defined(USE_X86LDOUBLE) - union { - long double d; - struct { - uint64_t lower; - uint16_t upper; - } l; - } tmp; - tmp.d = env->fpregs[i].d; + CPU_LDoubleU u; + u.d = env->fpregs[i].d; cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x", - i, tmp.l.lower, tmp.l.upper); -#else - cpu_fprintf(f, "FPR%d=%016" PRIx64, - i, env->fpregs[i].mmx.q); -#endif + i, u.l.lower, u.l.upper); if ((i & 1) == 1) cpu_fprintf(f, "\n"); else @@ -434,6 +458,26 @@ void cpu_dump_state(CPUState *env, FILE cpu_fprintf(f, " "); } } +#ifndef VBOX + if (flags & CPU_DUMP_CODE) { + target_ulong base = env->segs[R_CS].base + env->eip; + target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); + uint8_t code; + char codestr[3]; + + cpu_fprintf(f, "Code="); + for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { + if (cpu_memory_rw_debug(env, base - offs + i, &code, 1, 0) == 0) { + snprintf(codestr, sizeof(codestr), "%02x", code); + } else { + snprintf(codestr, sizeof(codestr), "??"); + } + cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", + i == offs ? "<" : "", codestr, i == offs ? ">" : ""); + } + cpu_fprintf(f, "\n"); + } +#endif #ifdef VBOX # undef cpu_fprintf #endif @@ -1059,58 +1103,140 @@ static void breakpoint_handler(CPUState } #ifndef VBOX -/* This should come from sysemu.h - if we could include it here... */ -void qemu_system_reset_request(void); +typedef struct MCEInjectionParams { + Monitor *mon; + CPUState *env; + int bank; + uint64_t status; + uint64_t mcg_status; + uint64_t addr; + uint64_t misc; + int flags; +} MCEInjectionParams; + +static void do_inject_x86_mce(void *data) +{ + MCEInjectionParams *params = data; + CPUState *cenv = params->env; + uint64_t *banks = cenv->mce_banks + 4 * params->bank; -void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc) -{ - uint64_t mcg_cap = cenv->mcg_cap; - unsigned bank_num = mcg_cap & 0xff; - uint64_t *banks = cenv->mce_banks; - - if (bank >= bank_num || !(status & MCI_STATUS_VAL)) - return; + cpu_synchronize_state(cenv); /* - * if MSR_MCG_CTL is not all 1s, the uncorrected error - * reporting is disabled + * If there is an MCE exception being processed, ignore this SRAO MCE + * unless unconditional injection was requested. 
*/ - if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && - cenv->mcg_ctl != ~(uint64_t)0) + if (!(params->flags & MCE_INJECT_UNCOND_AO) + && !(params->status & MCI_STATUS_AR) + && (cenv->mcg_status & MCG_STATUS_MCIP)) { return; - banks += 4 * bank; - /* - * if MSR_MCi_CTL is not all 1s, the uncorrected error - * reporting is disabled for the bank - */ - if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0) - return; - if (status & MCI_STATUS_UC) { + } + + if (params->status & MCI_STATUS_UC) { + /* + * if MSR_MCG_CTL is not all 1s, the uncorrected error + * reporting is disabled + */ + if ((cenv->mcg_cap & MCG_CTL_P) && cenv->mcg_ctl != ~(uint64_t)0) { + monitor_printf(params->mon, + "CPU %d: Uncorrected error reporting disabled\n", + cenv->cpu_index); + return; + } + + /* + * if MSR_MCi_CTL is not all 1s, the uncorrected error + * reporting is disabled for the bank + */ + if (banks[0] != ~(uint64_t)0) { + monitor_printf(params->mon, + "CPU %d: Uncorrected error reporting disabled for" + " bank %d\n", + cenv->cpu_index, params->bank); + return; + } + if ((cenv->mcg_status & MCG_STATUS_MCIP) || !(cenv->cr[4] & CR4_MCE_MASK)) { - fprintf(stderr, "injects mce exception while previous " - "one is in progress!\n"); + monitor_printf(params->mon, + "CPU %d: Previous MCE still in progress, raising" + " triple fault\n", + cenv->cpu_index); qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); qemu_system_reset_request(); return; } - if (banks[1] & MCI_STATUS_VAL) - status |= MCI_STATUS_OVER; - banks[2] = addr; - banks[3] = misc; - cenv->mcg_status = mcg_status; - banks[1] = status; + if (banks[1] & MCI_STATUS_VAL) { + params->status |= MCI_STATUS_OVER; + } + banks[2] = params->addr; + banks[3] = params->misc; + cenv->mcg_status = params->mcg_status; + banks[1] = params->status; cpu_interrupt(cenv, CPU_INTERRUPT_MCE); } else if (!(banks[1] & MCI_STATUS_VAL) || !(banks[1] & MCI_STATUS_UC)) { - if (banks[1] & MCI_STATUS_VAL) - status |= MCI_STATUS_OVER; - banks[2] = addr; - banks[3] = misc; - banks[1] = status; - } else + if (banks[1] & MCI_STATUS_VAL) { + params->status |= MCI_STATUS_OVER; + } + banks[2] = params->addr; + banks[3] = params->misc; + banks[1] = params->status; + } else { banks[1] |= MCI_STATUS_OVER; + } +} + +void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, + uint64_t status, uint64_t mcg_status, uint64_t addr, + uint64_t misc, int flags) +{ + MCEInjectionParams params = { + .mon = mon, + .env = cenv, + .bank = bank, + .status = status, + .mcg_status = mcg_status, + .addr = addr, + .misc = misc, + .flags = flags, + }; + unsigned bank_num = cenv->mcg_cap & 0xff; + CPUState *env; + + if (!cenv->mcg_cap) { + monitor_printf(mon, "MCE injection not supported\n"); + return; + } + if (bank >= bank_num) { + monitor_printf(mon, "Invalid MCE bank number\n"); + return; + } + if (!(status & MCI_STATUS_VAL)) { + monitor_printf(mon, "Invalid MCE status code\n"); + return; + } + if ((flags & MCE_INJECT_BROADCAST) + && !cpu_x86_support_mca_broadcast(cenv)) { + monitor_printf(mon, "Guest CPU does not support MCA broadcast\n"); + return; + } + + run_on_cpu(cenv, do_inject_x86_mce, &params); + if (flags & MCE_INJECT_BROADCAST) { + params.bank = 1; + params.status = MCI_STATUS_VAL | MCI_STATUS_UC; + params.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; + params.addr = 0; + params.misc = 0; + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (cenv == env) { + continue; + } + params.env = env; + run_on_cpu(cenv, do_inject_x86_mce, &params); + } + } } #endif /* !VBOX */ #endif /*
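As an illustrative aside, not part of the patch itself: the new cpu_x86_inject_mce() entry point validates its arguments and delivers the event through run_on_cpu(), optionally broadcasting it to the other CPUs. A minimal hypothetical caller, using only the prototype and constants introduced in the hunks above (the wrapper name and the particular status bits are illustrative), could look like this:

    /* hypothetical example only -- inject a signalled, action-required,
       uncorrected error into bank 1 and broadcast it to the other CPUs */
    static void example_inject_fatal_mce(Monitor *mon, CPUState *cpu)
    {
        cpu_x86_inject_mce(mon, cpu, 1,
                           MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_S | MCI_STATUS_AR,
                           MCG_STATUS_MCIP | MCG_STATUS_RIPV,
                           0 /* addr */, 0 /* misc */,
                           MCE_INJECT_BROADCAST);
    }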
!CONFIG_USER_ONLY */ @@ -1119,15 +1245,16 @@ void cpu_inject_x86_mce(CPUState *cenv, static void mce_init(CPUX86State *cenv) { - unsigned int bank, bank_num; + unsigned int bank; - if (((cenv->cpuid_version >> 8)&0xf) >= 6 - && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) { + if (((cenv->cpuid_version >> 8) & 0xf) >= 6 + && (cenv->cpuid_features & (CPUID_MCE | CPUID_MCA)) == + (CPUID_MCE | CPUID_MCA)) { cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF; cenv->mcg_ctl = ~(uint64_t)0; - bank_num = MCE_BANKS_DEF; - for (bank = 0; bank < bank_num; bank++) - cenv->mce_banks[bank*4] = ~(uint64_t)0; + for (bank = 0; bank < MCE_BANKS_DEF; bank++) { + cenv->mce_banks[bank * 4] = ~(uint64_t)0; + } } } @@ -1206,8 +1333,11 @@ CPUX86State *cpu_x86_init(CPUX86State *e void do_cpu_init(CPUState *env) { int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI; + uint64_t pat = env->pat; + cpu_reset(env); env->interrupt_request = sipi; + env->pat = pat; apic_init_reset(env->apic_state); env->halted = !cpu_is_bsp(env); } --- target-i386/op_helper.c 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/op_helper.c 2014-01-14 15:21:31.000000000 -0500 @@ -26,8 +26,8 @@ * of the LGPL is applied is otherwise unspecified. */ +#include #include "exec.h" -#include "exec-all.h" #include "host-utils.h" #include "ioport.h" @@ -109,16 +109,9 @@ static const uint8_t rclb_table[32] = { 6, 7, 8, 0, 1, 2, 3, 4, }; -static const CPU86_LDouble f15rk[7] = -{ - 0.00000000000000000000L, - 1.00000000000000000000L, - 3.14159265358979323851L, /*pi*/ - 0.30102999566398119523L, /*lg2*/ - 0.69314718055994530943L, /*ln2*/ - 1.44269504088896340739L, /*l2e*/ - 3.32192809488736234781L, /*l2t*/ -}; +#define floatx80_lg2 make_floatx80( 0x3ffd, 0x9a209a84fbcff799LL ) +#define floatx80_l2e make_floatx80( 0x3fff, 0xb8aa3b295c17f0bcLL ) +#define floatx80_l2t make_floatx80( 0x4000, 0xd49a784bcd1b8afeLL ) /* broken thread support */ @@ -432,6 +425,10 @@ static void switch_tss(int tss_selector, if ((type & 7) != 1) raise_exception_err(EXCP0D_GPF, tss_selector & 0xfffc); } + /* XXX: avoid a compiler warning, see + http://support.amd.com/us/Processor_TechDocs/24593.pdf + chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */ + (void)new_trap; if (!(e2 & DESC_P_MASK)) raise_exception_err(EXCP0B_NOSEG, tss_selector & 0xfffc); @@ -895,7 +892,7 @@ static void do_interrupt_protected(int i #ifdef VBOX if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) - cpu_loop_exit(); + cpu_loop_exit(env); #endif has_error_code = 0; @@ -1260,7 +1257,7 @@ static void do_interrupt64(int intno, in #ifdef VBOX if (remR3NotifyTrap(env, intno, error_code, next_eip) != VINF_SUCCESS) - cpu_loop_exit(); + cpu_loop_exit(env); #endif has_error_code = 0; @@ -1389,7 +1386,7 @@ void helper_syscall(int next_eip_addend) { env->exception_index = EXCP_SYSCALL; env->exception_next_eip = env->eip + next_eip_addend; - cpu_loop_exit(); + cpu_loop_exit(env); } #else void helper_syscall(int next_eip_addend) @@ -1604,9 +1601,10 @@ static void do_interrupt_real(int intno, env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK); } +#if defined(CONFIG_USER_ONLY) /* fake user mode interrupt */ -void do_interrupt_user(int intno, int is_int, int error_code, - target_ulong next_eip) +static void do_interrupt_user(int intno, int is_int, int error_code, + target_ulong next_eip) { SegmentCache *dt; target_ulong ptr; @@ -1635,7 +1633,8 @@ void do_interrupt_user(int intno, int is EIP = next_eip; } -#if !defined(CONFIG_USER_ONLY) +#else + static void 
handle_even_inj(int intno, int is_int, int error_code, int is_hw, int rm) { @@ -1661,8 +1660,8 @@ static void handle_even_inj(int intno, i * the int instruction. next_eip is the EIP value AFTER the interrupt * instruction. It is only relevant if is_int is TRUE. */ -void do_interrupt(int intno, int is_int, int error_code, - target_ulong next_eip, int is_hw) +static void do_interrupt_all(int intno, int is_int, int error_code, + target_ulong next_eip, int is_hw) { if (qemu_loglevel_mask(CPU_LOG_INT)) { if ((env->cr[0] & CR0_PE_MASK)) { @@ -1746,6 +1745,54 @@ void do_interrupt(int intno, int is_int, #endif } +void do_interrupt(CPUState *env1) +{ + CPUState *saved_env; + + saved_env = env; + env = env1; +#if defined(CONFIG_USER_ONLY) + /* if user mode only, we simulate a fake exception + which will be handled outside the cpu execution + loop */ + do_interrupt_user(env->exception_index, + env->exception_is_int, + env->error_code, + env->exception_next_eip); + /* successfully delivered */ + env->old_exception = -1; +#else + /* simulate a real cpu exception. On i386, it can + trigger new exceptions, but we do not handle + double or triple faults yet. */ +# ifdef IEM_VERIFICATION_MODE /* Ugly hack*/ + do_interrupt_all(env->exception_index, + env->exception_is_int && env->exception_is_int != 0x42, + env->error_code, + env->exception_next_eip, + env->exception_is_int == 0x42); +# else + do_interrupt_all(env->exception_index, + env->exception_is_int, + env->error_code, + env->exception_next_eip, 0); +# endif + /* successfully delivered */ + env->old_exception = -1; +#endif + env = saved_env; +} + +void do_interrupt_x86_hardirq(CPUState *env1, int intno, int is_hw) +{ + CPUState *saved_env; + + saved_env = env; + env = env1; + do_interrupt_all(intno, 0, 0, 0, is_hw); + env = saved_env; +} + /* This should come from sysemu.h - if we could include it here... 
*/ void qemu_system_reset_request(void); @@ -1819,7 +1866,7 @@ static void QEMU_NORETURN raise_interrup env->error_code = error_code; env->exception_is_int = is_int; env->exception_next_eip = env->eip + next_eip_addend; - cpu_loop_exit(); + cpu_loop_exit(env); } /* shortcuts to generate exceptions */ @@ -1843,7 +1890,7 @@ void raise_exception_env(int exception_i #if defined(CONFIG_USER_ONLY) -void do_smm_enter(void) +void do_smm_enter(CPUState *env1) { } @@ -1859,11 +1906,15 @@ void helper_rsm(void) #define SMM_REVISION_ID 0x00020000 #endif -void do_smm_enter(void) +void do_smm_enter(CPUState *env1) { target_ulong sm_state; SegmentCache *dt; int i, offset; + CPUState *saved_env; + + saved_env = env; + env = env1; qemu_log_mask(CPU_LOG_INT, "SMM: enter\n"); log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP); @@ -1990,6 +2041,7 @@ void do_smm_enter(void) cpu_x86_update_cr4(env, 0); env->dr[7] = 0x00000400; CC_OP = CC_OP_EFLAGS; + env = saved_env; } void helper_rsm(void) @@ -4263,6 +4315,28 @@ void helper_verw(target_ulong selector1) /* x87 FPU helpers */ +static inline double floatx80_to_double(floatx80 a) +{ + union { + float64 f64; + double d; + } u; + + u.f64 = floatx80_to_float64(a, &env->fp_status); + return u.d; +} + +static inline floatx80 double_to_floatx80(double a) +{ + union { + float64 f64; + double d; + } u; + + u.d = a; + return float64_to_floatx80(u.f64, &env->fp_status); +} + static void fpu_set_exception(int mask) { env->fpus |= mask; @@ -4270,11 +4344,12 @@ static void fpu_set_exception(int mask) env->fpus |= FPUS_SE | FPUS_B; } -static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b) +static inline floatx80 helper_fdiv(floatx80 a, floatx80 b) { - if (b == 0.0) + if (floatx80_is_zero(b)) { fpu_set_exception(FPUS_ZE); - return a / b; + } + return floatx80_div(a, b, &env->fp_status); } static void fpu_raise_exception(void) @@ -4296,7 +4371,7 @@ void helper_flds_FT0(uint32_t val) uint32_t i; } u; u.i = val; - FT0 = float32_to_floatx(u.f, &env->fp_status); + FT0 = float32_to_floatx80(u.f, &env->fp_status); } void helper_fldl_FT0(uint64_t val) @@ -4306,12 +4381,12 @@ void helper_fldl_FT0(uint64_t val) uint64_t i; } u; u.i = val; - FT0 = float64_to_floatx(u.f, &env->fp_status); + FT0 = float64_to_floatx80(u.f, &env->fp_status); } void helper_fildl_FT0(int32_t val) { - FT0 = int32_to_floatx(val, &env->fp_status); + FT0 = int32_to_floatx80(val, &env->fp_status); } void helper_flds_ST0(uint32_t val) @@ -4323,7 +4398,7 @@ void helper_flds_ST0(uint32_t val) } u; new_fpstt = (env->fpstt - 1) & 7; u.i = val; - env->fpregs[new_fpstt].d = float32_to_floatx(u.f, &env->fp_status); + env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } @@ -4337,7 +4412,7 @@ void helper_fldl_ST0(uint64_t val) } u; new_fpstt = (env->fpstt - 1) & 7; u.i = val; - env->fpregs[new_fpstt].d = float64_to_floatx(u.f, &env->fp_status); + env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } @@ -4346,7 +4421,7 @@ void helper_fildl_ST0(int32_t val) { int new_fpstt; new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = int32_to_floatx(val, &env->fp_status); + env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } @@ -4355,7 +4430,7 @@ void helper_fildll_ST0(int64_t val) { int new_fpstt; new_fpstt = (env->fpstt 
- 1) & 7; - env->fpregs[new_fpstt].d = int64_to_floatx(val, &env->fp_status); + env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } @@ -4370,7 +4445,7 @@ RTCCUINTREG helper_fsts_ST0(void) float32 f; uint32_t i; } u; - u.f = floatx_to_float32(ST0, &env->fp_status); + u.f = floatx80_to_float32(ST0, &env->fp_status); return u.i; } @@ -4380,7 +4455,7 @@ uint64_t helper_fstl_ST0(void) float64 f; uint64_t i; } u; - u.f = floatx_to_float64(ST0, &env->fp_status); + u.f = floatx80_to_float64(ST0, &env->fp_status); return u.i; } @@ -4391,7 +4466,7 @@ RTCCINTREG helper_fist_ST0(void) #endif { int32_t val; - val = floatx_to_int32(ST0, &env->fp_status); + val = floatx80_to_int32(ST0, &env->fp_status); if (val != (int16_t)val) val = -32768; return val; @@ -4404,14 +4479,14 @@ RTCCINTREG helper_fistl_ST0(void) #endif { int32_t val; - val = floatx_to_int32(ST0, &env->fp_status); + val = floatx80_to_int32(ST0, &env->fp_status); return val; } int64_t helper_fistll_ST0(void) { int64_t val; - val = floatx_to_int64(ST0, &env->fp_status); + val = floatx80_to_int64(ST0, &env->fp_status); return val; } @@ -4422,7 +4497,7 @@ RTCCINTREG helper_fistt_ST0(void) #endif { int32_t val; - val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); if (val != (int16_t)val) val = -32768; return val; @@ -4435,14 +4510,14 @@ RTCCINTREG helper_fisttl_ST0(void) #endif { int32_t val; - val = floatx_to_int32_round_to_zero(ST0, &env->fp_status); + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); return val; } int64_t helper_fisttll_ST0(void) { int64_t val; - val = floatx_to_int64_round_to_zero(ST0, &env->fp_status); + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); return val; } @@ -4511,7 +4586,7 @@ void helper_fmov_STN_ST0(int st_index) void helper_fxchg_ST0_STN(int st_index) { - CPU86_LDouble tmp; + floatx80 tmp; tmp = ST(st_index); ST(st_index) = ST0; ST0 = tmp; @@ -4525,7 +4600,7 @@ void helper_fcom_ST0_FT0(void) { int ret; - ret = floatx_compare(ST0, FT0, &env->fp_status); + ret = floatx80_compare(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; } @@ -4533,7 +4608,7 @@ void helper_fucom_ST0_FT0(void) { int ret; - ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1]; } @@ -4544,7 +4619,7 @@ void helper_fcomi_ST0_FT0(void) int eflags; int ret; - ret = floatx_compare(ST0, FT0, &env->fp_status); + ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = helper_cc_compute_all(CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; @@ -4555,7 +4630,7 @@ void helper_fucomi_ST0_FT0(void) int eflags; int ret; - ret = floatx_compare_quiet(ST0, FT0, &env->fp_status); + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = helper_cc_compute_all(CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; @@ -4563,22 +4638,22 @@ void helper_fucomi_ST0_FT0(void) void helper_fadd_ST0_FT0(void) { - ST0 += FT0; + ST0 = floatx80_add(ST0, FT0, &env->fp_status); } void helper_fmul_ST0_FT0(void) { - ST0 *= FT0; + ST0 = floatx80_mul(ST0, FT0, &env->fp_status); } void helper_fsub_ST0_FT0(void) { - ST0 -= FT0; + ST0 = floatx80_sub(ST0, FT0, &env->fp_status); } void helper_fsubr_ST0_FT0(void) { - ST0 = FT0 - ST0; + ST0 = floatx80_sub(FT0, 
ST0, &env->fp_status); } void helper_fdiv_ST0_FT0(void) @@ -4595,36 +4670,34 @@ void helper_fdivr_ST0_FT0(void) void helper_fadd_STN_ST0(int st_index) { - ST(st_index) += ST0; + ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); } void helper_fmul_STN_ST0(int st_index) { - ST(st_index) *= ST0; + ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); } void helper_fsub_STN_ST0(int st_index) { - ST(st_index) -= ST0; + ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); } void helper_fsubr_STN_ST0(int st_index) { - CPU86_LDouble *p; - p = &ST(st_index); - *p = ST0 - *p; + ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); } void helper_fdiv_STN_ST0(int st_index) { - CPU86_LDouble *p; + floatx80 *p; p = &ST(st_index); *p = helper_fdiv(*p, ST0); } void helper_fdivr_STN_ST0(int st_index) { - CPU86_LDouble *p; + floatx80 *p; p = &ST(st_index); *p = helper_fdiv(ST0, *p); } @@ -4632,52 +4705,52 @@ void helper_fdivr_STN_ST0(int st_index) /* misc FPU operations */ void helper_fchs_ST0(void) { - ST0 = floatx_chs(ST0); + ST0 = floatx80_chs(ST0); } void helper_fabs_ST0(void) { - ST0 = floatx_abs(ST0); + ST0 = floatx80_abs(ST0); } void helper_fld1_ST0(void) { - ST0 = f15rk[1]; + ST0 = floatx80_one; } void helper_fldl2t_ST0(void) { - ST0 = f15rk[6]; + ST0 = floatx80_l2t; } void helper_fldl2e_ST0(void) { - ST0 = f15rk[5]; + ST0 = floatx80_l2e; } void helper_fldpi_ST0(void) { - ST0 = f15rk[2]; + ST0 = floatx80_pi; } void helper_fldlg2_ST0(void) { - ST0 = f15rk[3]; + ST0 = floatx80_lg2; } void helper_fldln2_ST0(void) { - ST0 = f15rk[4]; + ST0 = floatx80_ln2; } void helper_fldz_ST0(void) { - ST0 = f15rk[0]; + ST0 = floatx80_zero; } void helper_fldz_FT0(void) { - FT0 = f15rk[0]; + FT0 = floatx80_zero; } #ifndef VBOX @@ -4719,7 +4792,6 @@ static void update_fp_status(void) break; } set_float_rounding_mode(rnd_type, &env->fp_status); -#ifdef FLOATX80 switch((env->fpuc >> 8) & 3) { case 0: rnd_type = 32; @@ -4733,7 +4805,6 @@ static void update_fp_status(void) break; } set_floatx80_rounding_precision(rnd_type, &env->fp_status); -#endif } void helper_fldcw(uint32_t val) @@ -4772,7 +4843,7 @@ void helper_fninit(void) void helper_fbld_ST0(target_ulong ptr) { - CPU86_LDouble tmp; + floatx80 tmp; uint64_t val; unsigned int v; int i; @@ -4782,9 +4853,10 @@ void helper_fbld_ST0(target_ulong ptr) v = ldub(ptr + i); val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); } - tmp = val; - if (ldub(ptr + 9) & 0x80) - tmp = -tmp; + tmp = int64_to_floatx80(val, &env->fp_status); + if (ldub(ptr + 9) & 0x80) { + floatx80_chs(tmp); + } fpush(); ST0 = tmp; } @@ -4795,7 +4867,7 @@ void helper_fbst_ST0(target_ulong ptr) target_ulong mem_ref, mem_end; int64_t val; - val = floatx_to_int64(ST0, &env->fp_status); + val = floatx80_to_int64(ST0, &env->fp_status); mem_ref = ptr; mem_end = mem_ref + 9; if (val < 0) { @@ -4819,17 +4891,19 @@ void helper_fbst_ST0(target_ulong ptr) void helper_f2xm1(void) { - ST0 = pow(2.0,ST0) - 1.0; + double val = floatx80_to_double(ST0); + val = pow(2.0, val) - 1.0; + ST0 = double_to_floatx80(val); } void helper_fyl2x(void) { - CPU86_LDouble fptemp; + double fptemp = floatx80_to_double(ST0); - fptemp = ST0; if (fptemp>0.0){ - fptemp = log(fptemp)/log(2.0); /* log2(ST) */ - ST1 *= fptemp; + fptemp = log(fptemp)/log(2.0); /* log2(ST) */ + fptemp *= floatx80_to_double(ST1); + ST1 = double_to_floatx80(fptemp); fpop(); } else { env->fpus &= (~0x4700); @@ -4839,15 +4913,15 @@ void helper_fyl2x(void) void helper_fptan(void) { - CPU86_LDouble fptemp; + double fptemp = 
floatx80_to_double(ST0); - fptemp = ST0; if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - ST0 = tan(fptemp); + fptemp = tan(fptemp); + ST0 = double_to_floatx80(fptemp); fpush(); - ST0 = 1.0; + ST0 = floatx80_one; env->fpus &= (~0x400); /* C2 <-- 0 */ /* the above code is for |arg| < 2**52 only */ } @@ -4855,49 +4929,57 @@ void helper_fptan(void) void helper_fpatan(void) { - CPU86_LDouble fptemp, fpsrcop; + double fptemp, fpsrcop; - fpsrcop = ST1; - fptemp = ST0; - ST1 = atan2(fpsrcop,fptemp); + fpsrcop = floatx80_to_double(ST1); + fptemp = floatx80_to_double(ST0); + ST1 = double_to_floatx80(atan2(fpsrcop, fptemp)); fpop(); } void helper_fxtract(void) { - CPU86_LDoubleU temp; - unsigned int expdif; + CPU_LDoubleU temp; temp.d = ST0; - expdif = EXPD(temp) - EXPBIAS; - /*DP exponent bias*/ - ST0 = expdif; - fpush(); - BIASEXPONENT(temp); - ST0 = temp.d; + + if (floatx80_is_zero(ST0)) { + /* Easy way to generate -inf and raising division by 0 exception */ + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, &env->fp_status); + fpush(); + ST0 = temp.d; + } else { + int expdif; + + expdif = EXPD(temp) - EXPBIAS; + /*DP exponent bias*/ + ST0 = int32_to_floatx80(expdif, &env->fp_status); + fpush(); + BIASEXPONENT(temp); + ST0 = temp.d; + } } void helper_fprem1(void) { - CPU86_LDouble dblq, fpsrcop, fptemp; - CPU86_LDoubleU fpsrcop1, fptemp1; + double st0, st1, dblq, fpsrcop, fptemp; + CPU_LDoubleU fpsrcop1, fptemp1; int expdif; signed long long int q; -#ifndef VBOX /* Unfortunately, we cannot handle isinf/isnan easily in wrapper */ - if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { -#else - if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { -#endif - ST0 = 0.0 / 0.0; /* NaN */ + st0 = floatx80_to_double(ST0); + st1 = floatx80_to_double(ST1); + + if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { + ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ return; } - fpsrcop = ST0; - fptemp = ST1; - fpsrcop1.d = fpsrcop; - fptemp1.d = fptemp; + fpsrcop = st0; + fptemp = st1; + fpsrcop1.d = ST0; + fptemp1.d = ST1; expdif = EXPD(fpsrcop1) - EXPD(fptemp1); if (expdif < 0) { @@ -4911,7 +4993,7 @@ void helper_fprem1(void) dblq = fpsrcop / fptemp; /* round dblq towards nearest integer */ dblq = rint(dblq); - ST0 = fpsrcop - fptemp * dblq; + st0 = fpsrcop - fptemp * dblq; /* convert dblq to q by truncating towards zero */ if (dblq < 0.0) @@ -4927,35 +5009,35 @@ void helper_fprem1(void) } else { env->fpus |= 0x400; /* C2 <-- 1 */ fptemp = pow(2.0, expdif - 50); - fpsrcop = (ST0 / ST1) / fptemp; + fpsrcop = (st0 / st1) / fptemp; /* fpsrcop = integer obtained by chopping */ fpsrcop = (fpsrcop < 0.0) ? 
-(floor(fabs(fpsrcop))) : floor(fpsrcop); - ST0 -= (ST1 * fpsrcop * fptemp); + st0 -= (st1 * fpsrcop * fptemp); } + ST0 = double_to_floatx80(st0); } void helper_fprem(void) { - CPU86_LDouble dblq, fpsrcop, fptemp; - CPU86_LDoubleU fpsrcop1, fptemp1; + double st0, st1, dblq, fpsrcop, fptemp; + CPU_LDoubleU fpsrcop1, fptemp1; int expdif; signed long long int q; -#ifndef VBOX /* Unfortunately, we cannot easily handle isinf/isnan in wrapper */ - if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { -#else - if ((ST0 != ST0) || (ST1 != ST1) || (ST1 == 0.0)) { -#endif - ST0 = 0.0 / 0.0; /* NaN */ + st0 = floatx80_to_double(ST0); + st1 = floatx80_to_double(ST1); + + if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { + ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ return; } - fpsrcop = (CPU86_LDouble)ST0; - fptemp = (CPU86_LDouble)ST1; - fpsrcop1.d = fpsrcop; - fptemp1.d = fptemp; + fpsrcop = st0; + fptemp = st1; + fpsrcop1.d = ST0; + fptemp1.d = ST1; expdif = EXPD(fpsrcop1) - EXPD(fptemp1); if (expdif < 0) { @@ -4969,7 +5051,7 @@ void helper_fprem(void) dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/; /* round dblq towards zero */ dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); - ST0 = fpsrcop/*ST0*/ - fptemp * dblq; + st0 = fpsrcop/*ST0*/ - fptemp * dblq; /* convert dblq to q by truncating towards zero */ if (dblq < 0.0) @@ -4986,22 +5068,23 @@ void helper_fprem(void) int N = 32 + (expdif % 32); /* as per AMD docs */ env->fpus |= 0x400; /* C2 <-- 1 */ fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (ST0 / ST1) / fptemp; + fpsrcop = (st0 / st1) / fptemp; /* fpsrcop = integer obtained by chopping */ fpsrcop = (fpsrcop < 0.0) ? -(floor(fabs(fpsrcop))) : floor(fpsrcop); - ST0 -= (ST1 * fpsrcop * fptemp); + st0 -= (st1 * fpsrcop * fptemp); } + ST0 = double_to_floatx80(st0); } void helper_fyl2xp1(void) { - CPU86_LDouble fptemp; + double fptemp = floatx80_to_double(ST0); - fptemp = ST0; if ((fptemp+1.0)>0.0) { fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */ - ST1 *= fptemp; + fptemp *= floatx80_to_double(ST1); + ST1 = double_to_floatx80(fptemp); fpop(); } else { env->fpus &= (~0x4700); @@ -5011,27 +5094,23 @@ void helper_fyl2xp1(void) void helper_fsqrt(void) { - CPU86_LDouble fptemp; - - fptemp = ST0; - if (fptemp<0.0) { + if (floatx80_is_neg(ST0)) { env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; } - ST0 = sqrt(fptemp); + ST0 = floatx80_sqrt(ST0, &env->fp_status); } void helper_fsincos(void) { - CPU86_LDouble fptemp; + double fptemp = floatx80_to_double(ST0); - fptemp = ST0; if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - ST0 = sin(fptemp); + ST0 = double_to_floatx80(sin(fptemp)); fpush(); - ST0 = cos(fptemp); + ST0 = double_to_floatx80(cos(fptemp)); env->fpus &= (~0x400); /* C2 <-- 0 */ /* the above code is for |arg| < 2**63 only */ } @@ -5039,23 +5118,27 @@ void helper_fsincos(void) void helper_frndint(void) { - ST0 = floatx_round_to_int(ST0, &env->fp_status); + ST0 = floatx80_round_to_int(ST0, &env->fp_status); } void helper_fscale(void) { - ST0 = ldexp (ST0, (int)(ST1)); + if (floatx80_is_any_nan(ST1)) { + ST0 = ST1; + } else { + int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + } } void helper_fsin(void) { - CPU86_LDouble fptemp; + double fptemp = floatx80_to_double(ST0); - fptemp = ST0; if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - ST0 = sin(fptemp); + ST0 = 
double_to_floatx80(sin(fptemp)); env->fpus &= (~0x400); /* C2 <-- 0 */ /* the above code is for |arg| < 2**53 only */ } @@ -5063,13 +5146,12 @@ void helper_fsin(void) void helper_fcos(void) { - CPU86_LDouble fptemp; + double fptemp = floatx80_to_double(ST0); - fptemp = ST0; if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - ST0 = cos(fptemp); + ST0 = double_to_floatx80(cos(fptemp)); env->fpus &= (~0x400); /* C2 <-- 0 */ /* the above code is for |arg5 < 2**63 only */ } @@ -5077,7 +5159,7 @@ void helper_fcos(void) void helper_fxam_ST0(void) { - CPU86_LDoubleU temp; + CPU_LDoubleU temp; int expdif; temp.d = ST0; @@ -5089,11 +5171,7 @@ void helper_fxam_ST0(void) /* XXX: test fptags too */ expdif = EXPD(temp); if (expdif == MAXEXPD) { -#ifdef USE_X86LDOUBLE if (MANTD(temp) == 0x8000000000000000ULL) -#else - if (MANTD(temp) == 0) -#endif env->fpus |= 0x500 /*Infinity*/; else env->fpus |= 0x100 /*NaN*/; @@ -5111,7 +5189,7 @@ void helper_fstenv(target_ulong ptr, int { int fpus, fptag, exp, i; uint64_t mant; - CPU86_LDoubleU tmp; + CPU_LDoubleU tmp; fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; fptag = 0; @@ -5127,9 +5205,7 @@ void helper_fstenv(target_ulong ptr, int /* zero */ fptag |= 1; } else if (exp == 0 || exp == MAXEXPD -#ifdef USE_X86LDOUBLE || (mant & (1LL << 63)) == 0 -#endif ) { /* NaNs, infinity, denormal */ fptag |= 2; @@ -5181,7 +5257,7 @@ void helper_fldenv(target_ulong ptr, int void helper_fsave(target_ulong ptr, int data32) { - CPU86_LDouble tmp; + floatx80 tmp; int i; helper_fstenv(ptr, data32); @@ -5209,7 +5285,7 @@ void helper_fsave(target_ulong ptr, int void helper_frstor(target_ulong ptr, int data32) { - CPU86_LDouble tmp; + floatx80 tmp; int i; helper_fldenv(ptr, data32); @@ -5225,7 +5301,7 @@ void helper_frstor(target_ulong ptr, int void helper_fxsave(target_ulong ptr, int data64) { int fpus, fptag, i, nb_xmm_regs; - CPU86_LDouble tmp; + floatx80 tmp; target_ulong addr; /* The operand must be 16 byte aligned */ @@ -5286,7 +5362,7 @@ void helper_fxsave(target_ulong ptr, int void helper_fxrstor(target_ulong ptr, int data64) { int i, fpus, fptag, nb_xmm_regs; - CPU86_LDouble tmp; + floatx80 tmp; target_ulong addr; /* The operand must be 16 byte aligned */ @@ -5348,61 +5424,23 @@ void helper_fxrstor(target_ulong ptr, in } } -#ifndef USE_X86LDOUBLE - -void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f) { - CPU86_LDoubleU temp; - int e; - - temp.d = f; - /* mantissa */ - *pmant = (MANTD(temp) << 11) | (1LL << 63); - /* exponent + sign */ - e = EXPD(temp) - EXPBIAS + 16383; - e |= SIGND(temp) >> 16; - *pexp = e; -} - -CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) -{ - CPU86_LDoubleU temp; - int e; - uint64_t ll; - - /* XXX: handle overflow ? 
*/ - e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ - e |= (upper >> 4) & 0x800; /* sign */ - ll = (mant >> 11) & ((1LL << 52) - 1); -#ifdef __arm__ - temp.l.upper = (e << 20) | (ll >> 32); - temp.l.lower = ll; -#else - temp.ll = ll | ((uint64_t)e << 52); -#endif - return temp.d; -} - -#else - -void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f) -{ - CPU86_LDoubleU temp; + CPU_LDoubleU temp; temp.d = f; *pmant = temp.l.lower; *pexp = temp.l.upper; } -CPU86_LDouble cpu_set_fp80(uint64_t mant, uint16_t upper) +floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper) { - CPU86_LDoubleU temp; + CPU_LDoubleU temp; temp.l.upper = upper; temp.l.lower = mant; return temp.d; } -#endif #ifdef TARGET_X86_64 @@ -5553,7 +5591,7 @@ static void do_hlt(void) env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ env->halted = 1; env->exception_index = EXCP_HLT; - cpu_loop_exit(); + cpu_loop_exit(env); } void helper_hlt(int next_eip_addend) @@ -5600,7 +5638,7 @@ void helper_mwait(int next_eip_addend) void helper_debug(void) { env->exception_index = EXCP_DEBUG; - cpu_loop_exit(); + cpu_loop_exit(env); } void helper_reset_rf(void) @@ -5691,16 +5729,6 @@ void helper_boundl(target_ulong a0, int } } -static float approx_rsqrt(float a) -{ - return 1.0 / sqrt(a); -} - -static float approx_rcp(float a) -{ - return 1.0 / a; -} - #if !defined(CONFIG_USER_ONLY) #define MMUSUFFIX _mmu @@ -5794,7 +5822,7 @@ void tlb_fill(target_ulong addr, int is_ if (tb) { /* the PC is inside the translated code. It means that we have a virtual CPU fault */ - cpu_restore_state(tb, env, pc, NULL); + cpu_restore_state(tb, env, pc); } } raise_exception_err(env->exception_index, env->error_code); @@ -6036,12 +6064,7 @@ int emulate_single_instr(CPUX86State *en { tc_ptr = tb->tc_ptr; -#if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) - int fake_ret; - tcg_qemu_tb_exec(tc_ptr, fake_ret); -#else - tcg_qemu_tb_exec(tc_ptr); -#endif + tcg_qemu_tb_exec(env, tc_ptr); /* * Exit once we detect an external interrupt and interrupts are enabled @@ -6148,33 +6171,23 @@ int get_ss_esp_from_tss_raw(CPUX86State //***************************************************************************** // Needs to be at the bottom of the file (overriding macros) -static inline CPU86_LDouble helper_fldt_raw(uint8_t *ptr) +static inline floatx80 helper_fldt_raw(uint8_t *ptr) { -#ifdef USE_X86LDOUBLE - CPU86_LDoubleU tmp; + CPU_LDoubleU tmp; tmp.l.lower = *(uint64_t const *)ptr; tmp.l.upper = *(uint16_t const *)(ptr + 8); return tmp.d; -#else -# error "Busted FPU saving/restoring!" - return *(CPU86_LDouble *)ptr; -#endif } -static inline void helper_fstt_raw(CPU86_LDouble f, uint8_t *ptr) +static inline void helper_fstt_raw(floatx80 f, uint8_t *ptr) { -#ifdef USE_X86LDOUBLE - CPU86_LDoubleU tmp; + CPU_LDoubleU tmp; tmp.d = f; *(uint64_t *)(ptr + 0) = tmp.l.lower; *(uint16_t *)(ptr + 8) = tmp.l.upper; *(uint16_t *)(ptr + 10) = 0; *(uint32_t *)(ptr + 12) = 0; AssertCompile(sizeof(long double) > 8); -#else -# error "Busted FPU saving/restoring!" 
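An illustrative aside, not part of the patch itself: with the USE_X86LDOUBLE variants removed, cpu_get_fp80()/cpu_set_fp80() and helper_fldt()/helper_fstt() simply split a floatx80 into a 64-bit mantissa (explicit integer bit at bit 63) and a 16-bit sign/exponent word, i.e. the 10-byte image the guest sees. A self-contained sketch of that layout, assuming a little-endian host and field names that merely mirror CPU_LDoubleU:

    #include <stdint.h>
    #include <string.h>

    /* illustrative only: the 10-byte x87 extended-precision memory image */
    typedef struct {
        uint64_t lower;   /* mantissa; bit 63 is the explicit integer bit */
        uint16_t upper;   /* bit 15 = sign, bits 14..0 = biased exponent */
    } raw80;

    static void raw80_store(const raw80 *v, uint8_t mem[10])
    {
        memcpy(mem, &v->lower, 8);      /* what helper_fstt() does via stq() */
        memcpy(mem + 8, &v->upper, 2);  /* ...and via stw(ptr + 8) */
    }

    static void raw80_load(raw80 *v, const uint8_t mem[10])
    {
        memcpy(&v->lower, mem, 8);      /* helper_fldt(): ldq() */
        memcpy(&v->upper, mem + 8, 2);  /* lduw(ptr + 8) */
    }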
- *(CPU86_LDouble *)ptr = f; -#endif } #undef stw @@ -6188,7 +6201,7 @@ static inline void helper_fstt_raw(CPU86 void restore_raw_fp_state(CPUX86State *env, uint8_t *ptr) { int fpus, fptag, i, nb_xmm_regs; - CPU86_LDouble tmp; + floatx80 tmp; uint8_t *addr; int data64 = !!(env->hflags & HF_LMA_MASK); @@ -6266,7 +6279,7 @@ void restore_raw_fp_state(CPUX86State *e void save_raw_fp_state(CPUX86State *env, uint8_t *ptr) { int i, fpus, fptag, nb_xmm_regs; - CPU86_LDouble tmp; + floatx80 tmp; uint8_t *addr; int data64 = !!(env->hflags & HF_LMA_MASK); /* don't use HF_CS64_MASK here as cs hasn't been synced when this function is called. */ @@ -6370,6 +6383,10 @@ void helper_svm_check_intercept_param(ui { } +void svm_check_intercept(CPUState *env1, uint32_t type) +{ +} + void helper_svm_check_io(uint32_t port, uint32_t param, uint32_t next_eip_addend) { @@ -6551,7 +6568,7 @@ void helper_vmrun(int aflag, int next_ei env->exception_next_eip = -1; qemu_log_mask(CPU_LOG_TB_IN_ASM, "INTR"); /* XXX: is it always correct ? */ - do_interrupt(vector, 0, 0, 0, 1); + do_interrupt_all(vector, 0, 0, 0, 1); break; case SVM_EVTINJ_TYPE_NMI: env->exception_index = EXCP02_NMI; @@ -6559,7 +6576,7 @@ void helper_vmrun(int aflag, int next_ei env->exception_is_int = 0; env->exception_next_eip = EIP; qemu_log_mask(CPU_LOG_TB_IN_ASM, "NMI"); - cpu_loop_exit(); + cpu_loop_exit(env); break; case SVM_EVTINJ_TYPE_EXEPT: env->exception_index = vector; @@ -6567,7 +6584,7 @@ void helper_vmrun(int aflag, int next_ei env->exception_is_int = 0; env->exception_next_eip = -1; qemu_log_mask(CPU_LOG_TB_IN_ASM, "EXEPT"); - cpu_loop_exit(); + cpu_loop_exit(env); break; case SVM_EVTINJ_TYPE_SOFT: env->exception_index = vector; @@ -6575,7 +6592,7 @@ void helper_vmrun(int aflag, int next_ei env->exception_is_int = 1; env->exception_next_eip = EIP; qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT"); - cpu_loop_exit(); + cpu_loop_exit(env); break; } qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", env->exception_index, env->error_code); @@ -6731,7 +6748,7 @@ void helper_svm_check_intercept_param(ui switch((uint32_t)ECX) { case 0 ... 0x1fff: t0 = (ECX * 2) % 8; - t1 = ECX / 8; + t1 = (ECX * 2) / 8; break; case 0xc0000000 ... 
0xc0001fff: t0 = (8192 + ECX - 0xc0000000) * 2; @@ -6764,6 +6781,16 @@ void helper_svm_check_intercept_param(ui #endif /* VBOX */ } +void svm_check_intercept(CPUState *env1, uint32_t type) +{ + CPUState *saved_env; + + saved_env = env; + env = env1; + helper_svm_check_intercept_param(type, 0); + env = saved_env; +} + void helper_svm_check_io(uint32_t port, uint32_t param, uint32_t next_eip_addend) { @@ -6915,7 +6942,7 @@ void helper_vmexit(uint32_t exit_code, u env->error_code = 0; env->old_exception = -1; - cpu_loop_exit(); + cpu_loop_exit(env); } #endif @@ -7090,6 +7117,18 @@ uint32_t helper_cc_compute_all(int op) } } +uint32_t cpu_cc_compute_all(CPUState *env1, int op) +{ + CPUState *saved_env; + uint32_t ret; + + saved_env = env; + env = env1; + ret = helper_cc_compute_all(op); + env = saved_env; + return ret; +} + uint32_t helper_cc_compute_c(int op) { switch (op) { --- target-i386/ops_sse.h 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/ops_sse.h 2014-01-14 15:21:31.000000000 -0500 @@ -788,28 +788,38 @@ int64_t helper_cvttsd2sq(XMMReg *s) void helper_rsqrtps(XMMReg *d, XMMReg *s) { - d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); - d->XMM_S(1) = approx_rsqrt(s->XMM_S(1)); - d->XMM_S(2) = approx_rsqrt(s->XMM_S(2)); - d->XMM_S(3) = approx_rsqrt(s->XMM_S(3)); + d->XMM_S(0) = float32_div(float32_one, + float32_sqrt(s->XMM_S(0), &env->sse_status), + &env->sse_status); + d->XMM_S(1) = float32_div(float32_one, + float32_sqrt(s->XMM_S(1), &env->sse_status), + &env->sse_status); + d->XMM_S(2) = float32_div(float32_one, + float32_sqrt(s->XMM_S(2), &env->sse_status), + &env->sse_status); + d->XMM_S(3) = float32_div(float32_one, + float32_sqrt(s->XMM_S(3), &env->sse_status), + &env->sse_status); } void helper_rsqrtss(XMMReg *d, XMMReg *s) { - d->XMM_S(0) = approx_rsqrt(s->XMM_S(0)); + d->XMM_S(0) = float32_div(float32_one, + float32_sqrt(s->XMM_S(0), &env->sse_status), + &env->sse_status); } void helper_rcpps(XMMReg *d, XMMReg *s) { - d->XMM_S(0) = approx_rcp(s->XMM_S(0)); - d->XMM_S(1) = approx_rcp(s->XMM_S(1)); - d->XMM_S(2) = approx_rcp(s->XMM_S(2)); - d->XMM_S(3) = approx_rcp(s->XMM_S(3)); + d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status); + d->XMM_S(1) = float32_div(float32_one, s->XMM_S(1), &env->sse_status); + d->XMM_S(2) = float32_div(float32_one, s->XMM_S(2), &env->sse_status); + d->XMM_S(3) = float32_div(float32_one, s->XMM_S(3), &env->sse_status); } void helper_rcpss(XMMReg *d, XMMReg *s) { - d->XMM_S(0) = approx_rcp(s->XMM_S(0)); + d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status); } static inline uint64_t helper_extrq(uint64_t src, int shift, int len) @@ -931,14 +941,14 @@ void helper_ ## name ## sd (Reg *d, Reg d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\ } -#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0 +#define FPU_CMPEQ(size, a, b) float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0 #define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0 #define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0 -#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? - 1 : 0 -#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered_quiet(a, b, &env->sse_status) ? - 1 : 0 +#define FPU_CMPNEQ(size, a, b) float ## size ## _eq_quiet(a, b, &env->sse_status) ? 
0 : -1 #define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1 #define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1 -#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1 +#define FPU_CMPORD(size, a, b) float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 0 : -1 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) SSE_HELPER_CMP(cmplt, FPU_CMPLT) @@ -1226,8 +1236,8 @@ void helper_pfadd(MMXReg *d, MMXReg *s) void helper_pfcmpeq(MMXReg *d, MMXReg *s) { - d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; - d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; + d->MMX_L(0) = float32_eq_quiet(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_eq_quiet(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; } void helper_pfcmpge(MMXReg *d, MMXReg *s) @@ -1282,14 +1292,16 @@ void helper_pfpnacc(MMXReg *d, MMXReg *s void helper_pfrcp(MMXReg *d, MMXReg *s) { - d->MMX_S(0) = approx_rcp(s->MMX_S(0)); + d->MMX_S(0) = float32_div(float32_one, s->MMX_S(0), &env->mmx_status); d->MMX_S(1) = d->MMX_S(0); } void helper_pfrsqrt(MMXReg *d, MMXReg *s) { d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; - d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); + d->MMX_S(1) = float32_div(float32_one, + float32_sqrt(d->MMX_S(1), &env->mmx_status), + &env->mmx_status); d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; d->MMX_L(0) = d->MMX_L(1); } --- target-i386/translate.c 2013-12-18 11:11:38.000000000 -0500 +++ target-i386/translate.c 2014-01-14 15:21:31.000000000 -0500 @@ -36,7 +36,6 @@ #endif /* !VBOX */ #include "cpu.h" -#include "exec-all.h" #include "disas.h" #include "tcg-op.h" @@ -340,28 +339,16 @@ static inline void gen_op_andl_A0_ffff(v static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0) { - TCGv tmp; - switch(ot) { case OT_BYTE: - tmp = tcg_temp_new(); - tcg_gen_ext8u_tl(tmp, t0); if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) { - tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff); - tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); + tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8); } else { - tcg_gen_shli_tl(tmp, tmp, 8); - tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00); - tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp); + tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8); } - tcg_temp_free(tmp); break; case OT_WORD: - tmp = tcg_temp_new(); - tcg_gen_ext16u_tl(tmp, t0); - tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); - tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); - tcg_temp_free(tmp); + tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16); break; default: /* XXX this shouldn't be reached; abort? */ case OT_LONG: @@ -389,15 +376,9 @@ static inline void gen_op_mov_reg_T1(int static inline void gen_op_mov_reg_A0(int size, int reg) { - TCGv tmp; - switch(size) { case 0: - tmp = tcg_temp_new(); - tcg_gen_ext16u_tl(tmp, cpu_A0); - tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); - tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp); - tcg_temp_free(tmp); + tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16); break; default: /* XXX this shouldn't be reached; abort? 
*/ case 1: @@ -481,9 +462,7 @@ static inline void gen_op_add_reg_im(int switch(size) { case 0: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); - tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0); - tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); - tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0); + tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; case 1: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); @@ -505,9 +484,7 @@ static inline void gen_op_add_reg_T0(int switch(size) { case 0: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); - tcg_gen_ext16u_tl(cpu_tmp0, cpu_tmp0); - tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff); - tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0); + tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; case 1: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); @@ -2481,7 +2458,7 @@ static inline void gen_goto_tb(DisasCont /* jump to same page: we can use a direct jump */ tcg_gen_goto_tb(tb_num); gen_jmp_im(eip); - tcg_gen_exit_tb((intptr_t)tb + tb_num); + tcg_gen_exit_tb((tcg_target_long)tb + tb_num); } else { /* jump to another page: currently not optimized */ gen_jmp_im(eip); @@ -7977,7 +7954,7 @@ static target_ulong disas_insn(DisasCont break; case 5: /* lfence */ case 6: /* mfence */ - if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE)) + if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2)) goto illegal_op; break; case 7: /* sfence / clflush */ @@ -8364,8 +8341,7 @@ void gen_intermediate_code_pc(CPUState * gen_intermediate_code_internal(env, tb, 1); } -void gen_pc_load(CPUState *env, TranslationBlock *tb, - uintptr_t searched_pc, int pc_pos, void *puc) +void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos) { int cc_op; #ifdef DEBUG_DISAS @@ -8377,8 +8353,8 @@ void gen_pc_load(CPUState *env, Translat qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]); } } - qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n", - searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base, + qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n", + pc_pos, gen_opc_pc[pc_pos] - tb->cs_base, (uint32_t)tb->cs_base); } #endif --- tcg/README 2013-12-18 11:11:38.000000000 -0500 +++ tcg/README 2014-01-14 15:21:31.000000000 -0500 @@ -75,11 +75,11 @@ destroyed, but local temporaries and glo * Helpers: Using the tcg_gen_helper_x_y it is possible to call any function -taking i32, i64 or pointer types. By default, before calling an helper, +taking i32, i64 or pointer types. By default, before calling a helper, all globals are stored at their canonical location and it is assumed -that the function can modify them. This can be overriden by the +that the function can modify them. This can be overridden by the TCG_CALL_CONST function modifier. By default, the helper is allowed to -modify the CPU state or raise an exception. This can be overriden by +modify the CPU state or raise an exception. This can be overridden by the TCG_CALL_PURE function modifier, in which case the call to the function is removed if the return value is not used. @@ -285,6 +285,20 @@ the four high order bytes are set to zer Indicate that the value of t0 won't be used later. It is useful to force dead code elimination. +* deposit_i32/i64 dest, t1, t2, pos, len + +Deposit T2 as a bitfield into T1, placing the result in DEST. 
+The bitfield is described by POS/LEN, which are immediate values: + + LEN - the length of the bitfield + POS - the position of the first bit, counting from the LSB + +For example, pos=8, len=4 indicates a 4-bit field at bit 8. +This operation would be equivalent to + + dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) + + ********* Conditional moves * setcond_i32/i64 cond, dest, t1, t2 @@ -364,7 +378,7 @@ formed from two 32-bit arguments. The r ********* QEMU specific operations -* tb_exit t0 +* exit_tb t0 Exit the current TB and return the value t0 (word type). @@ -488,9 +502,17 @@ register. the speed of the translation. - Don't hesitate to use helpers for complicated or seldom used target - intructions. There is little performance advantage in using TCG to + instructions. There is little performance advantage in using TCG to implement target instructions taking more than about twenty TCG - instructions. + instructions. Note that this rule of thumb is more applicable to + helpers doing complex logic or arithmetic, where the C compiler has + scope to do a good job of optimisation; it is less relevant where + the instruction is mostly doing loads and stores, and in those cases + inline TCG may still be faster for longer sequences. + +- The hard limit on the number of TCG instructions you can generate + per target instruction is set by MAX_OP_PER_INSTR in exec-all.h -- + you cannot exceed this without risking a buffer overrun. - Use the 'discard' instruction if you know that TCG won't be able to prove that a given global is "dead" at a given program point. The --- tcg/i386/tcg-target.c 2013-12-18 11:11:38.000000000 -0500 +++ tcg/i386/tcg-target.c 2014-01-14 15:21:31.000000000 -0500 @@ -1613,7 +1613,7 @@ static void tcg_out_qemu_st(TCGContext * /* Pop and discard. This is 2 bytes smaller than the add. */ tcg_out_pop(s, TCG_REG_ECX); } else if (stack_adjust != 0) { - tcg_out_addi(s, TCG_REG_ESP, stack_adjust); + tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust); } # endif @@ -2129,11 +2129,11 @@ static int tcg_target_callee_save_regs[] # endif TCG_REG_R12, TCG_REG_R13, - /* TCG_REG_R14, */ /* Currently used for the global env. */ + TCG_REG_R14, /* Currently used for the global env. */ TCG_REG_R15, #else # ifndef VBOX - /* TCG_REG_EBP, */ /* Currently used for the global env. */ + TCG_REG_EBP, /* Currently used for the global env. */ TCG_REG_EBX, TCG_REG_ESI, TCG_REG_EDI, @@ -2153,40 +2153,42 @@ static void tcg_target_qemu_prologue(TCG /* TB prologue */ - /* Save all callee saved registers. */ - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { - tcg_out_push(s, tcg_target_callee_save_regs[i]); - } # if defined(VBOX_STRICT) && defined(RT_ARCH_X86) tcg_out8(s, 0x31); /* xor ebp, ebp */ tcg_out8(s, 0xed); # endif - /* Reserve some stack space. */ + /* Reserve some stack space, also for TCG temps. */ push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs); push_size *= TCG_TARGET_REG_BITS / 8; - frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE; -#if defined(VBOX) && defined(__MINGW64__) - frame_size += TCG_TARGET_CALL_STACK_OFFSET; -#endif + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE + + CPU_TEMP_BUF_NLONGS * sizeof(long); frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); stack_addend = frame_size - push_size; + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + CPU_TEMP_BUF_NLONGS * sizeof(long)); + + /* Save all callee saved registers. 
*/ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_push(s, tcg_target_callee_save_regs[i]); + } + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + /* jmp *tb. */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]); + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); # ifdef VBOX tcg_gen_stack_alignment_check(s); # endif - tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */ - /* TB epilogue */ tb_ret_addr = s->code_ptr; - tcg_out_addi(s, TCG_REG_ESP, stack_addend); + tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { tcg_out_pop(s, tcg_target_callee_save_regs[i]); @@ -2225,7 +2227,7 @@ static void tcg_target_init(TCGContext * } tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); tcg_add_target_add_op_defs(x86_op_defs); } --- tcg/tcg-op.h 2013-12-18 11:11:38.000000000 -0500 +++ tcg/tcg-op.h 2014-01-14 15:21:31.000000000 -0500 @@ -254,6 +254,30 @@ static inline void tcg_gen_op5i_i64(TCGO *gen_opparam_ptr++ = arg5; } +static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGv_i32 arg3, + TCGArg arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = arg4; + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGv_i64 arg3, + TCGArg arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = arg4; + *gen_opparam_ptr++ = arg5; +} + static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5, TCGv_i32 arg6) @@ -727,7 +751,7 @@ static inline void tcg_gen_divu_i32(TCGv sizemask |= tcg_gen_sizemask(1, 0, 0); sizemask |= tcg_gen_sizemask(2, 0, 0); - tcg_gen_helper32(tcg_helper_divu_i32, ret, arg1, arg2, 0); + tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2); } static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) @@ -738,7 +762,7 @@ static inline void tcg_gen_remu_i32(TCGv sizemask |= tcg_gen_sizemask(1, 0, 0); sizemask |= tcg_gen_sizemask(2, 0, 0); - tcg_gen_helper32(tcg_helper_remu_i32, ret, arg1, arg2, 0); + tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2); } #endif @@ -1039,66 +1063,66 @@ static inline void tcg_gen_movi_i64(TCGv tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg); } -static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_i64 arg2, +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); } -static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_i64 arg2, +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); } -static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_i64 arg2, +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); } -static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_i64 arg2, +static 
inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); } -static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_i64 arg2, +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); } -static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_i64 arg2, +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); } -static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_i64 arg2, tcg_target_long offset) +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); } -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); } -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); } -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); } -static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_i64 arg2, tcg_target_long offset) +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); } @@ -2071,6 +2095,44 @@ static inline void tcg_gen_rotri_i64(TCG } } +static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, + TCGv_i32 arg2, unsigned int ofs, + unsigned int len) +{ +#ifdef TCG_TARGET_HAS_deposit_i32 + tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); +#else + uint32_t mask = (1u << len) - 1; + TCGv_i32 t1 = tcg_temp_new_i32 (); + + tcg_gen_andi_i32(t1, arg2, mask); + tcg_gen_shli_i32(t1, t1, ofs); + tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i32(ret, ret, t1); + + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, + TCGv_i64 arg2, unsigned int ofs, + unsigned int len) +{ +#ifdef TCG_TARGET_HAS_deposit_i64 + tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); +#else + uint64_t mask = (1ull << len) - 1; + TCGv_i64 t1 = tcg_temp_new_i64 (); + + tcg_gen_andi_i64(t1, arg2, mask); + tcg_gen_shli_i64(t1, t1, ofs); + tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i64(ret, ret, t1); + + tcg_temp_free_i64(t1); +#endif +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. 
*/ @@ -2242,8 +2304,8 @@ static inline void tcg_gen_qemu_st64(TCG #endif } -#define tcg_gen_ld_ptr tcg_gen_ld_i32 -#define tcg_gen_discard_ptr tcg_gen_discard_i32 +#define tcg_gen_ld_ptr(R, A, O) tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O)) +#define tcg_gen_discard_ptr(A) tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A)) #else /* TCG_TARGET_REG_BITS == 32 */ @@ -2310,8 +2372,8 @@ static inline void tcg_gen_qemu_st64(TCG tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index); } -#define tcg_gen_ld_ptr tcg_gen_ld_i64 -#define tcg_gen_discard_ptr tcg_gen_discard_i64 +#define tcg_gen_ld_ptr(R, A, O) tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O)) +#define tcg_gen_discard_ptr(A) tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A)) #endif /* TCG_TARGET_REG_BITS != 32 */ @@ -2384,6 +2446,7 @@ static inline void tcg_gen_qemu_st64(TCG #define tcg_gen_rotli_tl tcg_gen_rotli_i64 #define tcg_gen_rotr_tl tcg_gen_rotr_i64 #define tcg_gen_rotri_tl tcg_gen_rotri_i64 +#define tcg_gen_deposit_tl tcg_gen_deposit_i64 #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #else @@ -2454,16 +2517,23 @@ static inline void tcg_gen_qemu_st64(TCG #define tcg_gen_rotli_tl tcg_gen_rotli_i32 #define tcg_gen_rotr_tl tcg_gen_rotr_i32 #define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_gen_deposit_tl tcg_gen_deposit_i32 #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #endif #if TCG_TARGET_REG_BITS == 32 -#define tcg_gen_add_ptr tcg_gen_add_i32 -#define tcg_gen_addi_ptr tcg_gen_addi_i32 -#define tcg_gen_ext_i32_ptr tcg_gen_mov_i32 +#define tcg_gen_add_ptr(R, A, B) tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), \ + TCGV_PTR_TO_NAT(A), \ + TCGV_PTR_TO_NAT(B)) +#define tcg_gen_addi_ptr(R, A, B) tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), \ + TCGV_PTR_TO_NAT(A), (B)) +#define tcg_gen_ext_i32_ptr(R, A) tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A)) #else /* TCG_TARGET_REG_BITS == 32 */ -#define tcg_gen_add_ptr tcg_gen_add_i64 -#define tcg_gen_addi_ptr tcg_gen_addi_i64 -#define tcg_gen_ext_i32_ptr tcg_gen_ext_i32_i64 +#define tcg_gen_add_ptr(R, A, B) tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), \ + TCGV_PTR_TO_NAT(A), \ + TCGV_PTR_TO_NAT(B)) +#define tcg_gen_addi_ptr(R, A, B) tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), \ + TCGV_PTR_TO_NAT(A), (B)) +#define tcg_gen_ext_i32_ptr(R, A) tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A)) #endif /* TCG_TARGET_REG_BITS != 32 */ --- tcg/tcg-opc.h 2013-12-18 11:11:38.000000000 -0500 +++ tcg/tcg-opc.h 2014-01-14 15:21:31.000000000 -0500 @@ -78,6 +78,9 @@ DEF(sar_i32, 1, 2, 0, 0) DEF(rotl_i32, 1, 2, 0, 0) DEF(rotr_i32, 1, 2, 0, 0) #endif +#ifdef TCG_TARGET_HAS_deposit_i32 +DEF(deposit_i32, 1, 2, 2, 0) +#endif DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) #if TCG_TARGET_REG_BITS == 32 @@ -168,6 +171,9 @@ DEF(sar_i64, 1, 2, 0, 0) DEF(rotl_i64, 1, 2, 0, 0) DEF(rotr_i64, 1, 2, 0, 0) #endif +#ifdef TCG_TARGET_HAS_deposit_i64 +DEF(deposit_i64, 1, 2, 2, 0) +#endif DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) #ifdef TCG_TARGET_HAS_ext8s_i64 --- tcg/tcg.c 2013-12-18 11:11:38.000000000 -0500 +++ tcg/tcg.c 2014-01-14 15:21:31.000000000 -0500 @@ -59,7 +59,6 @@ instructions */ #define NO_CPU_IO_DEFS #include "cpu.h" -#include "exec-all.h" #include "tcg-op.h" #include "elf.h" @@ -466,6 +465,10 @@ static inline int tcg_temp_new_internal( s->nb_temps++; } } + +#if defined(CONFIG_DEBUG_TCG) + s->temps_in_use++; +#endif return idx; } @@ -491,6 +494,13 @@ static inline void tcg_temp_free_interna TCGTemp *ts; int k; +#if defined(CONFIG_DEBUG_TCG) + s->temps_in_use--; 
+ if (s->temps_in_use < 0) { + fprintf(stderr, "More temporaries freed than allocated!\n"); + } +#endif + assert(idx >= s->nb_globals && idx < s->nb_temps); ts = &s->temps[idx]; assert(ts->temp_allocated != 0); @@ -544,6 +554,27 @@ TCGv_i64 tcg_const_local_i64(int64_t val return t0; } +#if defined(CONFIG_DEBUG_TCG) +void tcg_clear_temp_count(void) +{ + TCGContext *s = &tcg_ctx; + s->temps_in_use = 0; +} + +int tcg_check_temp_count(void) +{ + TCGContext *s = &tcg_ctx; + if (s->temps_in_use) { + /* Clear the count so that we don't give another + * warning immediately next time around. + */ + s->temps_in_use = 0; + return 1; + } + return 0; +} +#endif + void tcg_register_helper(void *func, const char *name) { TCGContext *s = &tcg_ctx; @@ -573,7 +604,7 @@ void tcg_register_helper(void *func, con void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, int sizemask, TCGArg ret, int nargs, TCGArg *args) { -#ifdef TCG_TARGET_I386 +#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64 int call_type; #endif int i; @@ -600,7 +631,7 @@ void tcg_gen_callN(TCGContext *s, TCGv_p *gen_opc_ptr++ = INDEX_op_call; nparam = gen_opparam_ptr++; -#ifdef TCG_TARGET_I386 +#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64 call_type = (flags & TCG_CALL_TYPE_MASK); #endif if (ret != TCG_CALL_DUMMY_ARG) { @@ -1052,7 +1083,7 @@ void tcg_add_target_add_op_defs(const TC if (tdefs->op == (TCGOpcode)-1) break; op = tdefs->op; - assert(op >= 0 && op < NB_OPS); + assert((unsigned)op < NB_OPS); def = &tcg_op_defs[op]; #if defined(CONFIG_DEBUG_TCG) /* Duplicate entry in op definitions? */ @@ -1190,7 +1221,7 @@ static inline void tcg_la_bb_end(TCGCont } } -/* Liveness analysis : update the opc_dead_iargs array to tell if a +/* Liveness analysis : update the opc_dead_args array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. 
*/ static void tcg_liveness_analysis(TCGContext *s) @@ -1200,13 +1231,13 @@ static void tcg_liveness_analysis(TCGCon TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps; - unsigned int dead_iargs; + unsigned int dead_args; gen_opc_ptr++; /* skip end */ nb_ops = gen_opc_ptr - gen_opc_buf; - s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); dead_temps = tcg_malloc(s->nb_temps); memset(dead_temps, 1, s->nb_temps); @@ -1242,8 +1273,12 @@ static void tcg_liveness_analysis(TCGCon do_not_remove_call: /* output args are dead */ + dead_args = 0; for(i = 0; i < nb_oargs; i++) { arg = args[i]; + if (dead_temps[arg]) { + dead_args |= (1 << i); + } dead_temps[arg] = 1; } @@ -1253,17 +1288,16 @@ static void tcg_liveness_analysis(TCGCon } /* input args are live */ - dead_iargs = 0; - for(i = 0; i < nb_iargs; i++) { - arg = args[i + nb_oargs]; + for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; if (arg != TCG_CALL_DUMMY_ARG) { if (dead_temps[arg]) { - dead_iargs |= (1 << i); + dead_args |= (1 << i); } dead_temps[arg] = 0; } } - s->op_dead_iargs[op_index] = dead_iargs; + s->op_dead_args[op_index] = dead_args; } args--; } @@ -1310,8 +1344,12 @@ static void tcg_liveness_analysis(TCGCon do_not_remove: /* output args are dead */ + dead_args = 0; for(i = 0; i < nb_oargs; i++) { arg = args[i]; + if (dead_temps[arg]) { + dead_args |= (1 << i); + } dead_temps[arg] = 1; } @@ -1324,15 +1362,14 @@ static void tcg_liveness_analysis(TCGCon } /* input args are live */ - dead_iargs = 0; - for(i = 0; i < nb_iargs; i++) { - arg = args[i + nb_oargs]; + for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; if (dead_temps[arg]) { - dead_iargs |= (1 << i); + dead_args |= (1 << i); } dead_temps[arg] = 0; } - s->op_dead_iargs[op_index] = dead_iargs; + s->op_dead_args[op_index] = dead_args; } break; } @@ -1349,8 +1386,8 @@ static void tcg_liveness_analysis(TCGCon int nb_ops; nb_ops = gen_opc_ptr - gen_opc_buf; - s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); - memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t)); + s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); + memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t)); } #endif @@ -1431,17 +1468,23 @@ static void temp_allocate_frame(TCGConte { TCGTemp *ts; ts = &s->temps[temp]; - s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1); +#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */ + s->current_frame_offset = (s->current_frame_offset + + (tcg_target_long)sizeof(tcg_target_long) - 1) & + ~(sizeof(tcg_target_long) - 1); +#endif #ifndef VBOX - if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) + if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > #else - if ((tcg_target_long)s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) + if ((tcg_target_long)s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > #endif + s->frame_end) { tcg_abort(); + } ts->mem_offset = s->current_frame_offset; ts->mem_reg = s->frame_reg; ts->mem_allocated = 1; - s->current_frame_offset += sizeof(tcg_target_long); + s->current_frame_offset += (tcg_target_long)sizeof(tcg_target_long); } /* free register 'reg' by spilling the corresponding temporary if necessary */ @@ -1558,7 +1601,7 @@ static void tcg_reg_alloc_bb_end(TCGCont save_globals(s, allocated_regs); } -#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1) +#define IS_DEAD_ARG(n) 
((dead_args >> (n)) & 1) static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args) { @@ -1583,7 +1626,7 @@ static void tcg_reg_alloc_movi(TCGContex static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, const TCGArg *args, - unsigned int dead_iargs) + unsigned int dead_args) { TCGTemp *ts, *ots; int reg; @@ -1593,9 +1636,9 @@ static void tcg_reg_alloc_mov(TCGContext ts = &s->temps[args[1]]; arg_ct = &def->args_ct[0]; - /* XXX: always mark arg dead if IS_DEAD_IARG(0) */ + /* XXX: always mark arg dead if IS_DEAD_ARG(1) */ if (ts->val_type == TEMP_VAL_REG) { - if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) { + if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { /* the mov can be suppressed */ if (ots->val_type == TEMP_VAL_REG) s->reg_to_temp[ots->reg] = -1; @@ -1643,7 +1686,7 @@ static void tcg_reg_alloc_mov(TCGContext static void tcg_reg_alloc_op(TCGContext *s, const TCGOpDef *def, TCGOpcode opc, const TCGArg *args, - unsigned int dead_iargs) + unsigned int dead_args) { TCGRegSet allocated_regs; int i, k, nb_iargs, nb_oargs, reg; @@ -1702,8 +1745,9 @@ static void tcg_reg_alloc_op(TCGContext /* if the input is aliased to an output and if it is not dead after the instruction, we must allocate a new register and move it */ - if (!IS_DEAD_IARG(i - nb_oargs)) + if (!IS_DEAD_ARG(i)) { goto allocate_in_reg; + } } } reg = ts->reg; @@ -1726,9 +1770,9 @@ static void tcg_reg_alloc_op(TCGContext tcg_reg_alloc_bb_end(s, allocated_regs); } else { /* mark dead temporaries and free the associated registers */ - for(i = 0; i < nb_iargs; i++) { - arg = args[nb_oargs + i]; - if (IS_DEAD_IARG(i)) { + for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + arg = args[i]; + if (IS_DEAD_ARG(i)) { ts = &s->temps[arg]; if (!ts->fixed_reg) { if (ts->val_type == TEMP_VAL_REG) @@ -1776,12 +1820,16 @@ static void tcg_reg_alloc_op(TCGContext if (!ts->fixed_reg) { if (ts->val_type == TEMP_VAL_REG) s->reg_to_temp[ts->reg] = -1; - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - /* temp value is modified, so the value kept in memory is - potentially not the same */ - ts->mem_coherent = 0; - s->reg_to_temp[reg] = arg; + if (IS_DEAD_ARG(i)) { + ts->val_type = TEMP_VAL_DEAD; + } else { + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* temp value is modified, so the value kept in memory is + potentially not the same */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } } oarg_end: new_args[i] = reg; @@ -1809,7 +1857,7 @@ static void tcg_reg_alloc_op(TCGContext static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, TCGOpcode opc, const TCGArg *args, - unsigned int dead_iargs) + unsigned int dead_args) { int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; TCGArg arg, func_arg; @@ -1832,13 +1880,14 @@ static int tcg_reg_alloc_call(TCGContext nb_regs = nb_params; /* assign stack slots first */ - /* XXX: preallocate call stack */ call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long); call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); if (allocate_args) { - tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size)); + /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, + preallocate call stack */ + tcg_abort(); } stack_offset = TCG_TARGET_CALL_STACK_OFFSET; @@ -1931,9 +1980,9 @@ static int tcg_reg_alloc_call(TCGContext /* mark dead temporaries and free the associated registers */ - for(i = 0; i < nb_iargs; i++) { - arg = args[nb_oargs + i]; - 
if (IS_DEAD_IARG(i)) { + for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + arg = args[i]; + if (IS_DEAD_ARG(i)) { ts = &s->temps[arg]; if (!ts->fixed_reg) { if (ts->val_type == TEMP_VAL_REG) @@ -1958,10 +2007,6 @@ static int tcg_reg_alloc_call(TCGContext tcg_out_op(s, opc, &func_arg, &const_func_arg); - if (allocate_args) { - tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size)); - } - /* assign output registers and emit moves if needed */ for(i = 0; i < nb_oargs; i++) { arg = args[i]; @@ -1975,10 +2020,14 @@ static int tcg_reg_alloc_call(TCGContext } else { if (ts->val_type == TEMP_VAL_REG) s->reg_to_temp[ts->reg] = -1; - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - ts->mem_coherent = 0; - s->reg_to_temp[reg] = arg; + if (IS_DEAD_ARG(i)) { + ts->val_type = TEMP_VAL_DEAD; + } else { + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } } } @@ -2008,7 +2057,7 @@ static inline int tcg_gen_code_common(TC TCGOpcode opc; int op_index; const TCGOpDef *def; - unsigned int dead_iargs; + unsigned int dead_args; const TCGArg *args; #ifdef DEBUG_DISAS @@ -2061,8 +2110,8 @@ static inline int tcg_gen_code_common(TC #if TCG_TARGET_REG_BITS == 64 case INDEX_op_mov_i64: #endif - dead_iargs = s->op_dead_iargs[op_index]; - tcg_reg_alloc_mov(s, def, args, dead_iargs); + dead_args = s->op_dead_args[op_index]; + tcg_reg_alloc_mov(s, def, args, dead_args); break; case INDEX_op_movi_i32: #if TCG_TARGET_REG_BITS == 64 @@ -2098,8 +2147,8 @@ static inline int tcg_gen_code_common(TC tcg_out_label(s, args[0], (intptr_t)s->code_ptr); break; case INDEX_op_call: - dead_iargs = s->op_dead_iargs[op_index]; - args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs); + dead_args = s->op_dead_args[op_index]; + args += tcg_reg_alloc_call(s, def, opc, args, dead_args); goto next; case INDEX_op_end: goto the_end; @@ -2107,8 +2156,8 @@ static inline int tcg_gen_code_common(TC /* Note: in order to speed up the code, it would be much faster to have specialized register allocator functions for some common argument patterns */ - dead_iargs = s->op_dead_iargs[op_index]; - tcg_reg_alloc_op(s, def, opc, args, dead_iargs); + dead_args = s->op_dead_args[op_index]; + tcg_reg_alloc_op(s, def, opc, args, dead_args); break; } args += def->nb_args; @@ -2159,8 +2208,7 @@ int tcg_gen_code_search_pc(TCGContext *s } #ifdef CONFIG_PROFILER -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) { TCGContext *s = &tcg_ctx; int64_t tot; @@ -2204,8 +2252,7 @@ void tcg_dump_info(FILE *f, dump_op_count(); } #else -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) { cpu_fprintf(f, "[TCG profiler not compiled]\n"); } --- tcg/tcg.h 2013-12-18 11:11:38.000000000 -0500 +++ tcg/tcg.h 2014-01-14 15:21:31.000000000 -0500 @@ -129,7 +129,7 @@ typedef tcg_target_ulong TCGArg; We use plain int by default to avoid this runtime overhead. Users of tcg_gen_* don't need to know about any of this, and should treat TCGv as an opaque type. - In additon we do typechecking for different types of variables. TCGv_i32 + In addition we do typechecking for different types of variables. TCGv_i32 and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr are aliases for target_ulong and host pointer sized values respectively. 
*/ @@ -150,12 +150,19 @@ typedef struct int i64; } TCGv_i64; +typedef struct { + int iptr; +} TCGv_ptr; + #define MAKE_TCGV_I32(i) __extension__ \ ({ TCGv_i32 make_tcgv_tmp = {i}; make_tcgv_tmp;}) #define MAKE_TCGV_I64(i) __extension__ \ ({ TCGv_i64 make_tcgv_tmp = {i}; make_tcgv_tmp;}) +#define MAKE_TCGV_PTR(i) __extension__ \ + ({ TCGv_ptr make_tcgv_tmp = {i}; make_tcgv_tmp; }) #define GET_TCGV_I32(t) ((t).i32) #define GET_TCGV_I64(t) ((t).i64) +#define GET_TCGV_PTR(t) ((t).iptr) #if TCG_TARGET_REG_BITS == 32 #define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t)) #define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1) @@ -165,10 +172,17 @@ typedef struct typedef int TCGv_i32; typedef int TCGv_i64; +#if TCG_TARGET_REG_BITS == 32 +#define TCGv_ptr TCGv_i32 +#else +#define TCGv_ptr TCGv_i64 +#endif #define MAKE_TCGV_I32(x) (x) #define MAKE_TCGV_I64(x) (x) +#define MAKE_TCGV_PTR(x) (x) #define GET_TCGV_I32(t) (t) #define GET_TCGV_I64(t) (t) +#define GET_TCGV_PTR(t) (t) #if TCG_TARGET_REG_BITS == 32 #define TCGV_LOW(t) (t) @@ -252,9 +266,9 @@ typedef struct TCGTemp { unsigned int fixed_reg:1; unsigned int mem_coherent:1; unsigned int mem_allocated:1; - unsigned int temp_local:1; /* If true, the temp is saved accross + unsigned int temp_local:1; /* If true, the temp is saved across basic blocks. Otherwise, it is not - preserved accross basic blocks. */ + preserved across basic blocks. */ unsigned int temp_allocated:1; /* never used for code gen */ /* index of next free temp of same base type, -1 if end */ int next_free_temp; @@ -286,8 +300,8 @@ struct TCGContext { uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ /* liveness analysis */ - uint16_t *op_dead_iargs; /* for each operation, each bit tells if the - corresponding input argument is dead */ + uint16_t *op_dead_args; /* for each operation, each bit tells if the + corresponding argument is dead */ /* tells in which temporary a given register is. It does not take into account fixed registers */ @@ -323,6 +337,10 @@ struct TCGContext { int64_t restore_count; int64_t restore_time; #endif + +#ifdef CONFIG_DEBUG_TCG + int temps_in_use; +#endif }; extern TCGContext tcg_ctx; @@ -392,8 +410,20 @@ static inline TCGv_i64 tcg_temp_local_ne void tcg_temp_free_i64(TCGv_i64 arg); char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); +#if defined(CONFIG_DEBUG_TCG) +/* If you call tcg_clear_temp_count() at the start of a section of + * code which is not supposed to leak any TCG temporaries, then + * calling tcg_check_temp_count() at the end of the section will + * return 1 if the section did in fact leak a temporary. 
+ */ +void tcg_clear_temp_count(void); +int tcg_check_temp_count(void); +#else +#define tcg_clear_temp_count() do { } while (0) +#define tcg_check_temp_count() 0 +#endif + +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); #define TCG_CT_ALIAS 0x80 #define TCG_CT_IALIAS 0x40 @@ -456,25 +486,27 @@ extern void qemu_qsort(void* base, size_ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); #if TCG_TARGET_REG_BITS == 32 -#define tcg_const_ptr tcg_const_i32 -#define tcg_add_ptr tcg_add_i32 -#define tcg_sub_ptr tcg_sub_i32 -#define TCGv_ptr TCGv_i32 -#define GET_TCGV_PTR GET_TCGV_I32 -#define tcg_global_reg_new_ptr tcg_global_reg_new_i32 -#define tcg_global_mem_new_ptr tcg_global_mem_new_i32 -#define tcg_temp_new_ptr tcg_temp_new_i32 -#define tcg_temp_free_ptr tcg_temp_free_i32 +#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n)) +#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n)) + +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32(V)) +#define tcg_global_reg_new_ptr(R, N) \ + TCGV_NAT_TO_PTR(tcg_global_reg_new_i32((R), (N))) +#define tcg_global_mem_new_ptr(R, O, N) \ + TCGV_NAT_TO_PTR(tcg_global_mem_new_i32((R), (O), (N))) +#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i32()) +#define tcg_temp_free_ptr(T) tcg_temp_free_i32(TCGV_PTR_TO_NAT(T)) #else -#define tcg_const_ptr tcg_const_i64 -#define tcg_add_ptr tcg_add_i64 -#define tcg_sub_ptr tcg_sub_i64 -#define TCGv_ptr TCGv_i64 -#define GET_TCGV_PTR GET_TCGV_I64 -#define tcg_global_reg_new_ptr tcg_global_reg_new_i64 -#define tcg_global_mem_new_ptr tcg_global_mem_new_i64 -#define tcg_temp_new_ptr tcg_temp_new_i64 -#define tcg_temp_free_ptr tcg_temp_free_i64 +#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I64(n)) +#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I64(GET_TCGV_PTR(n)) + +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64(V)) +#define tcg_global_reg_new_ptr(R, N) \ + TCGV_NAT_TO_PTR(tcg_global_reg_new_i64((R), (N))) +#define tcg_global_mem_new_ptr(R, O, N) \ + TCGV_NAT_TO_PTR(tcg_global_mem_new_i64((R), (O), (N))) +#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i64()) +#define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T)) #endif void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, @@ -500,13 +532,9 @@ extern uint8_t code_gen_prologue[]; extern uint8_t *code_gen_prologue; #endif #if defined(_ARCH_PPC) && !defined(_ARCH_PPC64) -#define tcg_qemu_tb_exec(tb_ptr) \ - ((intptr_t REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr) +#define tcg_qemu_tb_exec(env, tb_ptr) \ + ((long REGPARM __attribute__ ((longcall)) (*)(void *, void *))code_gen_prologue)(env, tb_ptr) #else -# if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) && !defined(__MINGW64__) -# define tcg_qemu_tb_exec(tb_ptr, ret) \ - __asm__ __volatile__("call *%%ecx" : "=a"(ret) : "a"(tb_ptr), "c" (&code_gen_prologue[0]) : "memory", "%edx", "cc") -# else -#define tcg_qemu_tb_exec(tb_ptr) ((intptr_t REGPARM (*)(void *))code_gen_prologue)(tb_ptr) -# endif +#define tcg_qemu_tb_exec(env, tb_ptr) \ + ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr) #endif --- translate-all.c 2013-12-18 11:11:38.000000000 -0500 +++ translate-all.c 2014-01-15 15:30:16.000000000 -0500 @@ -36,7 +36,6 @@ #define NO_CPU_IO_DEFS #include "cpu.h" -#include "exec-all.h" #include "disas.h" #include "tcg.h" #include "qemu-timer.h" @@ -54,8 +53,6 @@ uint8_t gen_opc_instr_start[OPC_BUF_SIZE void cpu_gen_init(void) { tcg_context_init(&tcg_ctx); - tcg_set_frame(&tcg_ctx, TCG_AREG0, 
offsetof(CPUState, temp_buf), - sizeof(((CPUState *)0)->temp_buf)); } /* return non zero if the very first instruction is invalid so that @@ -131,8 +128,7 @@ int cpu_gen_code(CPUState *env, Translat /* The cpu state corresponding to 'searched_pc' is restored. */ int cpu_restore_state(TranslationBlock *tb, - CPUState *env, uintptr_t searched_pc, - void *puc) + CPUState *env, uintptr_t searched_pc) { TCGContext *s = &tcg_ctx; int j; @@ -176,7 +172,7 @@ int cpu_restore_state(TranslationBlock * j--; env->icount_decr.u16.low -= gen_opc_icount[j]; - gen_pc_load(env, tb, searched_pc, j, puc); + restore_state_to_opc(env, tb, j); #ifdef CONFIG_PROFILER s->restore_time += profile_getclock() - ti;
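/*
 * Editor's note: a minimal sketch of why the ops_sse.h compare helpers switch
 * to the softfloat *_quiet variants (float32_eq_quiet, float*_unordered_quiet).
 * My understanding is that in the 0.15 softfloat API the plain comparisons
 * signal float_flag_invalid for any NaN operand, while the _quiet ones signal
 * only for signalling NaNs -- the behaviour the x86 EQ/UNORD predicates want.
 * This is illustrative host C on raw binary32 bit patterns, not the softfloat
 * code itself; FLAG_INVALID and the helper names are made up for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

#define FLAG_INVALID 1

static bool f32_is_nan(uint32_t a)  { return (a & 0x7fffffffu) > 0x7f800000u; }
static bool f32_is_snan(uint32_t a) { return f32_is_nan(a) && !(a & 0x00400000u); }

static bool f32_eq_quiet(uint32_t a, uint32_t b, int *flags)
{
    if (f32_is_nan(a) || f32_is_nan(b)) {
        if (f32_is_snan(a) || f32_is_snan(b)) {
            *flags |= FLAG_INVALID;          /* only sNaNs raise invalid */
        }
        return false;                        /* NaNs never compare equal */
    }
    if (((a | b) << 1) == 0) {
        return true;                         /* +0 and -0 are equal */
    }
    return a == b;                           /* otherwise bitwise equality */
}

static bool f32_eq_signaling(uint32_t a, uint32_t b, int *flags)
{
    if (f32_is_nan(a) || f32_is_nan(b)) {
        *flags |= FLAG_INVALID;              /* any NaN raises invalid */
        return false;
    }
    return f32_eq_quiet(a, b, flags);
}

int main(void)
{
    int quiet_flags = 0, sig_flags = 0;
    uint32_t qnan = 0x7fc00000u;             /* a quiet NaN */
    uint32_t one  = 0x3f800000u;             /* 1.0f */

    (void)f32_eq_quiet(qnan, one, &quiet_flags);      /* false, no flag */
    (void)f32_eq_signaling(qnan, one, &sig_flags);    /* false, invalid set */
    return (quiet_flags == 0 && sig_flags == FLAG_INVALID) ? 0 : 1;
}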
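/*
 * Editor's note: the 3DNow! pfrcp/pfrsqrt helpers above drop the host-float
 * approx_rcp()/approx_rsqrt() shortcuts and instead compute 1/x and
 * 1/sqrt(|x|) through softfloat, so rounding and exception flags follow the
 * guest's mmx_status.  A rough host-C picture of the value now produced
 * (status flags and the sign-bit reinsertion done by pfrsqrt are omitted):
 */
#include <math.h>
#include <stdio.h>

static float pfrcp_value(float x)   { return 1.0f / x; }
static float pfrsqrt_value(float x) { return 1.0f / sqrtf(fabsf(x)); }

int main(void)
{
    printf("pfrcp(4.0)   ~ %f\n", pfrcp_value(4.0f));    /* 0.25 */
    printf("pfrsqrt(4.0) ~ %f\n", pfrsqrt_value(4.0f));  /* 0.5  */
    return 0;
}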
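/*
 * Editor's note: a standalone sketch of the bitfield deposit operation that
 * the new deposit_i32/i64 TCG op and tcg_gen_deposit_* describe above, and
 * that translate.c now uses for the 8/16-bit partial register writes.  The
 * deposit32() helper below is hypothetical plain C mirroring the generic
 * fallback expansion (mask, shift, clear, or) rather than any QEMU function.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t deposit32(uint32_t t1, uint32_t t2, unsigned ofs, unsigned len)
{
    /* LEN-bit field of t2, placed at bit OFS of t1 (counting from the LSB). */
    uint32_t mask = (len >= 32) ? ~0u : ((1u << len) - 1);
    return (t1 & ~(mask << ofs)) | ((t2 & mask) << ofs);
}

int main(void)
{
    /* The README example: pos=8, len=4 is equivalent to
       dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00). */
    assert(deposit32(0xffffffffu, 0x5u, 8, 4) == 0xfffff5ffu);

    /* gen_op_mov_reg_v OT_WORD case: write AX without touching upper EAX. */
    assert(deposit32(0x12345678u, 0xbeefu, 0, 16) == 0x1234beefu);
    return 0;
}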
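/*
 * Editor's note: sketch of the op_dead_iargs -> op_dead_args change in the
 * liveness pass above.  The per-op bitmask now covers every argument in its
 * args[] position (outputs first, then inputs), so the allocator tests
 * IS_DEAD_ARG(i) with the same index it uses to walk args[], and an output
 * whose value is never read can be dropped too.  This is a standalone
 * illustration; the real pass runs backwards over gen_opc_buf and also
 * handles globals, calls and side effects.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t compute_dead_args(const int *args, int nb_oargs, int nb_iargs,
                                  uint8_t *dead_temps /* 1 = no later use */)
{
    uint16_t dead_args = 0;
    int i;

    /* Output args: record whether anything later reads them, then mark them
       dead above this op (they get redefined here). */
    for (i = 0; i < nb_oargs; i++) {
        if (dead_temps[args[i]]) {
            dead_args |= 1 << i;
        }
        dead_temps[args[i]] = 1;
    }
    /* Input args: dead here when this op is their last use. */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (dead_temps[args[i]]) {
            dead_args |= 1 << i;
        }
        dead_temps[args[i]] = 0;
    }
    return dead_args;
}

int main(void)
{
    /* add_i32 t0, t1, t2 where neither t0 nor t1 is used afterwards:
       args[] = { t0, t1, t2 }, one output, two inputs. */
    uint8_t dead_temps[3] = { 1, 1, 0 };
    int args[3] = { 0, 1, 2 };

    printf("dead_args = 0x%x\n", compute_dead_args(args, 1, 2, dead_temps));
    /* 0x3: bit 0 = unused output t0, bit 1 = last use of input t1 */
    return 0;
}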
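/*
 * Editor's note: self-contained model of the CONFIG_DEBUG_TCG temp-leak check
 * added to tcg.c/tcg.h above, together with the way a translator is expected
 * to use it: clear the counter before generating ops for one guest
 * instruction and warn if the check finds temporaries that were allocated but
 * never freed.  The static counter stands in for TCGContext::temps_in_use and
 * the "leaky instruction" in main() is a made-up example.
 */
#include <stdio.h>

static int temps_in_use;                        /* models s->temps_in_use */

static void temp_new(void)  { temps_in_use++; } /* as in tcg_temp_new_internal */
static void temp_free(void) { temps_in_use--; } /* as in tcg_temp_free_internal */

static void clear_temp_count(void) { temps_in_use = 0; }

static int check_temp_count(void)
{
    if (temps_in_use) {
        temps_in_use = 0;     /* don't repeat the warning immediately */
        return 1;
    }
    return 0;
}

int main(void)
{
    /* A well-behaved instruction: new + free balance out. */
    clear_temp_count();
    temp_new();
    temp_free();
    printf("clean insn leaked: %d\n", check_temp_count());   /* 0 */

    /* A leaky instruction: the matching free is forgotten. */
    clear_temp_count();
    temp_new();
    if (check_temp_count()) {
        fprintf(stderr, "TCG temporary leaked in this instruction\n");
    }
    return 0;
}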